[linux] 03/03: [amd64] Implement Kernel Page Table Isolation (KPTI, aka KAISER) (CVE-2017-5754)
debian-kernel at lists.debian.org
Fri Jan 5 16:48:08 UTC 2018
This is an automated email from the git hooks/post-receive script.
benh pushed a commit to branch jessie-security
in repository linux.
commit e6513f8c3f27fef5fdaea693e802643b221ddcc5
Author: Ben Hutchings <ben at decadent.org.uk>
Date: Fri Jan 5 16:47:41 2018 +0000
[amd64] Implement Kernel Page Table Isolation (KPTI, aka KAISER) (CVE-2017-5754)
---
debian/changelog | 2 +
...finish_arch_post_lock_switch-from-modules.patch | 92 +
...dd-nokaiser-boot-option-using-alternative.patch | 641 ++++++
...iser-alloc_ldt_struct-use-get_zeroed_page.patch | 28 +
...sm-tlbflush.h-handle-nopge-at-lower-level.patch | 83 +
.../all/kpti/kaiser-disabled-on-xen-pv.patch | 49 +
...er_flush_tlb_on_return_to_user-check-pcid.patch | 83 +
.../all/kpti/kaiser-kernel-address-isolation.patch | 2295 ++++++++++++++++++++
...aiser-set-_page_user-of-the-vsyscall-page.patch | 145 ++
...ternative-instead-of-x86_cr3_pcid_noflush.patch | 127 ++
.../kpti/kpti-rename-to-page_table_isolation.patch | 302 +++
.../bugfix/all/kpti/kpti-report-when-enabled.patch | 44 +
...t-sched-core-fix-mmu_context.h-assumption.patch | 37 +
...h_mm_irqs_off-and-use-it-in-the-scheduler.patch | 73 +
...ask_exit-shouldn-t-use-switch_mm_irqs_off.patch | 41 +
.../x86-alternatives-add-instruction-padding.patch | 424 ++++
.../x86-alternatives-cleanup-dprintk-macro.patch | 99 +
.../x86-alternatives-make-jmps-more-robust.patch | 284 +++
...ternatives-use-optimized-nops-for-padding.patch | 50 +
...mdline-parsing-for-options-with-arguments.patch | 175 ++
...command-line-parsing-when-matching-at-end.patch | 120 +
...nd-line-parsing-when-partial-word-matches.patch | 101 +
...oot-pass-in-size-to-early-cmdline-parsing.patch | 60 +
...-boot-simplify-early-command-line-parsing.patch | 52 +
...bstract-irq_tlb_count-from-irq_call_count.patch | 103 +
.../x86-kaiser-check-boottime-cmdline-params.patch | 121 ++
.../x86-kaiser-move-feature-detection-up.patch | 77 +
.../all/kpti/x86-kaiser-reenable-paravirt.patch | 26 +
...-and-simplify-x86_feature_kaiser-handling.patch | 95 +
...-disable-use-from-vdso-if-kpti-is-enabled.patch | 45 +
...-64-fix-reboot-interaction-with-cr4.pcide.patch | 40 +
...noinvpcid-boot-option-to-turn-off-invpcid.patch | 72 +
.../all/kpti/x86-mm-add-invpcid-helpers.patch | 91 +
...d-the-nopcid-boot-option-to-turn-off-pcid.patch | 71 +
...86-mm-build-arch-x86-mm-tlb.c-even-on-smp.patch | 63 +
.../x86-mm-clean-up-the-tlb-flushing-code.patch | 93 +
.../x86-mm-disable-pcid-on-32-bit-kernels.patch | 63 +
...-mm-enable-cr4.pcide-on-supported-systems.patch | 109 +
.../kpti/x86-mm-fix-invpcid-asm-constraint.patch | 66 +
.../x86-mm-fix-missed-global-tlb-flush-stat.patch | 72 +
...warning-and-make-the-variable-read-mostly.patch | 42 +
...available-use-it-to-flush-global-mappings.patch | 54 +
...-make-flush_tlb_mm_range-more-predictable.patch | 77 +
...-new-tunable-for-single-vs-full-tlb-flush.patch | 160 ++
...t-flush_tlb_page-using-flush_tlb_mm_range.patch | 100 +
...move-flush_tlb-and-flush_tlb_current_task.patch | 97 +
...h.h-code-always-use-the-formerly-smp-code.patch | 259 +++
...omplicated-out-of-date-buggy-tlb-flushing.patch | 284 +++
...-mm-sched-core-turn-off-irqs-in-switch_mm.patch | 64 +
.../x86-mm-sched-core-uninline-switch_mm.patch | 204 ++
...mm-set-tlb-flush-tunable-to-sane-value-33.patch | 268 +++
.../x86-paravirt-dont-patch-flush_tlb_single.patch | 66 +
...-flush_tlb_mm_range-in-mark_screen_rdonly.patch | 46 +
debian/patches/series | 52 +
54 files changed, 8387 insertions(+)
diff --git a/debian/changelog b/debian/changelog
index 597fda3..de95ac6 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -42,6 +42,8 @@ linux (3.16.51-3+deb8u1) UNRELEASED; urgency=high
- scsi: scsi_error: count medium access timeout only once per EH run
- [x86] panic: replace smp_send_stop() with kdump friendly version in panic
path
+ * [amd64] Implement Kernel Page Table Isolation (KPTI, aka KAISER)
+ (CVE-2017-5754)
-- Ben Hutchings <ben at decadent.org.uk> Fri, 05 Jan 2018 16:12:55 +0000
diff --git a/debian/patches/bugfix/all/kpti/arm-hide-finish_arch_post_lock_switch-from-modules.patch b/debian/patches/bugfix/all/kpti/arm-hide-finish_arch_post_lock_switch-from-modules.patch
new file mode 100644
index 0000000..871ce3b
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/arm-hide-finish_arch_post_lock_switch-from-modules.patch
@@ -0,0 +1,92 @@
+From: Steven Rostedt <rostedt at goodmis.org>
+Date: Fri, 13 May 2016 15:30:13 +0200
+Subject: ARM: Hide finish_arch_post_lock_switch() from modules
+
+commit ef0491ea17f8019821c7e9c8e801184ecf17f85a upstream.
+
+The introduction of switch_mm_irqs_off() brought back an old bug
+regarding the use of preempt_enable_no_resched:
+
+As part of:
+
+ 62b94a08da1b ("sched/preempt: Take away preempt_enable_no_resched() from modules")
+
+the definition of preempt_enable_no_resched() is only available in
+built-in code, not in loadable modules, so we can't generally use
+it from header files.
+
+However, the ARM version of finish_arch_post_lock_switch()
+calls preempt_enable_no_resched() and is defined as a static
+inline function in asm/mmu_context.h. This in turn means we cannot
+include asm/mmu_context.h from modules.
+
+With today's tip tree, asm/mmu_context.h gets included from
+linux/mmu_context.h, which is normally the exact pattern one would
+expect, but unfortunately, linux/mmu_context.h can be included from
+the vhost driver that is a loadable module, now causing this compile
+time error with modular configs:
+
+ In file included from ../include/linux/mmu_context.h:4:0,
+ from ../drivers/vhost/vhost.c:18:
+ ../arch/arm/include/asm/mmu_context.h: In function 'finish_arch_post_lock_switch':
+ ../arch/arm/include/asm/mmu_context.h:88:3: error: implicit declaration of function 'preempt_enable_no_resched' [-Werror=implicit-function-declaration]
+ preempt_enable_no_resched();
+
+Andy already tried to fix the bug by including linux/preempt.h
+from asm/mmu_context.h, but that didn't help. Arnd suggested reordering
+the header files, which wasn't popular, so let's use this
+workaround instead:
+
+The finish_arch_post_lock_switch() definition is now also hidden
+inside of #ifdef MODULE, so we don't see anything referencing
+preempt_enable_no_resched() from a header file. I've built a
+few hundred randconfig kernels with this, and did not see any
+new problems.
+
+Tested-by: Guenter Roeck <linux at roeck-us.net>
+Signed-off-by: Steven Rostedt <rostedt at goodmis.org>
+Signed-off-by: Arnd Bergmann <arnd at arndb.de>
+Acked-by: Russell King <rmk+kernel at arm.linux.org.uk>
+Cc: Alexander Shishkin <alexander.shishkin at linux.intel.com>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Andy Lutomirski <luto at kernel.org>
+Cc: Ard Biesheuvel <ard.biesheuvel at linaro.org>
+Cc: Arnaldo Carvalho de Melo <acme at redhat.com>
+Cc: Borislav Petkov <bp at suse.de>
+Cc: Frederic Weisbecker <fweisbec at gmail.com>
+Cc: Jiri Olsa <jolsa at redhat.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Mel Gorman <mgorman at techsingularity.net>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Russell King - ARM Linux <linux at armlinux.org.uk>
+Cc: Stephane Eranian <eranian at google.com>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: Vince Weaver <vincent.weaver at maine.edu>
+Cc: linux-arm-kernel at lists.infradead.org
+Fixes: f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler")
+Link: http://lkml.kernel.org/r/1463146234-161304-1-git-send-email-arnd@arndb.de
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/arm/include/asm/mmu_context.h | 2 ++
+ 1 file changed, 2 insertions(+)
+
+--- a/arch/arm/include/asm/mmu_context.h
++++ b/arch/arm/include/asm/mmu_context.h
+@@ -61,6 +61,7 @@ static inline void check_and_switch_cont
+ cpu_switch_mm(mm->pgd, mm);
+ }
+
++#ifndef MODULE
+ #define finish_arch_post_lock_switch \
+ finish_arch_post_lock_switch
+ static inline void finish_arch_post_lock_switch(void)
+@@ -82,6 +83,7 @@ static inline void finish_arch_post_lock
+ preempt_enable_no_resched();
+ }
+ }
++#endif /* !MODULE */
+
+ #endif /* CONFIG_MMU */
+
diff --git a/debian/patches/bugfix/all/kpti/kaiser-add-nokaiser-boot-option-using-alternative.patch b/debian/patches/bugfix/all/kpti/kaiser-add-nokaiser-boot-option-using-alternative.patch
new file mode 100644
index 0000000..de4e698
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-add-nokaiser-boot-option-using-alternative.patch
@@ -0,0 +1,641 @@
+From: Hugh Dickins <hughd at google.com>
+Date: Sun, 24 Sep 2017 16:59:49 -0700
+Subject: kaiser: add "nokaiser" boot option, using ALTERNATIVE
+
+Added "nokaiser" boot option: an early param like "noinvpcid".
+Most places now check int kaiser_enabled (#defined 0 when not
+CONFIG_KAISER) instead of #ifdef CONFIG_KAISER; but entry_64.S
+and entry_64_compat.S are using the ALTERNATIVE technique, which
+patches in the preferred instructions at runtime. That technique
+is tied to x86 cpu features, so X86_FEATURE_KAISER fabricated
+("" in its comment so "kaiser" not magicked into /proc/cpuinfo).
+
+Prior to "nokaiser", Kaiser #defined _PAGE_GLOBAL 0: revert that,
+but be careful with both _PAGE_GLOBAL and CR4.PGE: setting them when
+nokaiser like when !CONFIG_KAISER, but not setting either when kaiser -
+neither matters on its own, but it's hard to be sure that _PAGE_GLOBAL
+won't get set in some obscure corner, or something add PGE into CR4.
+By omitting _PAGE_GLOBAL from __supported_pte_mask when kaiser_enabled,
+all page table setup which uses pte_pfn() masks it out of the ptes.
+
+It's slightly shameful that the same declaration versus definition of
+kaiser_enabled appears in not one, not two, but in three header files
+(asm/kaiser.h, asm/pgtable.h, asm/tlbflush.h). I felt safer that way,
+than with #including any of those in any of the others; and did not
+feel it worth an asm/kaiser_enabled.h - kernel/cpu/common.c includes
+them all, so we shall hear about it if they get out of synch.
+
+Cleanups while in the area: removed the silly #ifdef CONFIG_KAISER
+from kaiser.c; removed the unused native_get_normal_pgd(); removed
+the spurious reg clutter from SWITCH_*_CR3 macro stubs; corrected some
+comments. But more interestingly, set CR4.PSE in secondary_startup_64:
+the manual is clear that it does not matter whether it's 0 or 1 when
+4-level-pts are enabled, but I was distracted to find cr4 different on
+BSP and auxiliaries - BSP alone was adding PSE, in probe_page_size_mask().
+
+(cherry picked from Change-Id: I8e5bec716944444359cbd19f6729311eff943e9a)
+
+Signed-off-by: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16:
+ - Use set_in_cr4()
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/kernel-parameters.txt | 2 ++
+ arch/x86/include/asm/cpufeature.h | 3 +++
+ arch/x86/include/asm/kaiser.h | 27 ++++++++++++++++++-------
+ arch/x86/include/asm/pgtable.h | 20 ++++++++++++------
+ arch/x86/include/asm/pgtable_64.h | 13 ++++--------
+ arch/x86/include/asm/pgtable_types.h | 4 ----
+ arch/x86/include/asm/tlbflush.h | 39 +++++++++++++++++++++++-------------
+ arch/x86/kernel/cpu/common.c | 29 ++++++++++++++++++++++++++-
+ arch/x86/kernel/entry_64.S | 15 +++++++-------
+ arch/x86/kernel/espfix_64.c | 3 ++-
+ arch/x86/kernel/head_64.S | 4 ++--
+ arch/x86/mm/init.c | 2 +-
+ arch/x86/mm/init_64.c | 10 +++++++++
+ arch/x86/mm/kaiser.c | 26 ++++++++++++++++++++----
+ arch/x86/mm/pgtable.c | 8 ++------
+ arch/x86/mm/tlb.c | 4 +---
+ 16 files changed, 144 insertions(+), 65 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2229,6 +2229,8 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
++ nokaiser [X86-64] Disable KAISER isolation of kernel from user.
++
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+ no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -188,6 +188,9 @@
+ #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
+ #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
+
++/* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
++#define X86_FEATURE_KAISER (7*32+31) /* "" CONFIG_KAISER w/o nokaiser */
++
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
+ #define X86_FEATURE_VNMI (8*32+ 1) /* Intel Virtual NMI */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -46,28 +46,33 @@ movq \reg, %cr3
+ .endm
+
+ .macro SWITCH_KERNEL_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ popq %rax
++8:
+ .endm
+
+ .macro SWITCH_USER_CR3
+-pushq %rax
++ALTERNATIVE "jmp 8f", "pushq %rax", X86_FEATURE_KAISER
+ _SWITCH_TO_USER_CR3 %rax %al
+ popq %rax
++8:
+ .endm
+
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+-movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++ALTERNATIVE "jmp 8f", \
++ __stringify(movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)), \
++ X86_FEATURE_KAISER
+ _SWITCH_TO_KERNEL_CR3 %rax
+ movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++8:
+ .endm
+
+ #else /* CONFIG_KAISER */
+
+-.macro SWITCH_KERNEL_CR3 reg
++.macro SWITCH_KERNEL_CR3
+ .endm
+-.macro SWITCH_USER_CR3 reg regb
++.macro SWITCH_USER_CR3
+ .endm
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+@@ -90,6 +95,16 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif /* CONFIG_KAISER */
++
++/*
++ * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * so as to build with tests on kaiser_enabled instead of #ifdefs.
++ */
++
+ /**
+ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
+ * @addr: the start address of the range
+@@ -119,8 +134,6 @@ extern void kaiser_remove_mapping(unsign
+ */
+ extern void kaiser_init(void);
+
+-#endif /* CONFIG_KAISER */
+-
+ #endif /* __ASSEMBLY */
+
+ #endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -17,6 +17,12 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+
++#ifdef CONFIG_KAISER
++extern int kaiser_enabled;
++#else
++#define kaiser_enabled 0
++#endif
++
+ void ptdump_walk_pgd_level(struct seq_file *m, pgd_t *pgd);
+
+ /*
+@@ -648,7 +654,7 @@ static inline int pgd_bad(pgd_t pgd)
+ * page table by accident; it will fault on the first
+ * instruction it tries to run. See native_set_pgd().
+ */
+- if (IS_ENABLED(CONFIG_KAISER))
++ if (kaiser_enabled)
+ ignore_flags |= _PAGE_NX;
+
+ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+@@ -853,12 +859,14 @@ static inline void pmdp_set_wrprotect(st
+ */
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+- memcpy(dst, src, count * sizeof(pgd_t));
++ memcpy(dst, src, count * sizeof(pgd_t));
+ #ifdef CONFIG_KAISER
+- /* Clone the shadow pgd part as well */
+- memcpy(native_get_shadow_pgd(dst),
+- native_get_shadow_pgd(src),
+- count * sizeof(pgd_t));
++ if (kaiser_enabled) {
++ /* Clone the shadow pgd part as well */
++ memcpy(native_get_shadow_pgd(dst),
++ native_get_shadow_pgd(src),
++ count * sizeof(pgd_t));
++ }
+ #endif
+ }
+
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -111,13 +111,12 @@ extern pgd_t kaiser_set_shadow_pgd(pgd_t
+
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+ {
++#ifdef CONFIG_DEBUG_VM
++ /* linux/mmdebug.h may not have been included at this point */
++ BUG_ON(!kaiser_enabled);
++#endif
+ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
+ }
+-
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+- return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
+-}
+ #else
+ static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+@@ -128,10 +127,6 @@ static inline pgd_t *native_get_shadow_p
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
+-{
+- return pgdp;
+-}
+ #endif /* CONFIG_KAISER */
+
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -50,11 +50,7 @@
+ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
+-#ifdef CONFIG_KAISER
+-#define _PAGE_GLOBAL (_AT(pteval_t, 0))
+-#else
+ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
+-#endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -69,9 +69,11 @@ static inline void invpcid_flush_all_non
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+ #ifdef CONFIG_KAISER
++extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+ #else
++#define kaiser_enabled 0
+ static inline void kaiser_setup_pcid(void)
+ {
+ }
+@@ -96,7 +98,7 @@ static inline void __native_flush_tlb(vo
+ * back:
+ */
+ preempt_disable();
+- if (this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+@@ -107,20 +109,30 @@ static inline void __native_flush_tlb_gl
+ unsigned long cr4;
+
+ cr4 = native_read_cr4();
+- /* clear PGE */
+- native_write_cr4(cr4 & ~X86_CR4_PGE);
+- /* write old PGE again and flush TLBs */
+- native_write_cr4(cr4);
++ if (cr4 & X86_CR4_PGE) {
++ /* clear PGE and flush TLB of all entries */
++ native_write_cr4(cr4 & ~X86_CR4_PGE);
++ /* restore PGE as it was before */
++ native_write_cr4(cr4);
++ } else {
++ /*
++ * x86_64 microcode update comes this way when CR4.PGE is not
++ * enabled, and it's safer for all callers to allow this case.
++ */
++ native_write_cr3(native_read_cr3());
++ }
+ }
+
+ static inline void __native_flush_tlb_global(void)
+ {
+-#ifdef CONFIG_KAISER
+- /* Globals are not used at all */
+- __native_flush_tlb();
+-#else
+ unsigned long flags;
+
++ if (kaiser_enabled) {
++ /* Globals are not used at all */
++ __native_flush_tlb();
++ return;
++ }
++
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+@@ -140,7 +152,6 @@ static inline void __native_flush_tlb_gl
+ raw_local_irq_save(flags);
+ __native_flush_tlb_global_irq_disabled();
+ raw_local_irq_restore(flags);
+-#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+@@ -155,7 +166,7 @@ static inline void __native_flush_tlb_si
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+- if (this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+@@ -170,9 +181,9 @@ static inline void __native_flush_tlb_si
+ * Make sure to do only a single invpcid when KAISER is
+ * disabled and we have only a single ASID.
+ */
+- if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
+- invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+- invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++ if (kaiser_enabled)
++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -179,6 +179,20 @@ static int __init x86_pcid_setup(char *s
+ return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++ /* nokaiser doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++#ifdef CONFIG_KAISER
++ kaiser_enabled = 0;
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
++ pr_info("nokaiser: KAISER feature disabled\n");
++#endif
++ return 0;
++}
++early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -333,7 +347,7 @@ static __always_inline void setup_smap(s
+ static void setup_pcid(struct cpuinfo_x86 *c)
+ {
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+- if (cpu_has(c, X86_FEATURE_PGE)) {
++ if (cpu_has(c, X86_FEATURE_PGE) || kaiser_enabled) {
+ set_in_cr4(X86_CR4_PCIDE);
+ /*
+ * INVPCID has two "groups" of types:
+@@ -719,6 +733,10 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ c->x86_power = cpuid_edx(0x80000007);
+
+ init_scattered_cpuid_features(c);
++#ifdef CONFIG_KAISER
++ if (kaiser_enabled)
++ set_cpu_cap(c, X86_FEATURE_KAISER);
++#endif
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+@@ -1343,6 +1361,15 @@ void cpu_init(void)
+ int cpu;
+ int i;
+
++ if (!kaiser_enabled) {
++ /*
++ * secondary_startup_64() deferred setting PGE in cr4:
++ * probe_page_size_mask() sets it on the boot cpu,
++ * but it needs to be set on each secondary cpu.
++ */
++ set_in_cr4(X86_CR4_PGE);
++ }
++
+ /*
+ * Load microcode on this cpu if a valid microcode is available.
+ * This is early microcode loading procedure.
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -326,7 +326,7 @@ ENTRY(save_paranoid)
+ * unconditionally, but we need to find out whether the reverse
+ * should be done on return (conveyed to paranoid_exit in %ebx).
+ */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ testl $KAISER_SHADOW_PGD_OFFSET, %eax
+ jz 2f
+ orl $2, %ebx
+@@ -1351,6 +1351,7 @@ paranoid_kernel:
+ movq %r12, %rbx /* restore after paranoid_userspace */
+ TRACE_IRQS_IRETQ_DEBUG 0
+ #ifdef CONFIG_KAISER
++ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+ SWITCH_USER_CR3
+@@ -1600,13 +1601,14 @@ ENTRY(nmi)
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
++2:
+ #endif
+ call do_nmi
+
+@@ -1616,8 +1618,7 @@ ENTRY(nmi)
+ * kernel code that needs user CR3, but do we ever return
+ * to "user mode" where we need the kernel CR3?
+ */
+- popq %rax
+- mov %rax, %cr3
++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+
+ /*
+@@ -1870,13 +1871,14 @@ end_repeat_nmi:
+ #ifdef CONFIG_KAISER
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+- movq %cr3, %rax
++ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+ orq x86_cr3_pcid_noflush, %rax
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+ movq %rax, %cr3
++2:
+ #endif
+ DEFAULT_FRAME 0 /* ???? */
+
+@@ -1889,8 +1891,7 @@ end_repeat_nmi:
+ * kernel code that needs user CR3, like just just before
+ * a sysret.
+ */
+- popq %rax
+- mov %rax, %cr3
++ ALTERNATIVE "", "popq %rax; movq %rax, %cr3", X86_FEATURE_KAISER
+ #endif
+
+ testl %ebx,%ebx /* swapgs needed? */
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -135,9 +135,10 @@ void __init init_espfix_bsp(void)
+ * area to ensure it is mapped into the shadow user page
+ * tables.
+ */
+- if (IS_ENABLED(CONFIG_KAISER))
++ if (kaiser_enabled) {
+ set_pgd(native_get_shadow_pgd(pgd_p),
+ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
++ }
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -183,8 +183,8 @@ ENTRY(secondary_startup_64)
+ movq $(init_level4_pgt - __START_KERNEL_map), %rax
+ 1:
+
+- /* Enable PAE mode and PGE */
+- movl $(X86_CR4_PAE | X86_CR4_PGE), %ecx
++ /* Enable PAE and PSE, but defer PGE until kaiser_enabled is decided */
++ movl $(X86_CR4_PAE | X86_CR4_PSE), %ecx
+ movq %rcx, %cr4
+
+ /* Setup early boot stage 4 level pagetables. */
+--- a/arch/x86/mm/init.c
++++ b/arch/x86/mm/init.c
+@@ -140,7 +140,7 @@ static void __init probe_page_size_mask(
+ set_in_cr4(X86_CR4_PSE);
+
+ /* Enable PGE if available */
+- if (cpu_has_pge) {
++ if (cpu_has_pge && !kaiser_enabled) {
+ set_in_cr4(X86_CR4_PGE);
+ __supported_pte_mask |= _PAGE_GLOBAL;
+ }
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -396,6 +396,16 @@ void __init cleanup_highmap(void)
+ continue;
+ if (vaddr < (unsigned long) _text || vaddr > end)
+ set_pmd(pmd, __pmd(0));
++ else if (kaiser_enabled) {
++ /*
++ * level2_kernel_pgt is initialized with _PAGE_GLOBAL:
++ * clear that now. This is not important, so long as
++ * CR4.PGE remains clear, but it removes an anomaly.
++ * Physical mapping setup below avoids _PAGE_GLOBAL
++ * by use of massage_pgprot() inside pfn_pte() etc.
++ */
++ set_pmd(pmd, pmd_clear_flags(*pmd, _PAGE_GLOBAL));
++ }
+ }
+ }
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -20,7 +20,9 @@ extern struct mm_struct init_mm;
+ #include <asm/desc.h>
+ #include <asm/vsyscall.h>
+
+-#ifdef CONFIG_KAISER
++int kaiser_enabled __read_mostly = 1;
++EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
++
+ __visible
+ DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+@@ -171,8 +173,8 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return pte_offset_kernel(pmd, address);
+ }
+
+-int kaiser_add_user_map(const void *__start_addr, unsigned long size,
+- unsigned long flags)
++static int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++ unsigned long flags)
+ {
+ int ret = 0;
+ pte_t *pte;
+@@ -181,6 +183,15 @@ int kaiser_add_user_map(const void *__st
+ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
+ unsigned long target_address;
+
++ /*
++ * It is convenient for callers to pass in __PAGE_KERNEL etc,
++ * and there is no actual harm from setting _PAGE_GLOBAL, so
++ * long as CR4.PGE is not set. But it is nonetheless troubling
++ * to see Kaiser itself setting _PAGE_GLOBAL (now that "nokaiser"
++ * requires that not to be #defined to 0): so mask it off here.
++ */
++ flags &= ~_PAGE_GLOBAL;
++
+ for (; address < end_addr; address += PAGE_SIZE) {
+ target_address = get_pa_from_mapping(address);
+ if (target_address == -1) {
+@@ -267,6 +278,8 @@ void __init kaiser_init(void)
+ {
+ int cpu, idx;
+
++ if (!kaiser_enabled)
++ return;
+ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+@@ -319,6 +332,8 @@ void __init kaiser_init(void)
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
+ int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
+ {
++ if (!kaiser_enabled)
++ return 0;
+ return kaiser_add_user_map((const void *)addr, size, flags);
+ }
+
+@@ -330,6 +345,8 @@ void kaiser_remove_mapping(unsigned long
+ unsigned long addr, next;
+ pgd_t *pgd;
+
++ if (!kaiser_enabled)
++ return;
+ pgd = native_get_shadow_pgd(pgd_offset_k(start));
+ for (addr = start; addr < end; pgd++, addr = next) {
+ next = pgd_addr_end(addr, end);
+@@ -351,6 +368,8 @@ static inline bool is_userspace_pgd(pgd_
+
+ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
++ if (!kaiser_enabled)
++ return pgd;
+ /*
+ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
+ * skip cases like kexec and EFI which make temporary low mappings.
+@@ -407,4 +426,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -271,16 +271,12 @@ static void pgd_prepopulate_pmd(struct m
+ }
+ }
+
+-#ifdef CONFIG_KAISER
+ /*
+- * Instead of one pmd, we aquire two pmds. Being order-1, it is
++ * Instead of one pgd, Kaiser acquires two pgds. Being order-1, it is
+ * both 8k in size and 8k-aligned. That lets us just flip bit 12
+ * in a pointer to swap between the two 4k halves.
+ */
+-#define PGD_ALLOCATION_ORDER 1
+-#else
+-#define PGD_ALLOCATION_ORDER 0
+-#endif
++#define PGD_ALLOCATION_ORDER kaiser_enabled
+
+ static inline pgd_t *_pgd_alloc(void)
+ {
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -42,8 +42,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+-#ifdef CONFIG_KAISER
+- if (this_cpu_has(X86_FEATURE_PCID)) {
++ if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+@@ -60,7 +59,6 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
+ kaiser_flush_tlb_on_return_to_user();
+ }
+-#endif /* CONFIG_KAISER */
+
+ /*
+ * Caution: many callers of this function expect
diff --git a/debian/patches/bugfix/all/kpti/kaiser-alloc_ldt_struct-use-get_zeroed_page.patch b/debian/patches/bugfix/all/kpti/kaiser-alloc_ldt_struct-use-get_zeroed_page.patch
new file mode 100644
index 0000000..7c3397b
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-alloc_ldt_struct-use-get_zeroed_page.patch
@@ -0,0 +1,28 @@
+From: Hugh Dickins <hughd at google.com>
+Date: Sun, 17 Dec 2017 19:53:01 -0800
+Subject: kaiser: alloc_ldt_struct() use get_zeroed_page()
+
+Change the 3.2.96 and 3.18.72 alloc_ldt_struct() to allocate its entries
+with get_zeroed_page(), as 4.3 onwards does since f454b4788613 ("x86/ldt:
+Fix small LDT allocation for Xen"). This then matches the free_page()
+I had misported in __free_ldt_struct(), and fixes the
+"BUG: Bad page state in process ldt_gdt_32 ... flags: 0x80(slab)"
+reported by Kees Cook and Jiri Kosina, and analysed by Jiri.
+
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/ldt.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -69,7 +69,7 @@ static struct ldt_struct *alloc_ldt_stru
+ if (alloc_size > PAGE_SIZE)
+ new_ldt->entries = vzalloc(alloc_size);
+ else
+- new_ldt->entries = kzalloc(PAGE_SIZE, GFP_KERNEL);
++ new_ldt->entries = (void *)get_zeroed_page(GFP_KERNEL);
+
+ if (!new_ldt->entries) {
+ kfree(new_ldt);
diff --git a/debian/patches/bugfix/all/kpti/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch b/debian/patches/bugfix/all/kpti/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
new file mode 100644
index 0000000..2ae714f
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
@@ -0,0 +1,83 @@
+From: Hugh Dickins <hughd at google.com>
+Date: Sat, 4 Nov 2017 18:23:24 -0700
+Subject: kaiser: asm/tlbflush.h handle noPGE at lower level
+
+I found asm/tlbflush.h too twisty, and think it safer not to avoid
+__native_flush_tlb_global_irq_disabled() in the kaiser_enabled case,
+but instead let it handle kaiser_enabled along with cr3: it can just
+use __native_flush_tlb() for that, no harm in re-disabling preemption.
+
+(This is not the same change as Kirill and Dave have suggested for
+upstream, flipping PGE in cr4: that's neat, but needs a cpu_has_pge
+check; cr3 is enough for kaiser, and thought to be cheaper than cr4.)
+
+Also delete the X86_FEATURE_INVPCID invpcid_flush_all_nonglobals()
+preference from __native_flush_tlb(): unlike the invpcid_flush_all()
+preference in __native_flush_tlb_global(), it's not seen in upstream
+4.14, and was recently reported to be surprisingly slow.
+
+(cherry picked from Change-Id: I0da819a797ff46bca6590040b6480178dff6ba1e)
+
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 27 +++------------------------
+ 1 file changed, 3 insertions(+), 24 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -84,14 +84,6 @@ static inline void kaiser_flush_tlb_on_r
+
+ static inline void __native_flush_tlb(void)
+ {
+- if (this_cpu_has(X86_FEATURE_INVPCID)) {
+- /*
+- * Note, this works with CR4.PCIDE=0 or 1.
+- */
+- invpcid_flush_all_nonglobals();
+- return;
+- }
+-
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+@@ -115,11 +107,8 @@ static inline void __native_flush_tlb_gl
+ /* restore PGE as it was before */
+ native_write_cr4(cr4);
+ } else {
+- /*
+- * x86_64 microcode update comes this way when CR4.PGE is not
+- * enabled, and it's safer for all callers to allow this case.
+- */
+- native_write_cr3(native_read_cr3());
++ /* do it with cr3, letting kaiser flush user PCID */
++ __native_flush_tlb();
+ }
+ }
+
+@@ -127,12 +116,6 @@ static inline void __native_flush_tlb_gl
+ {
+ unsigned long flags;
+
+- if (kaiser_enabled) {
+- /* Globals are not used at all */
+- __native_flush_tlb();
+- return;
+- }
+-
+ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+@@ -188,11 +171,7 @@ static inline void __native_flush_tlb_si
+
+ static inline void __flush_tlb_all(void)
+ {
+- if (cpu_has_pge)
+- __flush_tlb_global();
+- else
+- __flush_tlb();
+-
++ __flush_tlb_global();
+ /*
+ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
+ * we'd end up flushing kernel translations for the current ASID but
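The simplification in `__flush_tlb_all()` above rests on a hardware property: a CR3 write invalidates only non-global TLB entries, but with KAISER `_PAGE_GLOBAL` is defined to 0, so no global entries exist and a plain CR3 reload already flushes everything. A toy model of that reasoning — the structs and flush function are illustrative, not kernel code:

```c
#include <stdbool.h>
#include <stddef.h>

struct tlb_entry {
	bool valid;
	bool global;	/* entry installed with _PAGE_GLOBAL set */
};

/* Model of a CR3 write: invalidates only non-global translations. */
static void write_cr3_flush(struct tlb_entry *tlb, size_t n)
{
	for (size_t i = 0; i < n; i++)
		if (!tlb[i].global)
			tlb[i].valid = false;
}

static size_t count_valid(const struct tlb_entry *tlb, size_t n)
{
	size_t v = 0;

	for (size_t i = 0; i < n; i++)
		v += tlb[i].valid;
	return v;
}
```

With `_PAGE_GLOBAL` forced to 0 under KAISER, no entry ever has `global == true`, so the CR3-write flush is total — which is why the CR4.PGE-toggling path can be dropped.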
diff --git a/debian/patches/bugfix/all/kpti/kaiser-disabled-on-xen-pv.patch b/debian/patches/bugfix/all/kpti/kaiser-disabled-on-xen-pv.patch
new file mode 100644
index 0000000..64a22b7
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-disabled-on-xen-pv.patch
@@ -0,0 +1,49 @@
+From: Jiri Kosina <jkosina at suse.cz>
+Date: Tue, 2 Jan 2018 14:19:49 +0100
+Subject: kaiser: disabled on Xen PV
+
+Kaiser cannot be used with a paravirtualized MMU (namely for reading and
+writing CR3): the CR3 switch to and from the user-space PGD would require
+mapping the whole XEN_PV machinery into both page tables.
+
+More importantly, enabling KAISER on Xen PV doesn't make much sense, as PV
+guests already use distinct %cr3 values for kernel and user.
+
+Signed-off-by: Jiri Kosina <jkosina at suse.cz>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+Cc: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16: use xen_pv_domain()]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/kaiser.c | 6 ++++++
+ 1 file changed, 6 insertions(+)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -10,6 +10,7 @@
+ #include <linux/mm.h>
+ #include <linux/uaccess.h>
+ #include <linux/ftrace.h>
++#include <xen/xen.h>
+
+ extern struct mm_struct init_mm;
+
+@@ -273,6 +274,9 @@ void __init kaiser_check_boottime_disabl
+ char arg[5];
+ int ret;
+
++ if (xen_pv_domain())
++ goto silent_disable;
++
+ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
+ if (ret > 0) {
+ if (!strncmp(arg, "on", 2))
+@@ -300,6 +304,8 @@ enable:
+
+ disable:
+ pr_info("Kernel/User page tables isolation: disabled\n");
++
++silent_disable:
+ kaiser_enabled = 0;
+ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ }
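Note the ordering the patch establishes: the `xen_pv_domain()` check runs before the `pti=` command-line option is even parsed, and it jumps past the `pr_info()` line, so the disable is silent. A hypothetical userspace sketch of that decision logic (a stand-in, not the kernel's `kaiser_check_boottime_disable()`):

```c
#include <string.h>

enum pti_mode { PTI_ON, PTI_OFF, PTI_OFF_SILENT };

/* Model of the boot-time decision: Xen PV wins before "pti=" is
 * consulted, and its disable prints nothing; otherwise "pti=off"
 * disables with a message, and the default is enabled. */
static enum pti_mode decide_pti(int xen_pv_domain, const char *arg)
{
	if (xen_pv_domain)
		return PTI_OFF_SILENT;	/* PV guest: split %cr3 already */
	if (arg && !strncmp(arg, "off", 3))
		return PTI_OFF;		/* explicit opt-out, logged */
	return PTI_ON;			/* default: isolation enabled */
}
```

The silent path mirrors the new `silent_disable:` label, which skips the "page tables isolation: disabled" message while still clearing `kaiser_enabled` and the CPU capability bit.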
diff --git a/debian/patches/bugfix/all/kpti/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch b/debian/patches/bugfix/all/kpti/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
new file mode 100644
index 0000000..83e1526
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
@@ -0,0 +1,83 @@
+From: Hugh Dickins <hughd at google.com>
+Date: Sat, 4 Nov 2017 18:43:06 -0700
+Subject: kaiser: kaiser_flush_tlb_on_return_to_user() check PCID
+
+Let kaiser_flush_tlb_on_return_to_user() do the X86_FEATURE_PCID
+check itself, instead of each caller doing it inline first: nobody
+needs to optimize for the noPCID case, it is clearer this way, and it
+better suits later changes. Replace the no-op X86_CR3_PCID_KERN_FLUSH
+lines with a BUILD_BUG_ON() in load_new_mm_cr3(), in case something changes.
+
+(cherry picked from Change-Id: I9b528ed9d7c1ae4a3b4738c2894ee1740b6fb0b9)
+
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 4 ++--
+ arch/x86/mm/kaiser.c | 6 +++---
+ arch/x86/mm/tlb.c | 8 ++++----
+ 3 files changed, 9 insertions(+), 9 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -90,7 +90,7 @@ static inline void __native_flush_tlb(vo
+ * back:
+ */
+ preempt_disable();
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+@@ -149,7 +149,7 @@ static inline void __native_flush_tlb_si
+ */
+
+ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID))
++ if (kaiser_enabled)
+ kaiser_flush_tlb_on_return_to_user();
+ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
+ return;
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -449,12 +449,12 @@ void kaiser_setup_pcid(void)
+
+ /*
+ * Make a note that this cpu will need to flush USER tlb on return to user.
+- * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
+- * if cpu does not, then the NOFLUSH bit will never have been set.
++ * If cpu does not have PCID, then the NOFLUSH bit will never have been set.
+ */
+ void kaiser_flush_tlb_on_return_to_user(void)
+ {
+- this_cpu_write(x86_cr3_pcid_user,
++ if (this_cpu_has(X86_FEATURE_PCID))
++ this_cpu_write(x86_cr3_pcid_user,
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -42,7 +42,7 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ {
+ unsigned long new_mm_cr3 = __pa(pgdir);
+
+- if (kaiser_enabled && this_cpu_has(X86_FEATURE_PCID)) {
++ if (kaiser_enabled) {
+ /*
+ * We reuse the same PCID for different tasks, so we must
+ * flush all the entries for the PCID out when we change tasks.
+@@ -53,10 +53,10 @@ static void load_new_mm_cr3(pgd_t *pgdir
+ * do it here, but can only be used if X86_FEATURE_INVPCID is
+ * available - and many machines support pcid without invpcid.
+ *
+- * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
+- * but keep that line in there in case something changes.
++ * If X86_CR3_PCID_KERN_FLUSH actually added something, then it
++ * would be needed in the write_cr3() below - if PCIDs enabled.
+ */
+- new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ BUILD_BUG_ON(X86_CR3_PCID_KERN_FLUSH);
+ kaiser_flush_tlb_on_return_to_user();
+ }
+
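The `BUILD_BUG_ON()` swap above works because `X86_CR3_PCID_KERN_FLUSH` is defined to 0, so OR-ing it into the new CR3 value was dead code; a compile-time assertion documents the assumption and breaks the build if the constant ever becomes non-zero. A minimal userspace illustration, using a simplified negative-array-size static assert rather than the kernel's `BUILD_BUG_ON` macro — names here are illustrative:

```c
#define MY_CR3_PCID_KERN_FLUSH	0UL

/* Compile-time check: sizeof(char[1 - 2*!!(x)]) is ill-formed (negative
 * array size) whenever x is non-zero, so compilation fails right here
 * if the flag ever gains a bit. */
#define STATIC_ASSERT_ZERO(x)	((void)sizeof(char[1 - 2 * !!(x)]))

/* Sketch of the load_new_mm_cr3() idea: nothing needs OR-ing into the
 * new CR3 value as long as the kernel-flush flag stays 0. */
static unsigned long load_new_mm_cr3(unsigned long pgdir_pa)
{
	unsigned long new_cr3 = pgdir_pa;

	STATIC_ASSERT_ZERO(MY_CR3_PCID_KERN_FLUSH);
	return new_cr3;		/* flag is 0: value passes through unchanged */
}
```

Compared with keeping the no-op `|=` line, the assertion costs nothing at runtime and turns a silent behavior change into a build failure.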
diff --git a/debian/patches/bugfix/all/kpti/kaiser-kernel-address-isolation.patch b/debian/patches/bugfix/all/kpti/kaiser-kernel-address-isolation.patch
new file mode 100644
index 0000000..e08cd41
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-kernel-address-isolation.patch
@@ -0,0 +1,2295 @@
+From: Richard Fellner <richard.fellner at student.tugraz.at>
+Date: Thu, 4 May 2017 14:26:50 +0200
+Subject: KAISER: Kernel Address Isolation
+
+This patch introduces our implementation of KAISER (Kernel Address Isolation to
+have Side-channels Efficiently Removed), a kernel isolation technique to close
+hardware side channels on kernel address information.
+
+More information about the patch can be found on:
+
+ https://github.com/IAIK/KAISER
+
+From: Richard Fellner <richard.fellner at student.tugraz.at>
+From: Daniel Gruss <daniel.gruss at iaik.tugraz.at>
+Subject: [RFC, PATCH] x86_64: KAISER - do not map kernel in user mode
+Date: Thu, 4 May 2017 14:26:50 +0200
+Link: http://marc.info/?l=linux-kernel&m=149390087310405&w=2
+Kaiser-4.10-SHA1: c4b1831d44c6144d3762ccc72f0c4e71a0c713e5
+
+To: <linux-kernel at vger.kernel.org>
+To: <kernel-hardening at lists.openwall.com>
+Cc: <clementine.maurice at iaik.tugraz.at>
+Cc: <moritz.lipp at iaik.tugraz.at>
+Cc: Michael Schwarz <michael.schwarz at iaik.tugraz.at>
+Cc: Richard Fellner <richard.fellner at student.tugraz.at>
+Cc: Ingo Molnar <mingo at kernel.org>
+Cc: <kirill.shutemov at linux.intel.com>
+Cc: <anders.fogh at gdata-adan.de>
+
+After several recent works [1,2,3] KASLR on x86_64 was basically
+considered dead by many researchers. We have been working on an
+efficient but effective fix for this problem and found that not mapping
+the kernel space when running in user mode is the solution to this
+problem [4] (the corresponding paper [5] will be presented at ESSoS17).
+
+With this RFC patch we allow anybody to configure their kernel with the
+flag CONFIG_KAISER to add our defense mechanism.
+
+If there are any questions we would love to answer them.
+We also appreciate any comments!
+
+Cheers,
+Daniel (+ the KAISER team from Graz University of Technology)
+
+[1] http://www.ieee-security.org/TC/SP2013/papers/4977a191.pdf
+[2] https://www.blackhat.com/docs/us-16/materials/us-16-Fogh-Using-Undocumented-CPU-Behaviour-To-See-Into-Kernel-Mode-And-Break-KASLR-In-The-Process.pdf
+[3] https://www.blackhat.com/docs/us-16/materials/us-16-Jang-Breaking-Kernel-Address-Space-Layout-Randomization-KASLR-With-Intel-TSX.pdf
+[4] https://github.com/IAIK/KAISER
+[5] https://gruss.cc/files/kaiser.pdf
+
+(cherry picked from Change-Id: I0eb000c33290af01fc4454ca0c701d00f1d30b1d)
+
+Conflicts:
+arch/x86/entry/entry_64.S (not in this tree)
+arch/x86/kernel/entry_64.S (patched instead of that)
+arch/x86/entry/entry_64_compat.S (not in this tree)
+arch/x86/ia32/ia32entry.S (patched instead of that)
+arch/x86/include/asm/hw_irq.h
+arch/x86/include/asm/pgtable_types.h
+arch/x86/include/asm/processor.h
+arch/x86/kernel/irqinit.c
+arch/x86/kernel/process.c
+arch/x86/mm/Makefile
+arch/x86/mm/pgtable.c
+init/main.c
+
+Signed-off-by: Hugh Dickins <hughd at google.com>
+[bwh: Folded in the follow-up patches from Hugh:
+ - kaiser: merged update
+ - kaiser: do not set _PAGE_NX on pgd_none
+ - kaiser: stack map PAGE_SIZE at THREAD_SIZE-PAGE_SIZE
+ - kaiser: fix build and FIXME in alloc_ldt_struct()
+ - kaiser: KAISER depends on SMP
+ - kaiser: fix regs to do_nmi() ifndef CONFIG_KAISER
+ - kaiser: fix perf crashes
+ - kaiser: ENOMEM if kaiser_pagetable_walk() NULL
+ - kaiser: tidied up asm/kaiser.h somewhat
+ - kaiser: tidied up kaiser_add/remove_mapping slightly
+ - kaiser: kaiser_remove_mapping() move along the pgd
+ - kaiser: align addition to x86/mm/Makefile
+ - kaiser: cleanups while trying for gold link
+ - kaiser: name that 0x1000 KAISER_SHADOW_PGD_OFFSET
+ - kaiser: delete KAISER_REAL_SWITCH option
+ - kaiser: vmstat show NR_KAISERTABLE as nr_overhead
+ - kaiser: enhanced by kernel and user PCIDs
+ - kaiser: load_new_mm_cr3() let SWITCH_USER_CR3 flush user
+ - kaiser: PCID 0 for kernel and 128 for user
+ - kaiser: x86_cr3_pcid_noflush and x86_cr3_pcid_user
+ - kaiser: paranoid_entry pass cr3 need to paranoid_exit
+ - kaiser: _pgd_alloc() without __GFP_REPEAT to avoid stalls
+ - kaiser: fix unlikely error in alloc_ldt_struct()
+ - kaiser: drop is_atomic arg to kaiser_pagetable_walk()
+ Backported to 3.16:
+ - Add missing #include in arch/x86/mm/kaiser.c
+ - Use variable PEBS buffer size since we have "perf/x86/intel: Use PAGE_SIZE
+ for PEBS buffer size on Core2"
+ - Renumber X86_FEATURE_INVPCID_SINGLE to avoid collision
+ - Adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/boot/compressed/misc.h | 1 +
+ arch/x86/ia32/ia32entry.S | 7 +
+ arch/x86/include/asm/cpufeature.h | 1 +
+ arch/x86/include/asm/desc.h | 2 +-
+ arch/x86/include/asm/hw_irq.h | 2 +-
+ arch/x86/include/asm/kaiser.h | 126 +++++++++
+ arch/x86/include/asm/pgtable.h | 18 +-
+ arch/x86/include/asm/pgtable_64.h | 30 ++-
+ arch/x86/include/asm/pgtable_types.h | 33 ++-
+ arch/x86/include/asm/processor.h | 4 +-
+ arch/x86/include/asm/tlbflush.h | 66 ++++-
+ arch/x86/include/uapi/asm/processor-flags.h | 3 +-
+ arch/x86/kernel/cpu/common.c | 18 +-
+ arch/x86/kernel/cpu/perf_event_intel_ds.c | 56 +++-
+ arch/x86/kernel/entry_64.S | 188 +++++++++++--
+ arch/x86/kernel/espfix_64.c | 9 +
+ arch/x86/kernel/head_64.S | 31 ++-
+ arch/x86/kernel/irqinit.c | 2 +-
+ arch/x86/kernel/ldt.c | 25 +-
+ arch/x86/kernel/process.c | 2 +-
+ arch/x86/kernel/process_64.c | 2 +-
+ arch/x86/kernel/tracepoint.c | 2 +
+ arch/x86/kvm/x86.c | 3 +-
+ arch/x86/mm/Makefile | 1 +
+ arch/x86/mm/kaiser.c | 399 ++++++++++++++++++++++++++++
+ arch/x86/mm/pageattr.c | 63 +++--
+ arch/x86/mm/pgtable.c | 31 ++-
+ arch/x86/mm/tlb.c | 41 ++-
+ include/asm-generic/vmlinux.lds.h | 7 +
+ include/linux/kaiser.h | 52 ++++
+ include/linux/mmzone.h | 3 +-
+ include/linux/percpu-defs.h | 32 ++-
+ init/main.c | 2 +
+ kernel/fork.c | 6 +
+ mm/vmstat.c | 1 +
+ security/Kconfig | 10 +
+ 36 files changed, 1188 insertions(+), 91 deletions(-)
+ create mode 100644 arch/x86/include/asm/kaiser.h
+ create mode 100644 arch/x86/mm/kaiser.c
+ create mode 100644 include/linux/kaiser.h
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,6 +9,7 @@
+ */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
++#undef CONFIG_KAISER
+
+ #include <linux/linkage.h>
+ #include <linux/screen_info.h>
+--- a/arch/x86/ia32/ia32entry.S
++++ b/arch/x86/ia32/ia32entry.S
+@@ -15,6 +15,8 @@
+ #include <asm/irqflags.h>
+ #include <asm/asm.h>
+ #include <asm/smap.h>
++#include <asm/pgtable_types.h>
++#include <asm/kaiser.h>
+ #include <linux/linkage.h>
+ #include <linux/err.h>
+
+@@ -121,6 +123,7 @@ ENTRY(ia32_sysenter_target)
+ CFI_DEF_CFA rsp,0
+ CFI_REGISTER rsp,rbp
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ movq PER_CPU_VAR(kernel_stack), %rsp
+ addq $(KERNEL_STACK_OFFSET),%rsp
+ /*
+@@ -192,6 +195,7 @@ sysexit_from_sys_call:
+ popq_cfi %rcx /* User %esp */
+ CFI_REGISTER rsp,rcx
+ TRACE_IRQS_ON
++ SWITCH_USER_CR3
+ ENABLE_INTERRUPTS_SYSEXIT32
+
+ CFI_RESTORE_STATE
+@@ -296,6 +300,7 @@ ENTRY(ia32_cstar_target)
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ movl %esp,%r8d
+ CFI_REGISTER rsp,r8
+ movq PER_CPU_VAR(kernel_stack),%rsp
+@@ -350,6 +355,7 @@ sysretl_from_sys_call:
+ xorq %r9,%r9
+ xorq %r8,%r8
+ TRACE_IRQS_ON
++ SWITCH_USER_CR3
+ movl RSP-ARGOFFSET(%rsp),%esp
+ CFI_RESTORE rsp
+ USERGS_SYSRET32
+@@ -424,6 +430,7 @@ ENTRY(ia32_syscall)
+ PARAVIRT_ADJUST_EXCEPTION_FRAME
+ ASM_CLAC /* Do this early to minimize exposure */
+ SWAPGS
++ SWITCH_KERNEL_CR3_NO_STACK
+ /*
+ * No need to follow this irqs on/off section: the syscall
+ * disabled irqs and here we enable it straight after entry:
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -186,6 +186,7 @@
+ #define X86_FEATURE_DTHERM (7*32+ 7) /* Digital Thermal Sensor */
+ #define X86_FEATURE_HW_PSTATE (7*32+ 8) /* AMD HW-PState */
+ #define X86_FEATURE_PROC_FEEDBACK (7*32+ 9) /* AMD ProcFeedbackInterface */
++#define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
+--- a/arch/x86/include/asm/desc.h
++++ b/arch/x86/include/asm/desc.h
+@@ -43,7 +43,7 @@ struct gdt_page {
+ struct desc_struct gdt[GDT_ENTRIES];
+ } __attribute__((aligned(PAGE_SIZE)));
+
+-DECLARE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page);
++DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page);
+
+ static inline struct desc_struct *get_cpu_gdt_table(unsigned int cpu)
+ {
+--- a/arch/x86/include/asm/hw_irq.h
++++ b/arch/x86/include/asm/hw_irq.h
+@@ -194,7 +194,7 @@ extern void (*__initconst interrupt[NR_V
+ #define VECTOR_RETRIGGERED (-2)
+
+ typedef int vector_irq_t[NR_VECTORS];
+-DECLARE_PER_CPU(vector_irq_t, vector_irq);
++DECLARE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq);
+ extern void setup_vector_irq(int cpu);
+
+ #ifdef CONFIG_X86_IO_APIC
+--- /dev/null
++++ b/arch/x86/include/asm/kaiser.h
+@@ -0,0 +1,126 @@
++#ifndef _ASM_X86_KAISER_H
++#define _ASM_X86_KAISER_H
++
++#include <uapi/asm/processor-flags.h> /* For PCID constants */
++
++/*
++ * This file includes the definitions for the KAISER feature.
++ * KAISER is a countermeasure against x86_64 side-channel attacks on
++ * kernel virtual memory. It gives every process a shadow pgd: the
++ * shadow pgd maps only a minimal set of kernel pages, but the whole of
++ * user memory. On entry to the kernel (context switch, interrupt), the
++ * pgd is switched to the normal one; when the system returns to user
++ * mode, the shadow pgd is installed. Kernel addresses are thus unmapped
++ * in user mode and cannot be probed through the caches.
++ *
++ * A minimal kernel mapping holds the parts that must remain mapped
++ * in user mode, such as the entry/exit code and the stacks.
++ */
++
++#define KAISER_SHADOW_PGD_OFFSET 0x1000
++
++#ifdef __ASSEMBLY__
++#ifdef CONFIG_KAISER
++
++.macro _SWITCH_TO_KERNEL_CR3 reg
++movq %cr3, \reg
++andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
++orq x86_cr3_pcid_noflush, \reg
++movq \reg, %cr3
++.endm
++
++.macro _SWITCH_TO_USER_CR3 reg regb
++/*
++ * regb must be the low byte portion of reg: because we have arranged
++ * for the low byte of the user PCID to serve as the high byte of NOFLUSH
++ * (0x80 for each when PCID is enabled, or 0x00 when PCID and NOFLUSH are
++ * not enabled): so that the one register can update both memory and cr3.
++ */
++movq %cr3, \reg
++orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
++js 9f
++/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
++9:
++movq \reg, %cr3
++.endm
++
++.macro SWITCH_KERNEL_CR3
++pushq %rax
++_SWITCH_TO_KERNEL_CR3 %rax
++popq %rax
++.endm
++
++.macro SWITCH_USER_CR3
++pushq %rax
++_SWITCH_TO_USER_CR3 %rax %al
++popq %rax
++.endm
++
++.macro SWITCH_KERNEL_CR3_NO_STACK
++movq %rax, PER_CPU_VAR(unsafe_stack_register_backup)
++_SWITCH_TO_KERNEL_CR3 %rax
++movq PER_CPU_VAR(unsafe_stack_register_backup), %rax
++.endm
++
++#else /* CONFIG_KAISER */
++
++.macro SWITCH_KERNEL_CR3 reg
++.endm
++.macro SWITCH_USER_CR3 reg regb
++.endm
++.macro SWITCH_KERNEL_CR3_NO_STACK
++.endm
++
++#endif /* CONFIG_KAISER */
++
++#else /* __ASSEMBLY__ */
++
++#ifdef CONFIG_KAISER
++/*
++ * On a kernel/user mode switch, the address space may have to be
++ * switched before the registers have been saved. Changing the
++ * address space requires a scratch register, so one register has
++ * to be saved here and restored afterwards.
++*/
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++extern unsigned long x86_cr3_pcid_noflush;
++DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
++
++extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
++
++/**
++ * kaiser_add_mapping - map a virtual memory part to the shadow (user) mapping
++ * @addr: the start address of the range
++ * @size: the size of the range
++ * @flags: The mapping flags of the pages
++ *
++ * The mapping is global in scope, so no further synchronization
++ * is required. The pages have to be unmapped again manually
++ * when they are no longer needed.
++ */
++extern int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags);
++
++/**
++ * kaiser_remove_mapping - unmap a virtual memory part of the shadow mapping
++ * @start: the start address of the range
++ * @size: the size of the range
++ */
++extern void kaiser_remove_mapping(unsigned long start, unsigned long size);
++
++/**
++ * kaiser_init - Initialize the shadow mapping
++ *
++ * Most parts of the shadow mapping can be set up at boot
++ * time. Only per-process things like the thread stacks
++ * or a new LDT have to be mapped at runtime. These boot-
++ * time mappings are permanent and never unmapped.
++ */
++extern void kaiser_init(void);
++
++#endif /* CONFIG_KAISER */
++
++#endif /* __ASSEMBLY */
++
++#endif /* _ASM_X86_KAISER_H */
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -641,7 +641,17 @@ static inline pud_t *pud_offset(pgd_t *p
+
+ static inline int pgd_bad(pgd_t pgd)
+ {
+- return (pgd_flags(pgd) & ~_PAGE_USER) != _KERNPG_TABLE;
++ pgdval_t ignore_flags = _PAGE_USER;
++ /*
++ * We set NX on KAISER pgds that map userspace memory so
++ * that userspace can not meaningfully use the kernel
++ * page table by accident; it will fault on the first
++ * instruction it tries to run. See native_set_pgd().
++ */
++ if (IS_ENABLED(CONFIG_KAISER))
++ ignore_flags |= _PAGE_NX;
++
++ return (pgd_flags(pgd) & ~ignore_flags) != _KERNPG_TABLE;
+ }
+
+ static inline int pgd_none(pgd_t pgd)
+@@ -844,6 +854,12 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
++#ifdef CONFIG_KAISER
++ /* Clone the shadow pgd part as well */
++ memcpy(native_get_shadow_pgd(dst),
++ native_get_shadow_pgd(src),
++ count * sizeof(pgd_t));
++#endif
+ }
+
+ #define PTE_SHIFT ilog2(PTRS_PER_PTE)
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,9 +106,37 @@ static inline void native_pud_clear(pud_
+ native_set_pud(pud, native_make_pud(0));
+ }
+
++#ifdef CONFIG_KAISER
++extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
++
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
++{
++ return (pgd_t *)((unsigned long)pgdp | (unsigned long)PAGE_SIZE);
++}
++
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
++{
++ return (pgd_t *)((unsigned long)pgdp & ~(unsigned long)PAGE_SIZE);
++}
++#else
++static inline pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ return pgd;
++}
++static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
++{
++ BUILD_BUG_ON(1);
++ return NULL;
++}
++static inline pgd_t *native_get_normal_pgd(pgd_t *pgdp)
++{
++ return pgdp;
++}
++#endif /* CONFIG_KAISER */
++
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+- *pgdp = pgd;
++ *pgdp = kaiser_set_shadow_pgd(pgdp, pgd);
+ }
+
+ static inline void native_pgd_clear(pgd_t *pgd)
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -50,7 +50,11 @@
+ #define _PAGE_ACCESSED (_AT(pteval_t, 1) << _PAGE_BIT_ACCESSED)
+ #define _PAGE_DIRTY (_AT(pteval_t, 1) << _PAGE_BIT_DIRTY)
+ #define _PAGE_PSE (_AT(pteval_t, 1) << _PAGE_BIT_PSE)
++#ifdef CONFIG_KAISER
++#define _PAGE_GLOBAL (_AT(pteval_t, 0))
++#else
+ #define _PAGE_GLOBAL (_AT(pteval_t, 1) << _PAGE_BIT_GLOBAL)
++#endif
+ #define _PAGE_SOFTW1 (_AT(pteval_t, 1) << _PAGE_BIT_SOFTW1)
+ #define _PAGE_IOMAP (_AT(pteval_t, 1) << _PAGE_BIT_IOMAP)
+ #define _PAGE_PAT (_AT(pteval_t, 1) << _PAGE_BIT_PAT)
+@@ -116,7 +120,7 @@
+ #endif
+
+ #define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE)
+-#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
++#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE)
+
+ #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \
+ _PAGE_ACCESSED | _PAGE_DIRTY)
+@@ -129,6 +133,33 @@
+ _PAGE_SOFT_DIRTY | _PAGE_NUMA)
+ #define _HPAGE_CHG_MASK (_PAGE_CHG_MASK | _PAGE_PSE | _PAGE_NUMA)
+
++/* The ASID is the lower 12 bits of CR3 */
++#define X86_CR3_PCID_ASID_MASK (_AC((1<<12)-1,UL))
++
++/* Mask for all the PCID-related bits in CR3: */
++#define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
++#define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
++
++#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++/* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
++#define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
++
++#define X86_CR3_PCID_KERN_FLUSH (X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_FLUSH (X86_CR3_PCID_ASID_USER)
++#define X86_CR3_PCID_KERN_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_KERN)
++#define X86_CR3_PCID_USER_NOFLUSH (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_USER)
++#else
++#define X86_CR3_PCID_ASID_USER (_AC(0x0,UL))
++/*
++ * PCIDs are unsupported on 32-bit and none of these bits can be
++ * set in CR3:
++ */
++#define X86_CR3_PCID_KERN_FLUSH (0)
++#define X86_CR3_PCID_USER_FLUSH (0)
++#define X86_CR3_PCID_KERN_NOFLUSH (0)
++#define X86_CR3_PCID_USER_NOFLUSH (0)
++#endif
++
+ #define _PAGE_CACHE_MASK (_PAGE_PCD | _PAGE_PWT)
+ #define _PAGE_CACHE_WB (0)
+ #define _PAGE_CACHE_WC (_PAGE_PWT)
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -282,7 +282,7 @@ struct tss_struct {
+
+ } ____cacheline_aligned;
+
+-DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss);
++DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, init_tss);
+
+ /*
+ * Save the original ist values for checking stack pointers during debugging
+@@ -929,7 +929,7 @@ extern unsigned long KSTK_ESP(struct tas
+ /*
+ * User space RSP while inside the SYSCALL fast path
+ */
+-DECLARE_PER_CPU(unsigned long, old_rsp);
++DECLARE_PER_CPU_USER_MAPPED(unsigned long, old_rsp);
+
+ #endif /* CONFIG_X86_64 */
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -64,14 +64,40 @@ static inline void invpcid_flush_all_non
+ #define __flush_tlb_single(addr) __native_flush_tlb_single(addr)
+ #endif
+
++/*
++ * Declare a couple of kaiser interfaces here for convenience,
++ * to avoid the need for asm/kaiser.h in unexpected places.
++ */
++#ifdef CONFIG_KAISER
++extern void kaiser_setup_pcid(void);
++extern void kaiser_flush_tlb_on_return_to_user(void);
++#else
++static inline void kaiser_setup_pcid(void)
++{
++}
++static inline void kaiser_flush_tlb_on_return_to_user(void)
++{
++}
++#endif
++
+ static inline void __native_flush_tlb(void)
+ {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
++ /*
++ * Note, this works with CR4.PCIDE=0 or 1.
++ */
++ invpcid_flush_all_nonglobals();
++ return;
++ }
++
+ /*
+ * If current->mm == NULL then we borrow a mm which may change during a
+ * task switch and therefore we must not be preempted while we write CR3
+ * back:
+ */
+ preempt_disable();
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
+ native_write_cr3(native_read_cr3());
+ preempt_enable();
+ }
+@@ -89,12 +115,18 @@ static inline void __native_flush_tlb_gl
+
+ static inline void __native_flush_tlb_global(void)
+ {
++#ifdef CONFIG_KAISER
++ /* Globals are not used at all */
++ __native_flush_tlb();
++#else
+ unsigned long flags;
+
+- if (static_cpu_has(X86_FEATURE_INVPCID)) {
++ if (this_cpu_has(X86_FEATURE_INVPCID)) {
+ /*
+ * Using INVPCID is considerably faster than a pair of writes
+ * to CR4 sandwiched inside an IRQ flag save/restore.
++ *
++ * Note, this works with CR4.PCIDE=0 or 1.
+ */
+ invpcid_flush_all();
+ return;
+@@ -106,15 +138,41 @@ static inline void __native_flush_tlb_gl
+ * be called from deep inside debugging code.)
+ */
+ raw_local_irq_save(flags);
+-
+ __native_flush_tlb_global_irq_disabled();
+-
+ raw_local_irq_restore(flags);
++#endif
+ }
+
+ static inline void __native_flush_tlb_single(unsigned long addr)
+ {
+- asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++ /*
++ * SIMICS #GP's if you run INVPCID with type 2/3
++ * and X86_CR4_PCIDE clear. Shame!
++ *
++ * The ASIDs used below are hard-coded. But, we must not
++ * call invpcid(type=1/2) before CR4.PCIDE=1. Just call
++ * invlpg in the case we are called early.
++ */
++
++ if (!this_cpu_has(X86_FEATURE_INVPCID_SINGLE)) {
++ if (this_cpu_has(X86_FEATURE_PCID))
++ kaiser_flush_tlb_on_return_to_user();
++ asm volatile("invlpg (%0)" ::"r" (addr) : "memory");
++ return;
++ }
++ /* Flush the address out of both PCIDs. */
++ /*
++ * An optimization here might be to determine addresses
++ * that are only kernel-mapped and only flush the kernel
++ * ASID. But, userspace flushes are probably much more
++ * important performance-wise.
++ *
++ * Make sure to do only a single invpcid when KAISER is
++ * disabled and we have only a single ASID.
++ */
++ if (X86_CR3_PCID_ASID_KERN != X86_CR3_PCID_ASID_USER)
++ invpcid_flush_one(X86_CR3_PCID_ASID_KERN, addr);
++ invpcid_flush_one(X86_CR3_PCID_ASID_USER, addr);
+ }
+
+ static inline void __flush_tlb_all(void)
+--- a/arch/x86/include/uapi/asm/processor-flags.h
++++ b/arch/x86/include/uapi/asm/processor-flags.h
+@@ -79,7 +79,8 @@
+ #define X86_CR3_PWT _BITUL(X86_CR3_PWT_BIT)
+ #define X86_CR3_PCD_BIT 4 /* Page Cache Disable */
+ #define X86_CR3_PCD _BITUL(X86_CR3_PCD_BIT)
+-#define X86_CR3_PCID_MASK _AC(0x00000fff,UL) /* PCID Mask */
++#define X86_CR3_PCID_NOFLUSH_BIT 63 /* Preserve old PCID */
++#define X86_CR3_PCID_NOFLUSH _BITULL(X86_CR3_PCID_NOFLUSH_BIT)
+
+ /*
+ * Intel CPU features in CR4
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -90,7 +90,7 @@ static const struct cpu_dev default_cpu
+
+ static const struct cpu_dev *this_cpu = &default_cpu;
+
+-DEFINE_PER_CPU_PAGE_ALIGNED(struct gdt_page, gdt_page) = { .gdt = {
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(struct gdt_page, gdt_page) = { .gdt = {
+ #ifdef CONFIG_X86_64
+ /*
+ * We need valid kernel segments for data and code in long mode too
+@@ -335,6 +335,19 @@ static void setup_pcid(struct cpuinfo_x8
+ if (cpu_has(c, X86_FEATURE_PCID)) {
+ if (cpu_has(c, X86_FEATURE_PGE)) {
+ set_in_cr4(X86_CR4_PCIDE);
++ /*
++ * INVPCID has two "groups" of types:
++ * 1/2: Invalidate an individual address
++ * 3/4: Invalidate all contexts
++ *
++ * 1/2 take a PCID, but 3/4 do not. So, 3/4
++ * ignore the PCID argument in the descriptor.
++ * But, we have to be careful not to call 1/2
++ * with an actual non-zero PCID in them before
++ * we do the above cr4_set_bits().
++ */
++ if (cpu_has(c, X86_FEATURE_INVPCID))
++ set_cpu_cap(c, X86_FEATURE_INVPCID_SINGLE);
+ } else {
+ /*
+ * flush_tlb_all(), as currently implemented, won't
+@@ -347,6 +360,7 @@ static void setup_pcid(struct cpuinfo_x8
+ clear_cpu_cap(c, X86_FEATURE_PCID);
+ }
+ }
++ kaiser_setup_pcid();
+ }
+
+ /*
+@@ -1207,7 +1221,7 @@ static const unsigned int exception_stac
+ [DEBUG_STACK - 1] = DEBUG_STKSZ
+ };
+
+-static DEFINE_PER_CPU_PAGE_ALIGNED(char, exception_stacks
++DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(char, exception_stacks
+ [(N_EXCEPTION_STACKS - 1) * EXCEPTION_STKSZ + DEBUG_STKSZ]);
+
+ /* May not be marked __init: used by software suspend */
+--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
++++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
+@@ -2,11 +2,15 @@
+ #include <linux/types.h>
+ #include <linux/slab.h>
+
++#include <asm/kaiser.h>
+ #include <asm/perf_event.h>
+ #include <asm/insn.h>
+
+ #include "perf_event.h"
+
++static
++DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct debug_store, cpu_debug_store);
++
+ /* The size of a BTS record in bytes: */
+ #define BTS_RECORD_SIZE 24
+
+@@ -256,6 +260,39 @@ void fini_debug_store_on_cpu(int cpu)
+
+ static DEFINE_PER_CPU(void *, insn_buffer);
+
++static void *dsalloc(size_t size, gfp_t flags, int node)
++{
++#ifdef CONFIG_KAISER
++ unsigned int order = get_order(size);
++ struct page *page;
++ unsigned long addr;
++
++ page = alloc_pages_node(node, flags | __GFP_ZERO, order);
++ if (!page)
++ return NULL;
++ addr = (unsigned long)page_address(page);
++ if (kaiser_add_mapping(addr, size, __PAGE_KERNEL) < 0) {
++ __free_pages(page, order);
++ addr = 0;
++ }
++ return (void *)addr;
++#else
++ return kmalloc_node(size, flags | __GFP_ZERO, node);
++#endif
++}
++
++static void dsfree(const void *buffer, size_t size)
++{
++#ifdef CONFIG_KAISER
++ if (!buffer)
++ return;
++ kaiser_remove_mapping((unsigned long)buffer, size);
++ free_pages((unsigned long)buffer, get_order(size));
++#else
++ kfree(buffer);
++#endif
++}
++
+ static int alloc_pebs_buffer(int cpu)
+ {
+ struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+@@ -266,7 +303,7 @@ static int alloc_pebs_buffer(int cpu)
+ if (!x86_pmu.pebs)
+ return 0;
+
+- buffer = kzalloc_node(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
++ buffer = dsalloc(x86_pmu.pebs_buffer_size, GFP_KERNEL, node);
+ if (unlikely(!buffer))
+ return -ENOMEM;
+
+@@ -277,7 +314,7 @@ static int alloc_pebs_buffer(int cpu)
+ if (x86_pmu.intel_cap.pebs_format < 2) {
+ ibuffer = kzalloc_node(PEBS_FIXUP_SIZE, GFP_KERNEL, node);
+ if (!ibuffer) {
+- kfree(buffer);
++ dsfree(buffer, x86_pmu.pebs_buffer_size);
+ return -ENOMEM;
+ }
+ per_cpu(insn_buffer, cpu) = ibuffer;
+@@ -306,7 +343,7 @@ static void release_pebs_buffer(int cpu)
+ kfree(per_cpu(insn_buffer, cpu));
+ per_cpu(insn_buffer, cpu) = NULL;
+
+- kfree((void *)(unsigned long)ds->pebs_buffer_base);
++ dsfree((void *)(unsigned long)ds->pebs_buffer_base, x86_pmu.pebs_buffer_size);
+ ds->pebs_buffer_base = 0;
+ }
+
+@@ -320,7 +357,7 @@ static int alloc_bts_buffer(int cpu)
+ if (!x86_pmu.bts)
+ return 0;
+
+- buffer = kzalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
++ buffer = dsalloc(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_NOWARN, node);
+ if (unlikely(!buffer)) {
+ WARN_ONCE(1, "%s: BTS buffer allocation failure\n", __func__);
+ return -ENOMEM;
+@@ -346,19 +383,15 @@ static void release_bts_buffer(int cpu)
+ if (!ds || !x86_pmu.bts)
+ return;
+
+- kfree((void *)(unsigned long)ds->bts_buffer_base);
++ dsfree((void *)(unsigned long)ds->bts_buffer_base, BTS_BUFFER_SIZE);
+ ds->bts_buffer_base = 0;
+ }
+
+ static int alloc_ds_buffer(int cpu)
+ {
+- int node = cpu_to_node(cpu);
+- struct debug_store *ds;
+-
+- ds = kzalloc_node(sizeof(*ds), GFP_KERNEL, node);
+- if (unlikely(!ds))
+- return -ENOMEM;
++ struct debug_store *ds = per_cpu_ptr(&cpu_debug_store, cpu);
+
++ memset(ds, 0, sizeof(*ds));
+ per_cpu(cpu_hw_events, cpu).ds = ds;
+
+ return 0;
+@@ -372,7 +405,6 @@ static void release_ds_buffer(int cpu)
+ return;
+
+ per_cpu(cpu_hw_events, cpu).ds = NULL;
+- kfree(ds);
+ }
+
+ void release_ds_buffers(void)
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -58,6 +58,7 @@
+ #include <asm/context_tracking.h>
+ #include <asm/smap.h>
+ #include <asm/pgtable_types.h>
++#include <asm/kaiser.h>
+ #include <linux/err.h>
+
+ /* Avoid __ASSEMBLER__'ifying <linux/audit.h> just for this. */
+@@ -263,6 +264,7 @@ ENDPROC(native_usergs_sysret64)
+ testl $3, CS-RBP(%rsi)
+ je 1f
+ SWAPGS
++ SWITCH_KERNEL_CR3
+ /*
+ * irq_count is used to check if a CPU is already on an interrupt stack
+ * or not. While this is essentially redundant with preempt_count it is
+@@ -284,6 +286,12 @@ ENDPROC(native_usergs_sysret64)
+ TRACE_IRQS_OFF
+ .endm
+
++/*
++ * Return: ebx=0: needs swapgs but not SWITCH_USER_CR3 in paranoid_exit
++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3 in paranoid_exit
++ * ebx=2: needs both swapgs and SWITCH_USER_CR3 in paranoid_exit
++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs in paranoid_exit
++ */
+ ENTRY(save_paranoid)
+ XCPT_FRAME 1 RDI+8
+ cld
+@@ -309,7 +317,25 @@ ENTRY(save_paranoid)
+ js 1f /* negative -> in kernel */
+ SWAPGS
+ xorl %ebx,%ebx
+-1: ret
++1:
++#ifdef CONFIG_KAISER
++ /*
++ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
++ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
++ * Do a conditional SWITCH_KERNEL_CR3: this could safely be done
++ * unconditionally, but we need to find out whether the reverse
++ * should be done on return (conveyed to paranoid_exit in %ebx).
++ */
++ movq %cr3, %rax
++ testl $KAISER_SHADOW_PGD_OFFSET, %eax
++ jz 2f
++ orl $2, %ebx
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ orq x86_cr3_pcid_noflush, %rax
++ movq %rax, %cr3
++2:
++#endif
++ ret
+ CFI_ENDPROC
+ END(save_paranoid)
+
+@@ -394,6 +420,7 @@ ENTRY(system_call)
+ CFI_REGISTER rip,rcx
+ /*CFI_REGISTER rflags,r11*/
+ SWAPGS_UNSAFE_STACK
++ SWITCH_KERNEL_CR3_NO_STACK
+ /*
+ * A hypervisor implementation might want to use a label
+ * after the swapgs, so that it can do the swapgs
+@@ -448,6 +475,14 @@ sysret_check:
+ CFI_REGISTER rip,rcx
+ RESTORE_ARGS 1,-ARG_SKIP,0
+ /*CFI_REGISTER rflags,r11*/
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
++ SWITCH_USER_CR3
+ movq PER_CPU_VAR(old_rsp), %rsp
+ USERGS_SYSRET64
+
+@@ -820,6 +855,14 @@ retint_swapgs: /* return to user-space
+ */
+ DISABLE_INTERRUPTS(CLBR_ANY)
+ TRACE_IRQS_IRETQ
++ /*
++ * This opens a window where we have a user CR3, but are
++ * running in the kernel. This makes using the CS
++ * register useless for telling whether or not we need to
++ * switch CR3 in NMIs. Normal interrupts are OK because
++ * they are off here.
++ */
++ SWITCH_USER_CR3
+ SWAPGS
+ jmp restore_args
+
+@@ -860,6 +903,7 @@ native_irq_return_ldt:
+ pushq_cfi %rax
+ pushq_cfi %rdi
+ SWAPGS
++ SWITCH_KERNEL_CR3
+ movq PER_CPU_VAR(espfix_waddr),%rdi
+ movq %rax,(0*8)(%rdi) /* RAX */
+ movq (2*8)(%rsp),%rax /* RIP */
+@@ -875,6 +919,7 @@ native_irq_return_ldt:
+ andl $0xffff0000,%eax
+ popq_cfi %rdi
+ orq PER_CPU_VAR(espfix_stack),%rax
++ SWITCH_USER_CR3
+ SWAPGS
+ movq %rax,%rsp
+ popq_cfi %rax
+@@ -1289,30 +1334,40 @@ idtentry machine_check has_error_code=0
+ * is fundamentally NMI-unsafe. (we cannot change the soft and
+ * hard flags at once, atomically)
+ */
+-
+- /* ebx: no swapgs flag */
++/*
++ * On entry: ebx=0: needs swapgs but not SWITCH_USER_CR3
++ * ebx=1: needs neither swapgs nor SWITCH_USER_CR3
++ * ebx=2: needs both swapgs and SWITCH_USER_CR3
++ * ebx=3: needs SWITCH_USER_CR3 but not swapgs
++ */
+ ENTRY(paranoid_exit)
+ DEFAULT_FRAME
+ DISABLE_INTERRUPTS(CLBR_NONE)
+ TRACE_IRQS_OFF_DEBUG
+- testl %ebx,%ebx /* swapgs needed? */
+- jnz paranoid_restore
+- testl $3,CS(%rsp)
+- jnz paranoid_userspace
+-paranoid_swapgs:
+- TRACE_IRQS_IRETQ 0
+- SWAPGS_UNSAFE_STACK
+- RESTORE_ALL 8
+- jmp irq_return
+-paranoid_restore:
++ movq %rbx, %r12 /* paranoid_userspace uses %ebx */
++ testl $3, CS(%rsp)
++ jnz paranoid_userspace
++paranoid_kernel:
++ movq %r12, %rbx /* restore after paranoid_userspace */
+ TRACE_IRQS_IRETQ_DEBUG 0
++#ifdef CONFIG_KAISER
++ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
++ jz paranoid_exit_no_switch
++ SWITCH_USER_CR3
++paranoid_exit_no_switch:
++#endif
++ testl $1, %ebx /* swapgs needed? */
++ jnz paranoid_exit_no_swapgs
++ SWAPGS_UNSAFE_STACK
++paranoid_exit_no_swapgs:
+ RESTORE_ALL 8
+- jmp irq_return
++ jmp irq_return
++
+ paranoid_userspace:
+ GET_THREAD_INFO(%rcx)
+ movl TI_flags(%rcx),%ebx
+ andl $_TIF_WORK_MASK,%ebx
+- jz paranoid_swapgs
++ jz paranoid_kernel
+ movq %rsp,%rdi /* &pt_regs */
+ call sync_regs
+ movq %rax,%rsp /* switch stack for scheduling */
+@@ -1361,6 +1416,13 @@ ENTRY(error_entry)
+ movq_cfi r13, R13+8
+ movq_cfi r14, R14+8
+ movq_cfi r15, R15+8
++ /*
++ * error_entry() always returns with a kernel gsbase and
++ * CR3. We must also have a kernel CR3/gsbase before
++ * calling TRACE_IRQS_*. Just unconditionally switch to
++ * the kernel CR3 here.
++ */
++ SWITCH_KERNEL_CR3
+ xorl %ebx,%ebx
+ testl $3,CS+8(%rsp)
+ je error_kernelspace
+@@ -1497,6 +1559,10 @@ ENTRY(nmi)
+ */
+
+ SWAPGS_UNSAFE_STACK
++ /*
++ * percpu variables are mapped with user CR3, so no need
++ * to switch CR3 here.
++ */
+ cld
+ movq %rsp, %rdx
+ movq PER_CPU_VAR(kernel_stack), %rsp
+@@ -1531,12 +1597,34 @@ ENTRY(nmi)
+
+ movq %rsp, %rdi
+ movq $-1, %rsi
++#ifdef CONFIG_KAISER
++ /* Unconditionally use kernel CR3 for do_nmi() */
++ /* %rax is saved above, so OK to clobber here */
++ movq %cr3, %rax
++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++ orq x86_cr3_pcid_noflush, %rax
++ pushq %rax
++ /* mask off "user" bit of pgd address and 12 PCID bits: */
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ movq %rax, %cr3
++#endif
+ call do_nmi
+
++#ifdef CONFIG_KAISER
++ /*
++ * Unconditionally restore CR3. I know we return to
++ * kernel code that needs user CR3, but do we ever return
++ * to "user mode" where we need the kernel CR3?
++ */
++ popq %rax
++ mov %rax, %cr3
++#endif
++
+ /*
+ * Return back to user mode. We must *not* do the normal exit
+- * work, because we don't want to enable interrupts. Fortunately,
+- * do_nmi doesn't modify pt_regs.
++ * work, because we don't want to enable interrupts. Do not
++ * switch to user CR3: we might be going back to kernel code
++ * that had a user CR3 set.
+ */
+ SWAPGS
+
+@@ -1746,23 +1834,69 @@ end_repeat_nmi:
+ subq $ORIG_RAX-R15, %rsp
+ CFI_ADJUST_CFA_OFFSET ORIG_RAX-R15
+ /*
+- * Use save_paranoid to handle SWAPGS, but no need to use paranoid_exit
+- * as we should not be calling schedule in NMI context.
+- * Even with normal interrupts enabled. An NMI should not be
+- * setting NEED_RESCHED or anything that normal interrupts and
+- * exceptions might do.
++ * Use the same approach as save_paranoid to handle SWAPGS, but
++ * without CR3 handling since we do that differently in NMIs. No
++ * need to use paranoid_exit as we should not be calling schedule
++ * in NMI context, even with normal interrupts enabled. An NMI
++ * should not be setting NEED_RESCHED or anything that normal
++ * interrupts and exceptions might do.
+ */
+- call save_paranoid
+- DEFAULT_FRAME 0
++ cld
++ movq %rdi, RDI(%rsp)
++ movq %rsi, RSI(%rsp)
++ movq_cfi rdx, RDX
++ movq_cfi rcx, RCX
++ movq_cfi rax, RAX
++ movq %r8, R8(%rsp)
++ movq %r9, R9(%rsp)
++ movq %r10, R10(%rsp)
++ movq %r11, R11(%rsp)
++ movq_cfi rbx, RBX
++ movq %rbp, RBP(%rsp)
++ movq %r12, R12(%rsp)
++ movq %r13, R13(%rsp)
++ movq %r14, R14(%rsp)
++ movq %r15, R15(%rsp)
++ movl $1, %ebx
++ movl $MSR_GS_BASE, %ecx
++ rdmsr
++ testl %edx, %edx
++ js 1f /* negative -> in kernel */
++ SWAPGS
++ xorl %ebx,%ebx
++1:
++ movq %rsp,%rdi
++ movq $-1,%rsi
++#ifdef CONFIG_KAISER
++ /* Unconditionally use kernel CR3 for do_nmi() */
++ /* %rax is saved above, so OK to clobber here */
++ movq %cr3, %rax
++ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
++ orq x86_cr3_pcid_noflush, %rax
++ pushq %rax
++ /* mask off "user" bit of pgd address and 12 PCID bits: */
++ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
++ movq %rax, %cr3
++#endif
++ DEFAULT_FRAME 0 /* ???? */

+
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+- movq %rsp,%rdi
+- movq $-1,%rsi
+- call do_nmi
++ call do_nmi
++
++#ifdef CONFIG_KAISER
++ /*
++ * Unconditionally restore CR3. We might be returning to
++ * kernel code that needs user CR3, like just before
++ * a sysret.
++ */
++ popq %rax
++ mov %rax, %cr3
++#endif
+
+ testl %ebx,%ebx /* swapgs needed? */
+ jnz nmi_restore
+ nmi_swapgs:
++ /* We fixed up CR3 above, so no need to switch it here */
+ SWAPGS_UNSAFE_STACK
+ nmi_restore:
+
+--- a/arch/x86/kernel/espfix_64.c
++++ b/arch/x86/kernel/espfix_64.c
+@@ -41,6 +41,7 @@
+ #include <asm/pgalloc.h>
+ #include <asm/setup.h>
+ #include <asm/espfix.h>
++#include <asm/kaiser.h>
+
+ /*
+ * Note: we only need 6*8 = 48 bytes for the espfix stack, but round
+@@ -129,6 +130,14 @@ void __init init_espfix_bsp(void)
+ /* Install the espfix pud into the kernel page directory */
+ pgd_p = &init_level4_pgt[pgd_index(ESPFIX_BASE_ADDR)];
+ pgd_populate(&init_mm, pgd_p, (pud_t *)espfix_pud_page);
++ /*
++ * Just copy the top-level PGD that is mapping the espfix
++ * area to ensure it is mapped into the shadow user page
++ * tables.
++ */
++ if (IS_ENABLED(CONFIG_KAISER))
++ set_pgd(native_get_shadow_pgd(pgd_p),
++ __pgd(_KERNPG_TABLE | __pa((pud_t *)espfix_pud_page)));
+
+ /* Randomize the locations */
+ init_espfix_random();
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -441,6 +441,27 @@ early_idt_ripmsg:
+ .balign PAGE_SIZE; \
+ GLOBAL(name)
+
++#ifdef CONFIG_KAISER
++/*
++ * Each PGD needs to be 8k long and 8k aligned. We do not
++ * ever go out to userspace with these, so we do not
++ * strictly *need* the second page, but this allows us to
++ * have a single set_pgd() implementation that does not
++ * need to worry about whether it has 4k or 8k to work
++ * with.
++ *
++ * This ensures PGDs are 8k long:
++ */
++#define KAISER_USER_PGD_FILL 512
++/* This ensures they are 8k-aligned: */
++#define NEXT_PGD_PAGE(name) \
++ .balign 2 * PAGE_SIZE; \
++GLOBAL(name)
++#else
++#define NEXT_PGD_PAGE(name) NEXT_PAGE(name)
++#define KAISER_USER_PGD_FILL 0
++#endif
++
+ /* Automate the creation of 1 to 1 mapping pmd entries */
+ #define PMDS(START, PERM, COUNT) \
+ i = 0 ; \
+@@ -450,9 +471,10 @@ GLOBAL(name)
+ .endr
+
+ __INITDATA
+-NEXT_PAGE(early_level4_pgt)
++NEXT_PGD_PAGE(early_level4_pgt)
+ .fill 511,8,0
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(early_dynamic_pgts)
+ .fill 512*EARLY_DYNAMIC_PAGE_TABLES,8,0
+@@ -460,16 +482,18 @@ NEXT_PAGE(early_dynamic_pgts)
+ .data
+
+ #ifndef CONFIG_XEN
+-NEXT_PAGE(init_level4_pgt)
++NEXT_PGD_PAGE(init_level4_pgt)
+ .fill 512,8,0
++ .fill KAISER_USER_PGD_FILL,8,0
+ #else
+-NEXT_PAGE(init_level4_pgt)
++NEXT_PGD_PAGE(init_level4_pgt)
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .org init_level4_pgt + L4_PAGE_OFFSET*8, 0
+ .quad level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+ .org init_level4_pgt + L4_START_KERNEL*8, 0
+ /* (2^48-(2*1024*1024*1024))/(2^39) = 511 */
+ .quad level3_kernel_pgt - __START_KERNEL_map + _PAGE_TABLE
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(level3_ident_pgt)
+ .quad level2_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE
+@@ -480,6 +504,7 @@ NEXT_PAGE(level2_ident_pgt)
+ */
+ PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+ #endif
++ .fill KAISER_USER_PGD_FILL,8,0
+
+ NEXT_PAGE(level3_kernel_pgt)
+ .fill L3_START_KERNEL,8,0
+--- a/arch/x86/kernel/irqinit.c
++++ b/arch/x86/kernel/irqinit.c
+@@ -51,7 +51,7 @@ static struct irqaction irq2 = {
+ .flags = IRQF_NO_THREAD,
+ };
+
+-DEFINE_PER_CPU(vector_irq_t, vector_irq) = {
++DEFINE_PER_CPU_USER_MAPPED(vector_irq_t, vector_irq) = {
+ [0 ... NR_VECTORS - 1] = VECTOR_UNDEFINED,
+ };
+
+--- a/arch/x86/kernel/ldt.c
++++ b/arch/x86/kernel/ldt.c
+@@ -15,6 +15,7 @@
+ #include <linux/slab.h>
+ #include <linux/vmalloc.h>
+ #include <linux/uaccess.h>
++#include <linux/kaiser.h>
+
+ #include <asm/ldt.h>
+ #include <asm/desc.h>
+@@ -33,11 +34,21 @@ static void flush_ldt(void *current_mm)
+ set_ldt(pc->ldt->entries, pc->ldt->size);
+ }
+
++static void __free_ldt_struct(struct ldt_struct *ldt)
++{
++ if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
++ vfree(ldt->entries);
++ else
++ free_page((unsigned long)ldt->entries);
++ kfree(ldt);
++}
++
+ /* The caller must call finalize_ldt_struct on the result. LDT starts zeroed. */
+ static struct ldt_struct *alloc_ldt_struct(int size)
+ {
+ struct ldt_struct *new_ldt;
+ int alloc_size;
++ int ret;
+
+ if (size > LDT_ENTRIES)
+ return NULL;
+@@ -65,7 +76,13 @@ static struct ldt_struct *alloc_ldt_stru
+ return NULL;
+ }
+
++ ret = kaiser_add_mapping((unsigned long)new_ldt->entries, alloc_size,
++ __PAGE_KERNEL);
+ new_ldt->size = size;
++ if (ret) {
++ __free_ldt_struct(new_ldt);
++ return NULL;
++ }
+ return new_ldt;
+ }
+
+@@ -91,12 +108,10 @@ static void free_ldt_struct(struct ldt_s
+ if (likely(!ldt))
+ return;
+
++ kaiser_remove_mapping((unsigned long)ldt->entries,
++ ldt->size * LDT_ENTRY_SIZE);
+ paravirt_free_ldt(ldt->entries, ldt->size);
+- if (ldt->size * LDT_ENTRY_SIZE > PAGE_SIZE)
+- vfree(ldt->entries);
+- else
+- kfree(ldt->entries);
+- kfree(ldt);
++ __free_ldt_struct(ldt);
+ }
+
+ /*
+--- a/arch/x86/kernel/process.c
++++ b/arch/x86/kernel/process.c
+@@ -37,7 +37,7 @@
+ * section. Since TSS's are completely CPU-local, we want them
+ * on exact cacheline boundaries, to eliminate cacheline ping-pong.
+ */
+-__visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, init_tss) = INIT_TSS;
++__visible DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(struct tss_struct, init_tss) = INIT_TSS;
+
+ #ifdef CONFIG_X86_64
+ static DEFINE_PER_CPU(unsigned char, is_idle);
+--- a/arch/x86/kernel/process_64.c
++++ b/arch/x86/kernel/process_64.c
+@@ -53,7 +53,7 @@
+
+ asmlinkage extern void ret_from_fork(void);
+
+-__visible DEFINE_PER_CPU(unsigned long, old_rsp);
++__visible DEFINE_PER_CPU_USER_MAPPED(unsigned long, old_rsp);
+
+ /* Prints also some state that isn't saved in the pt_regs */
+ void __show_regs(struct pt_regs *regs, int all)
+--- a/arch/x86/kernel/tracepoint.c
++++ b/arch/x86/kernel/tracepoint.c
+@@ -9,10 +9,12 @@
+ #include <linux/atomic.h>
+
+ atomic_t trace_idt_ctr = ATOMIC_INIT(0);
++__aligned(PAGE_SIZE)
+ struct desc_ptr trace_idt_descr = { NR_VECTORS * 16 - 1,
+ (unsigned long) trace_idt_table };
+
+ /* No need to be aligned, but done to keep all IDTs defined the same way. */
++__aligned(PAGE_SIZE)
+ gate_desc trace_idt_table[NR_VECTORS] __page_aligned_bss;
+
+ static int trace_irq_vector_refcount;
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -688,7 +688,8 @@ int kvm_set_cr4(struct kvm_vcpu *vcpu, u
+ return 1;
+
+ /* PCID can not be enabled when cr3[11:0]!=000H or EFER.LMA=0 */
+- if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_MASK) || !is_long_mode(vcpu))
++ if ((kvm_read_cr3(vcpu) & X86_CR3_PCID_ASID_MASK) ||
++ !is_long_mode(vcpu))
+ return 1;
+ }
+
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -29,3 +29,4 @@ obj-$(CONFIG_ACPI_NUMA) += srat.o
+ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
+
+ obj-$(CONFIG_MEMTEST) += memtest.o
++obj-$(CONFIG_KAISER) += kaiser.o
+--- /dev/null
++++ b/arch/x86/mm/kaiser.c
+@@ -0,0 +1,399 @@
++#include <linux/bug.h>
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/string.h>
++#include <linux/types.h>
++#include <linux/bug.h>
++#include <linux/init.h>
++#include <linux/interrupt.h>
++#include <linux/spinlock.h>
++#include <linux/mm.h>
++#include <linux/uaccess.h>
++#include <linux/ftrace.h>
++
++extern struct mm_struct init_mm;
++
++#include <asm/kaiser.h>
++#include <asm/tlbflush.h> /* to verify its kaiser declarations */
++#include <asm/pgtable.h>
++#include <asm/pgalloc.h>
++#include <asm/desc.h>
++
++#ifdef CONFIG_KAISER
++__visible
++DEFINE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
++
++/*
++ * These can have bit 63 set, so we can not just use a plain "or"
++ * instruction to get their value or'd into CR3. It would take
++ * another register. So, we use a memory reference to these instead.
++ *
++ * This is also handy because systems that do not support PCIDs
++ * just end up or'ing a 0 into their CR3, which does no harm.
++ */
++unsigned long x86_cr3_pcid_noflush __read_mostly;
++DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
++
++/*
++ * At runtime, the only things we map are some things for CPU
++ * hotplug, and stacks for new processes. No two CPUs will ever
++ * be populating the same addresses, so we only need to ensure
++ * that we protect between two CPUs trying to allocate and
++ * populate the same page table page.
++ *
++ * Only take this lock when doing a set_p[4um]d(), but it is not
++ * needed for doing a set_pte(). We assume that only the *owner*
++ * of a given allocation will be doing this for _their_
++ * allocation.
++ *
++ * This ensures that once a system has been running for a while
++ * and there have been stacks all over and these page tables
++ * are fully populated, there will be no further acquisitions of
++ * this lock.
++ */
++static DEFINE_SPINLOCK(shadow_table_allocation_lock);
++
++/*
++ * Returns -1 on error.
++ */
++static inline unsigned long get_pa_from_mapping(unsigned long vaddr)
++{
++ pgd_t *pgd;
++ pud_t *pud;
++ pmd_t *pmd;
++ pte_t *pte;
++
++ pgd = pgd_offset_k(vaddr);
++ /*
++ * We made all the kernel PGDs present in kaiser_init().
++ * We expect them to stay that way.
++ */
++ BUG_ON(pgd_none(*pgd));
++ /*
++ * PGDs are either 512GB or 128TB on all x86_64
++ * configurations. We don't handle these.
++ */
++ BUG_ON(pgd_large(*pgd));
++
++ pud = pud_offset(pgd, vaddr);
++ if (pud_none(*pud)) {
++ WARN_ON_ONCE(1);
++ return -1;
++ }
++
++ if (pud_large(*pud))
++ return (pud_pfn(*pud) << PAGE_SHIFT) | (vaddr & ~PUD_PAGE_MASK);
++
++ pmd = pmd_offset(pud, vaddr);
++ if (pmd_none(*pmd)) {
++ WARN_ON_ONCE(1);
++ return -1;
++ }
++
++ if (pmd_large(*pmd))
++ return (pmd_pfn(*pmd) << PAGE_SHIFT) | (vaddr & ~PMD_PAGE_MASK);
++
++ pte = pte_offset_kernel(pmd, vaddr);
++ if (pte_none(*pte)) {
++ WARN_ON_ONCE(1);
++ return -1;
++ }
++
++ return (pte_pfn(*pte) << PAGE_SHIFT) | (vaddr & ~PAGE_MASK);
++}
++
++/*
++ * This is a relatively normal page table walk, except that it
++ * also tries to allocate page tables pages along the way.
++ *
++ * Returns a pointer to a PTE on success, or NULL on failure.
++ */
++static pte_t *kaiser_pagetable_walk(unsigned long address)
++{
++ pmd_t *pmd;
++ pud_t *pud;
++ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
++ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++
++ if (pgd_none(*pgd)) {
++ WARN_ONCE(1, "All shadow pgds should have been populated");
++ return NULL;
++ }
++ BUILD_BUG_ON(pgd_large(*pgd) != 0);
++
++ pud = pud_offset(pgd, address);
++ /* The shadow page tables do not use large mappings: */
++ if (pud_large(*pud)) {
++ WARN_ON(1);
++ return NULL;
++ }
++ if (pud_none(*pud)) {
++ unsigned long new_pmd_page = __get_free_page(gfp);
++ if (!new_pmd_page)
++ return NULL;
++ spin_lock(&shadow_table_allocation_lock);
++ if (pud_none(*pud)) {
++ set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pmd_page), NR_KAISERTABLE);
++ } else
++ free_page(new_pmd_page);
++ spin_unlock(&shadow_table_allocation_lock);
++ }
++
++ pmd = pmd_offset(pud, address);
++ /* The shadow page tables do not use large mappings: */
++ if (pmd_large(*pmd)) {
++ WARN_ON(1);
++ return NULL;
++ }
++ if (pmd_none(*pmd)) {
++ unsigned long new_pte_page = __get_free_page(gfp);
++ if (!new_pte_page)
++ return NULL;
++ spin_lock(&shadow_table_allocation_lock);
++ if (pmd_none(*pmd)) {
++ set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++ __inc_zone_page_state(virt_to_page((void *)
++ new_pte_page), NR_KAISERTABLE);
++ } else
++ free_page(new_pte_page);
++ spin_unlock(&shadow_table_allocation_lock);
++ }
++
++ return pte_offset_kernel(pmd, address);
++}
++
++int kaiser_add_user_map(const void *__start_addr, unsigned long size,
++ unsigned long flags)
++{
++ int ret = 0;
++ pte_t *pte;
++ unsigned long start_addr = (unsigned long)__start_addr;
++ unsigned long address = start_addr & PAGE_MASK;
++ unsigned long end_addr = PAGE_ALIGN(start_addr + size);
++ unsigned long target_address;
++
++ for (; address < end_addr; address += PAGE_SIZE) {
++ target_address = get_pa_from_mapping(address);
++ if (target_address == -1) {
++ ret = -EIO;
++ break;
++ }
++ pte = kaiser_pagetable_walk(address);
++ if (!pte) {
++ ret = -ENOMEM;
++ break;
++ }
++ if (pte_none(*pte)) {
++ set_pte(pte, __pte(flags | target_address));
++ } else {
++ pte_t tmp;
++ set_pte(&tmp, __pte(flags | target_address));
++ WARN_ON_ONCE(!pte_same(*pte, tmp));
++ }
++ }
++ return ret;
++}
++
++static int kaiser_add_user_map_ptrs(const void *start, const void *end, unsigned long flags)
++{
++ unsigned long size = end - start;
++
++ return kaiser_add_user_map(start, size, flags);
++}
++
++/*
++ * Ensure that the top level of the (shadow) page tables are
++ * entirely populated. This ensures that all processes that get
++ * forked have the same entries. This way, we do not have to
++ * ever go set up new entries in older processes.
++ *
++ * Note: we never free these, so there are no updates to them
++ * after this.
++ */
++static void __init kaiser_init_all_pgds(void)
++{
++ pgd_t *pgd;
++ int i = 0;
++
++ pgd = native_get_shadow_pgd(pgd_offset_k((unsigned long)0));
++ for (i = PTRS_PER_PGD / 2; i < PTRS_PER_PGD; i++) {
++ pgd_t new_pgd;
++ pud_t *pud = pud_alloc_one(&init_mm,
++ PAGE_OFFSET + i * PGDIR_SIZE);
++ if (!pud) {
++ WARN_ON(1);
++ break;
++ }
++ inc_zone_page_state(virt_to_page(pud), NR_KAISERTABLE);
++ new_pgd = __pgd(_KERNPG_TABLE | __pa(pud));
++ /*
++ * Make sure not to stomp on some other pgd entry.
++ */
++ if (!pgd_none(pgd[i])) {
++ WARN_ON(1);
++ continue;
++ }
++ set_pgd(pgd + i, new_pgd);
++ }
++}
++
++#define kaiser_add_user_map_early(start, size, flags) do { \
++ int __ret = kaiser_add_user_map(start, size, flags); \
++ WARN_ON(__ret); \
++} while (0)
++
++#define kaiser_add_user_map_ptrs_early(start, end, flags) do { \
++ int __ret = kaiser_add_user_map_ptrs(start, end, flags); \
++ WARN_ON(__ret); \
++} while (0)
++
++/*
++ * If anything in here fails, we will likely die on one of the
++ * first kernel->user transitions and init will die. But, we
++ * will have most of the kernel up by then and should be able to
++ * get a clean warning out of it. If we BUG_ON() here, we run
++ * the risk of being before we have good console output.
++ */
++void __init kaiser_init(void)
++{
++ int cpu;
++
++ kaiser_init_all_pgds();
++
++ for_each_possible_cpu(cpu) {
++ void *percpu_vaddr = __per_cpu_user_mapped_start +
++ per_cpu_offset(cpu);
++ unsigned long percpu_sz = __per_cpu_user_mapped_end -
++ __per_cpu_user_mapped_start;
++ kaiser_add_user_map_early(percpu_vaddr, percpu_sz,
++ __PAGE_KERNEL);
++ }
++
++ /*
++ * Map the entry/exit text section, which is needed at
++ * switches from user to and from kernel.
++ */
++ kaiser_add_user_map_ptrs_early(__entry_text_start, __entry_text_end,
++ __PAGE_KERNEL_RX);
++
++#if defined(CONFIG_FUNCTION_GRAPH_TRACER) || defined(CONFIG_KASAN)
++ kaiser_add_user_map_ptrs_early(__irqentry_text_start,
++ __irqentry_text_end,
++ __PAGE_KERNEL_RX);
++#endif
++ kaiser_add_user_map_early((void *)idt_descr.address,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL_RO);
++#ifdef CONFIG_TRACING
++ kaiser_add_user_map_early(&trace_idt_descr,
++ sizeof(trace_idt_descr),
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&trace_idt_table,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL);
++#endif
++ kaiser_add_user_map_early(&debug_idt_descr, sizeof(debug_idt_descr),
++ __PAGE_KERNEL);
++ kaiser_add_user_map_early(&debug_idt_table,
++ sizeof(gate_desc) * NR_VECTORS,
++ __PAGE_KERNEL);
++
++ kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
++ sizeof(x86_cr3_pcid_noflush),
++ __PAGE_KERNEL);
++}
++
++/* Add a mapping to the shadow mapping, and synchronize the mappings */
++int kaiser_add_mapping(unsigned long addr, unsigned long size, unsigned long flags)
++{
++ return kaiser_add_user_map((const void *)addr, size, flags);
++}
++
++void kaiser_remove_mapping(unsigned long start, unsigned long size)
++{
++ extern void unmap_pud_range_nofree(pgd_t *pgd,
++ unsigned long start, unsigned long end);
++ unsigned long end = start + size;
++ unsigned long addr, next;
++ pgd_t *pgd;
++
++ pgd = native_get_shadow_pgd(pgd_offset_k(start));
++ for (addr = start; addr < end; pgd++, addr = next) {
++ next = pgd_addr_end(addr, end);
++ unmap_pud_range_nofree(pgd, addr, next);
++ }
++}
++
++/*
++ * Page table pages are page-aligned. The lower half of the top
++ * level is used for userspace and the top half for the kernel.
++ * This returns true for user pages that need to get copied into
++ * both the user and kernel copies of the page tables, and false
++ * for kernel pages that should only be in the kernel copy.
++ */
++static inline bool is_userspace_pgd(pgd_t *pgdp)
++{
++ return ((unsigned long)pgdp % PAGE_SIZE) < (PAGE_SIZE / 2);
++}
++
++pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd)
++{
++ /*
++ * Do we need to also populate the shadow pgd? Check _PAGE_USER to
++ * skip cases like kexec and EFI which make temporary low mappings.
++ */
++ if (pgd.pgd & _PAGE_USER) {
++ if (is_userspace_pgd(pgdp)) {
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ /*
++ * Even if the entry is *mapping* userspace, ensure
++ * that userspace can not use it. This way, if we
++ * get out to userspace running on the kernel CR3,
++ * userspace will crash instead of running.
++ */
++ pgd.pgd |= _PAGE_NX;
++ }
++ } else if (!pgd.pgd) {
++ /*
++ * pgd_clear() cannot check _PAGE_USER, and is even used to
++ * clear corrupted pgd entries: so just rely on cases like
++ * kexec and EFI never to be using pgd_clear().
++ */
++ if (!WARN_ON_ONCE((unsigned long)pgdp & PAGE_SIZE) &&
++ is_userspace_pgd(pgdp))
++ native_get_shadow_pgd(pgdp)->pgd = pgd.pgd;
++ }
++ return pgd;
++}
++
++void kaiser_setup_pcid(void)
++{
++ unsigned long kern_cr3 = 0;
++ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
++
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
++ }
++ /*
++ * These variables are used by the entry/exit
++ * code to change PCID and pgd and TLB flushing.
++ */
++ x86_cr3_pcid_noflush = kern_cr3;
++ this_cpu_write(x86_cr3_pcid_user, user_cr3);
++}
++
++/*
++ * Make a note that this cpu will need to flush USER tlb on return to user.
++ * Caller checks whether this_cpu_has(X86_FEATURE_PCID) before calling:
++ * if cpu does not, then the NOFLUSH bit will never have been set.
++ */
++void kaiser_flush_tlb_on_return_to_user(void)
++{
++ this_cpu_write(x86_cr3_pcid_user,
++ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
++}
++EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
++#endif /* CONFIG_KAISER */
+--- a/arch/x86/mm/pageattr.c
++++ b/arch/x86/mm/pageattr.c
+@@ -52,6 +52,7 @@ static DEFINE_SPINLOCK(cpa_lock);
+ #define CPA_FLUSHTLB 1
+ #define CPA_ARRAY 2
+ #define CPA_PAGES_ARRAY 4
++#define CPA_FREE_PAGETABLES 8
+
+ #ifdef CONFIG_PROC_FS
+ static unsigned long direct_pages_count[PG_LEVEL_NUM];
+@@ -672,10 +673,13 @@ static int split_large_page(struct cpa_d
+ return 0;
+ }
+
+-static bool try_to_free_pte_page(pte_t *pte)
++static bool try_to_free_pte_page(struct cpa_data *cpa, pte_t *pte)
+ {
+ int i;
+
++ if (!(cpa->flags & CPA_FREE_PAGETABLES))
++ return false;
++
+ for (i = 0; i < PTRS_PER_PTE; i++)
+ if (!pte_none(pte[i]))
+ return false;
+@@ -684,10 +688,13 @@ static bool try_to_free_pte_page(pte_t *
+ return true;
+ }
+
+-static bool try_to_free_pmd_page(pmd_t *pmd)
++static bool try_to_free_pmd_page(struct cpa_data *cpa, pmd_t *pmd)
+ {
+ int i;
+
++ if (!(cpa->flags & CPA_FREE_PAGETABLES))
++ return false;
++
+ for (i = 0; i < PTRS_PER_PMD; i++)
+ if (!pmd_none(pmd[i]))
+ return false;
+@@ -708,7 +715,9 @@ static bool try_to_free_pud_page(pud_t *
+ return true;
+ }
+
+-static bool unmap_pte_range(pmd_t *pmd, unsigned long start, unsigned long end)
++static bool unmap_pte_range(struct cpa_data *cpa, pmd_t *pmd,
++ unsigned long start,
++ unsigned long end)
+ {
+ pte_t *pte = pte_offset_kernel(pmd, start);
+
+@@ -719,22 +728,23 @@ static bool unmap_pte_range(pmd_t *pmd,
+ pte++;
+ }
+
+- if (try_to_free_pte_page((pte_t *)pmd_page_vaddr(*pmd))) {
++ if (try_to_free_pte_page(cpa, (pte_t *)pmd_page_vaddr(*pmd))) {
+ pmd_clear(pmd);
+ return true;
+ }
+ return false;
+ }
+
+-static void __unmap_pmd_range(pud_t *pud, pmd_t *pmd,
++static void __unmap_pmd_range(struct cpa_data *cpa, pud_t *pud, pmd_t *pmd,
+ unsigned long start, unsigned long end)
+ {
+- if (unmap_pte_range(pmd, start, end))
+- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++ if (unmap_pte_range(cpa, pmd, start, end))
++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ pud_clear(pud);
+ }
+
+-static void unmap_pmd_range(pud_t *pud, unsigned long start, unsigned long end)
++static void unmap_pmd_range(struct cpa_data *cpa, pud_t *pud,
++ unsigned long start, unsigned long end)
+ {
+ pmd_t *pmd = pmd_offset(pud, start);
+
+@@ -745,7 +755,7 @@ static void unmap_pmd_range(pud_t *pud,
+ unsigned long next_page = (start + PMD_SIZE) & PMD_MASK;
+ unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+- __unmap_pmd_range(pud, pmd, start, pre_end);
++ __unmap_pmd_range(cpa, pud, pmd, start, pre_end);
+
+ start = pre_end;
+ pmd++;
+@@ -758,7 +768,8 @@ static void unmap_pmd_range(pud_t *pud,
+ if (pmd_large(*pmd))
+ pmd_clear(pmd);
+ else
+- __unmap_pmd_range(pud, pmd, start, start + PMD_SIZE);
++ __unmap_pmd_range(cpa, pud, pmd,
++ start, start + PMD_SIZE);
+
+ start += PMD_SIZE;
+ pmd++;
+@@ -768,17 +779,19 @@ static void unmap_pmd_range(pud_t *pud,
+ * 4K leftovers?
+ */
+ if (start < end)
+- return __unmap_pmd_range(pud, pmd, start, end);
++ return __unmap_pmd_range(cpa, pud, pmd, start, end);
+
+ /*
+ * Try again to free the PMD page if haven't succeeded above.
+ */
+ if (!pud_none(*pud))
+- if (try_to_free_pmd_page((pmd_t *)pud_page_vaddr(*pud)))
++ if (try_to_free_pmd_page(cpa, (pmd_t *)pud_page_vaddr(*pud)))
+ pud_clear(pud);
+ }
+
+-static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++static void __unmap_pud_range(struct cpa_data *cpa, pgd_t *pgd,
++ unsigned long start,
++ unsigned long end)
+ {
+ pud_t *pud = pud_offset(pgd, start);
+
+@@ -789,7 +802,7 @@ static void unmap_pud_range(pgd_t *pgd,
+ unsigned long next_page = (start + PUD_SIZE) & PUD_MASK;
+ unsigned long pre_end = min_t(unsigned long, end, next_page);
+
+- unmap_pmd_range(pud, start, pre_end);
++ unmap_pmd_range(cpa, pud, start, pre_end);
+
+ start = pre_end;
+ pud++;
+@@ -803,7 +816,7 @@ static void unmap_pud_range(pgd_t *pgd,
+ if (pud_large(*pud))
+ pud_clear(pud);
+ else
+- unmap_pmd_range(pud, start, start + PUD_SIZE);
++ unmap_pmd_range(cpa, pud, start, start + PUD_SIZE);
+
+ start += PUD_SIZE;
+ pud++;
+@@ -813,7 +826,7 @@ static void unmap_pud_range(pgd_t *pgd,
+ * 2M leftovers?
+ */
+ if (start < end)
+- unmap_pmd_range(pud, start, end);
++ unmap_pmd_range(cpa, pud, start, end);
+
+ /*
+ * No need to try to free the PUD page because we'll free it in
+@@ -821,6 +834,24 @@ static void unmap_pud_range(pgd_t *pgd,
+ */
+ }
+
++static void unmap_pud_range(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++ struct cpa_data cpa = {
++ .flags = CPA_FREE_PAGETABLES,
++ };
++
++ __unmap_pud_range(&cpa, pgd, start, end);
++}
++
++void unmap_pud_range_nofree(pgd_t *pgd, unsigned long start, unsigned long end)
++{
++ struct cpa_data cpa = {
++ .flags = 0,
++ };
++
++ __unmap_pud_range(&cpa, pgd, start, end);
++}
++
+ static void unmap_pgd_range(pgd_t *root, unsigned long addr, unsigned long end)
+ {
+ pgd_t *pgd_entry = root + pgd_index(addr);
+--- a/arch/x86/mm/pgtable.c
++++ b/arch/x86/mm/pgtable.c
+@@ -5,7 +5,7 @@
+ #include <asm/tlb.h>
+ #include <asm/fixmap.h>
+
+-#define PGALLOC_GFP GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO
++#define PGALLOC_GFP (GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO)
+
+ #ifdef CONFIG_HIGHPTE
+ #define PGALLOC_USER_GFP __GFP_HIGHMEM
+@@ -271,12 +271,35 @@ static void pgd_prepopulate_pmd(struct m
+ }
+ }
+
++#ifdef CONFIG_KAISER
++/*
++ * Instead of one pmd, we acquire two pmds. Being order-1, it is
++ * both 8k in size and 8k-aligned. That lets us just flip bit 12
++ * in a pointer to swap between the two 4k halves.
++ */
++#define PGD_ALLOCATION_ORDER 1
++#else
++#define PGD_ALLOCATION_ORDER 0
++#endif
++
++static inline pgd_t *_pgd_alloc(void)
++{
++ /* No __GFP_REPEAT: to avoid page allocation stalls in order-1 case */
++ return (pgd_t *)__get_free_pages(PGALLOC_GFP & ~__GFP_REPEAT,
++ PGD_ALLOCATION_ORDER);
++}
++
++static inline void _pgd_free(pgd_t *pgd)
++{
++ free_pages((unsigned long)pgd, PGD_ALLOCATION_ORDER);
++}
++
+ pgd_t *pgd_alloc(struct mm_struct *mm)
+ {
+ pgd_t *pgd;
+ pmd_t *pmds[PREALLOCATED_PMDS];
+
+- pgd = (pgd_t *)__get_free_page(PGALLOC_GFP);
++ pgd = _pgd_alloc();
+
+ if (pgd == NULL)
+ goto out;
+@@ -306,7 +329,7 @@ pgd_t *pgd_alloc(struct mm_struct *mm)
+ out_free_pmds:
+ free_pmds(pmds);
+ out_free_pgd:
+- free_page((unsigned long)pgd);
++ _pgd_free(pgd);
+ out:
+ return NULL;
+ }
+@@ -316,7 +339,7 @@ void pgd_free(struct mm_struct *mm, pgd_
+ pgd_mop_up_pmds(mm, pgd);
+ pgd_dtor(pgd);
+ paravirt_pgd_free(mm, pgd);
+- free_page((unsigned long)pgd);
++ _pgd_free(pgd);
+ }
+
+ /*
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -6,13 +6,14 @@
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
+ #include <linux/cpu.h>
++#include <linux/debugfs.h>
+
+ #include <asm/tlbflush.h>
+ #include <asm/mmu_context.h>
+ #include <asm/cache.h>
+ #include <asm/apic.h>
+ #include <asm/uv/uv.h>
+-#include <linux/debugfs.h>
++#include <asm/kaiser.h>
+
+ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate)
+ = { &init_mm, 0, };
+@@ -37,6 +38,38 @@ struct flush_tlb_info {
+ unsigned long flush_end;
+ };
+
++static void load_new_mm_cr3(pgd_t *pgdir)
++{
++ unsigned long new_mm_cr3 = __pa(pgdir);
++
++#ifdef CONFIG_KAISER
++ if (this_cpu_has(X86_FEATURE_PCID)) {
++ /*
++ * We reuse the same PCID for different tasks, so we must
++ * flush all the entries for the PCID out when we change tasks.
++ * Flush KERN below, flush USER when returning to userspace in
++ * kaiser's SWITCH_USER_CR3 (_SWITCH_TO_USER_CR3) macro.
++ *
++ * invpcid_flush_single_context(X86_CR3_PCID_ASID_USER) could
++ * do it here, but can only be used if X86_FEATURE_INVPCID is
++ * available - and many machines support pcid without invpcid.
++ *
++ * The line below is a no-op: X86_CR3_PCID_KERN_FLUSH is now 0;
++ * but keep that line in there in case something changes.
++ */
++ new_mm_cr3 |= X86_CR3_PCID_KERN_FLUSH;
++ kaiser_flush_tlb_on_return_to_user();
++ }
++#endif /* CONFIG_KAISER */
++
++ /*
++ * Caution: many callers of this function expect
++ * that load_cr3() is serializing and orders TLB
++ * fills with respect to the mm_cpumask writes.
++ */
++ write_cr3(new_mm_cr3);
++}
++
+ /*
+ * We cannot call mmdrop() because we are in interrupt context,
+ * instead update mm->cpu_vm_mask.
+@@ -48,7 +81,7 @@ void leave_mm(int cpu)
+ BUG();
+ if (cpumask_test_cpu(cpu, mm_cpumask(active_mm))) {
+ cpumask_clear_cpu(cpu, mm_cpumask(active_mm));
+- load_cr3(swapper_pg_dir);
++ load_new_mm_cr3(swapper_pg_dir);
+ }
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+@@ -101,7 +134,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * ordering guarantee we need.
+ *
+ */
+- load_cr3(next->pgd);
++ load_new_mm_cr3(next->pgd);
+
+ /* Stop flush ipis for the previous mm */
+ cpumask_clear_cpu(cpu, mm_cpumask(prev));
+@@ -130,7 +163,7 @@ void switch_mm_irqs_off(struct mm_struct
+ * As above, load_cr3() is serializing and orders TLB
+ * fills with respect to the mm_cpumask write.
+ */
+- load_cr3(next->pgd);
++ load_new_mm_cr3(next->pgd);
+ load_mm_ldt(next);
+ }
+ }
+--- a/include/asm-generic/vmlinux.lds.h
++++ b/include/asm-generic/vmlinux.lds.h
+@@ -689,7 +689,14 @@
+ */
+ #define PERCPU_INPUT(cacheline) \
+ VMLINUX_SYMBOL(__per_cpu_start) = .; \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_start) = .; \
+ *(.data..percpu..first) \
++ . = ALIGN(cacheline); \
++ *(.data..percpu..user_mapped) \
++ *(.data..percpu..user_mapped..shared_aligned) \
++ . = ALIGN(PAGE_SIZE); \
++ *(.data..percpu..user_mapped..page_aligned) \
++ VMLINUX_SYMBOL(__per_cpu_user_mapped_end) = .; \
+ . = ALIGN(PAGE_SIZE); \
+ *(.data..percpu..page_aligned) \
+ . = ALIGN(cacheline); \
+--- /dev/null
++++ b/include/linux/kaiser.h
+@@ -0,0 +1,52 @@
++#ifndef _LINUX_KAISER_H
++#define _LINUX_KAISER_H
++
++#ifdef CONFIG_KAISER
++#include <asm/kaiser.h>
++
++static inline int kaiser_map_thread_stack(void *stack)
++{
++ /*
++ * Map that page of kernel stack on which we enter from user context.
++ */
++ return kaiser_add_mapping((unsigned long)stack +
++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE, __PAGE_KERNEL);
++}
++
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++ /*
++ * Note: may be called even when kaiser_map_thread_stack() failed.
++ */
++ kaiser_remove_mapping((unsigned long)stack +
++ THREAD_SIZE - PAGE_SIZE, PAGE_SIZE);
++}
++#else
++
++/*
++ * These stubs are used whenever CONFIG_KAISER is off, which
++ * includes architectures that support KAISER, but have it disabled.
++ */
++
++static inline void kaiser_init(void)
++{
++}
++static inline int kaiser_add_mapping(unsigned long addr,
++ unsigned long size, unsigned long flags)
++{
++ return 0;
++}
++static inline void kaiser_remove_mapping(unsigned long start,
++ unsigned long size)
++{
++}
++static inline int kaiser_map_thread_stack(void *stack)
++{
++ return 0;
++}
++static inline void kaiser_unmap_thread_stack(void *stack)
++{
++}
++
++#endif /* !CONFIG_KAISER */
++#endif /* _LINUX_KAISER_H */
+--- a/include/linux/mmzone.h
++++ b/include/linux/mmzone.h
+@@ -131,8 +131,9 @@ enum zone_stat_item {
+ NR_SLAB_RECLAIMABLE,
+ NR_SLAB_UNRECLAIMABLE,
+ NR_PAGETABLE, /* used for pagetables */
+- NR_KERNEL_STACK,
+ /* Second 128 byte cacheline */
++ NR_KERNEL_STACK,
++ NR_KAISERTABLE,
+ NR_UNSTABLE_NFS, /* NFS unstable pages */
+ NR_BOUNCE,
+ NR_VMSCAN_WRITE,
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -1,6 +1,12 @@
+ #ifndef _LINUX_PERCPU_DEFS_H
+ #define _LINUX_PERCPU_DEFS_H
+
++#ifdef CONFIG_KAISER
++#define USER_MAPPED_SECTION "..user_mapped"
++#else
++#define USER_MAPPED_SECTION ""
++#endif
++
+ /*
+ * Base implementations of per-CPU variable declarations and definitions, where
+ * the section in which the variable is to be placed is provided by the
+@@ -94,6 +100,12 @@
+ #define DEFINE_PER_CPU(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "")
+
++#define DECLARE_PER_CPU_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
++#define DEFINE_PER_CPU_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION)
++
+ /*
+ * Declaration/definition used for per-CPU variables that must come first in
+ * the set of variables.
+@@ -123,6 +135,14 @@
+ DEFINE_PER_CPU_SECTION(type, name, PER_CPU_SHARED_ALIGNED_SECTION) \
+ ____cacheline_aligned_in_smp
+
++#define DECLARE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++ ____cacheline_aligned_in_smp
++
++#define DEFINE_PER_CPU_SHARED_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION PER_CPU_SHARED_ALIGNED_SECTION) \
++ ____cacheline_aligned_in_smp
++
+ #define DECLARE_PER_CPU_ALIGNED(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, PER_CPU_ALIGNED_SECTION) \
+ ____cacheline_aligned
+@@ -141,11 +161,21 @@
+ #define DEFINE_PER_CPU_PAGE_ALIGNED(type, name) \
+ DEFINE_PER_CPU_SECTION(type, name, "..page_aligned") \
+ __aligned(PAGE_SIZE)
++/*
++ * Declaration/definition used for per-CPU variables that must be page aligned and need to be mapped in user mode.
++ */
++#define DECLARE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DECLARE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
++
++#define DEFINE_PER_CPU_PAGE_ALIGNED_USER_MAPPED(type, name) \
++ DEFINE_PER_CPU_SECTION(type, name, USER_MAPPED_SECTION"..page_aligned") \
++ __aligned(PAGE_SIZE)
+
+ /*
+ * Declaration/definition used for per-CPU variables that must be read mostly.
+ */
+-#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
++#define DECLARE_PER_CPU_READ_MOSTLY(type, name) \
+ DECLARE_PER_CPU_SECTION(type, name, "..read_mostly")
+
+ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \
+--- a/init/main.c
++++ b/init/main.c
+@@ -78,6 +78,7 @@
+ #include <linux/context_tracking.h>
+ #include <linux/random.h>
+ #include <linux/list.h>
++#include <linux/kaiser.h>
+
+ #include <asm/io.h>
+ #include <asm/bugs.h>
+@@ -497,6 +498,7 @@ static void __init mm_init(void)
+ percpu_init_late();
+ pgtable_init();
+ vmalloc_init();
++ kaiser_init();
+ }
+
+ asmlinkage __visible void __init start_kernel(void)
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -58,6 +58,7 @@
+ #include <linux/tsacct_kern.h>
+ #include <linux/cn_proc.h>
+ #include <linux/freezer.h>
++#include <linux/kaiser.h>
+ #include <linux/delayacct.h>
+ #include <linux/taskstats_kern.h>
+ #include <linux/random.h>
+@@ -158,6 +159,7 @@ static struct thread_info *alloc_thread_
+
+ static inline void free_thread_info(struct thread_info *ti)
+ {
++ kaiser_unmap_thread_stack(ti);
+ free_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
+ }
+ # else
+@@ -316,6 +318,10 @@ static struct task_struct *dup_task_stru
+
+ tsk->stack = ti;
+
++ err = kaiser_map_thread_stack(tsk->stack);
++ if (err)
++ goto free_ti;
++
+ setup_thread_stack(tsk, orig);
+ clear_user_return_notifier(tsk);
+ clear_tsk_need_resched(tsk);
+--- a/mm/vmstat.c
++++ b/mm/vmstat.c
+@@ -753,6 +753,7 @@ const char * const vmstat_text[] = {
+ "nr_slab_unreclaimable",
+ "nr_page_table_pages",
+ "nr_kernel_stack",
++ "nr_overhead",
+ "nr_unstable",
+ "nr_bounce",
+ "nr_vmscan_write",
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,6 +30,16 @@ config SECURITY
+
+ If you are unsure how to answer this question, answer N.
+
++config KAISER
++ bool "Remove the kernel mapping in user mode"
++ default y
++ depends on X86_64 && SMP && !PARAVIRT
++ help
++ This enforces a strict kernel and user space isolation, in order
++ to close hardware side channels on kernel address information.
++
++ If you are unsure how to answer this question, answer Y.
++
+ config SECURITYFS
+ bool "Enable the securityfs filesystem"
+ help
diff --git a/debian/patches/bugfix/all/kpti/kaiser-set-_page_user-of-the-vsyscall-page.patch b/debian/patches/bugfix/all/kpti/kaiser-set-_page_user-of-the-vsyscall-page.patch
new file mode 100644
index 0000000..34b0512
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-set-_page_user-of-the-vsyscall-page.patch
@@ -0,0 +1,145 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Thu, 4 Jan 2018 17:42:45 +0100
+Subject: kaiser: Set _PAGE_USER of the vsyscall page
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Signed-off-by: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16:
+ - Drop the case for disabled CONFIG_X86_VSYSCALL_EMULATION
+ - Adjust filename, context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/vsyscall.h | 2 ++
+ arch/x86/kernel/vsyscall_64.c | 12 +++++++++---
+ arch/x86/mm/kaiser.c | 19 +++++++++++++++----
+ 3 files changed, 26 insertions(+), 7 deletions(-)
+
+--- a/arch/x86/include/asm/vsyscall.h
++++ b/arch/x86/include/asm/vsyscall.h
+@@ -10,6 +10,7 @@
+ /* kernel space (writeable) */
+ extern int vgetcpu_mode;
+ extern struct timezone sys_tz;
++extern unsigned long vsyscall_pgprot;
+
+ #include <asm/vvar.h>
+
+@@ -20,6 +21,7 @@ extern void map_vsyscall(void);
+ * Returns true if handled.
+ */
+ extern bool emulate_vsyscall(struct pt_regs *regs, unsigned long address);
++extern bool vsyscall_enabled(void);
+
+ #ifdef CONFIG_X86_64
+
+--- a/arch/x86/kernel/vsyscall_64.c
++++ b/arch/x86/kernel/vsyscall_64.c
+@@ -55,6 +55,7 @@
+ DEFINE_VVAR(int, vgetcpu_mode);
+
+ static enum { EMULATE, NATIVE, NONE } vsyscall_mode = EMULATE;
++unsigned long vsyscall_pgprot = __PAGE_KERNEL_VSYSCALL;
+
+ static int __init vsyscall_setup(char *str)
+ {
+@@ -75,6 +76,11 @@ static int __init vsyscall_setup(char *s
+ }
+ early_param("vsyscall", vsyscall_setup);
+
++bool vsyscall_enabled(void)
++{
++ return vsyscall_mode != NONE;
++}
++
+ static void warn_bad_vsyscall(const char *level, struct pt_regs *regs,
+ const char *message)
+ {
+@@ -331,10 +337,10 @@ void __init map_vsyscall(void)
+ extern char __vsyscall_page;
+ unsigned long physaddr_vsyscall = __pa_symbol(&__vsyscall_page);
+
++ if (vsyscall_mode != NATIVE)
++ vsyscall_pgprot = __PAGE_KERNEL_VVAR;
+ __set_fixmap(VSYSCALL_PAGE, physaddr_vsyscall,
+- vsyscall_mode == NATIVE
+- ? PAGE_KERNEL_VSYSCALL
+- : PAGE_KERNEL_VVAR);
++ __pgprot(vsyscall_pgprot));
+ BUILD_BUG_ON((unsigned long)__fix_to_virt(VSYSCALL_PAGE) !=
+ (unsigned long)VSYSCALL_ADDR);
+ }
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -18,6 +18,7 @@ extern struct mm_struct init_mm;
+ #include <asm/pgtable.h>
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
++#include <asm/vsyscall.h>
+
+ #ifdef CONFIG_KAISER
+ __visible
+@@ -108,12 +109,13 @@ static inline unsigned long get_pa_from_
+ *
+ * Returns a pointer to a PTE on success, or NULL on failure.
+ */
+-static pte_t *kaiser_pagetable_walk(unsigned long address)
++static pte_t *kaiser_pagetable_walk(unsigned long address, bool user)
+ {
+ pmd_t *pmd;
+ pud_t *pud;
+ pgd_t *pgd = native_get_shadow_pgd(pgd_offset_k(address));
+ gfp_t gfp = (GFP_KERNEL | __GFP_NOTRACK | __GFP_ZERO);
++ unsigned long prot = _KERNPG_TABLE;
+
+ if (pgd_none(*pgd)) {
+ WARN_ONCE(1, "All shadow pgds should have been populated");
+@@ -121,6 +123,11 @@ static pte_t *kaiser_pagetable_walk(unsi
+ }
+ BUILD_BUG_ON(pgd_large(*pgd) != 0);
+
++ if (user) {
++ set_pgd(pgd, __pgd(pgd_val(*pgd) | _PAGE_USER));
++ prot = _PAGE_TABLE;
++ }
++
+ pud = pud_offset(pgd, address);
+ /* The shadow page tables do not use large mappings: */
+ if (pud_large(*pud)) {
+@@ -133,7 +140,7 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pud_none(*pud)) {
+- set_pud(pud, __pud(_KERNPG_TABLE | __pa(new_pmd_page)));
++ set_pud(pud, __pud(prot | __pa(new_pmd_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pmd_page), NR_KAISERTABLE);
+ } else
+@@ -153,7 +160,7 @@ static pte_t *kaiser_pagetable_walk(unsi
+ return NULL;
+ spin_lock(&shadow_table_allocation_lock);
+ if (pmd_none(*pmd)) {
+- set_pmd(pmd, __pmd(_KERNPG_TABLE | __pa(new_pte_page)));
++ set_pmd(pmd, __pmd(prot | __pa(new_pte_page)));
+ __inc_zone_page_state(virt_to_page((void *)
+ new_pte_page), NR_KAISERTABLE);
+ } else
+@@ -180,7 +187,7 @@ int kaiser_add_user_map(const void *__st
+ ret = -EIO;
+ break;
+ }
+- pte = kaiser_pagetable_walk(address);
++ pte = kaiser_pagetable_walk(address, flags & _PAGE_USER);
+ if (!pte) {
+ ret = -ENOMEM;
+ break;
+@@ -303,6 +310,10 @@ void __init kaiser_init(void)
+ kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
+ sizeof(x86_cr3_pcid_noflush),
+ __PAGE_KERNEL);
++
++ if (vsyscall_enabled())
++ kaiser_add_user_map_early((void *)VSYSCALL_ADDR, PAGE_SIZE,
++ vsyscall_pgprot);
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
diff --git a/debian/patches/bugfix/all/kpti/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch b/debian/patches/bugfix/all/kpti/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
new file mode 100644
index 0000000..20c9d4f
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
@@ -0,0 +1,127 @@
+From: Hugh Dickins <hughd at google.com>
+Date: Tue, 3 Oct 2017 20:49:04 -0700
+Subject: kaiser: use ALTERNATIVE instead of x86_cr3_pcid_noflush
+
+Now that we're playing the ALTERNATIVE game, use that more efficient
+method: instead of user-mapping an extra page and reading an extra
+cacheline each time for x86_cr3_pcid_noflush.
+
+Neel has found that __stringify(bts $X86_CR3_PCID_NOFLUSH_BIT, %rax)
+is a working substitute for the "bts $63, %rax" in these ALTERNATIVEs;
+but the one line with $63 in looks clearer, so let's stick with that.
+
+Worried about what happens with an ALTERNATIVE between the jump and
+jump label in another ALTERNATIVE? I was, but have checked the
+combinations in SWITCH_KERNEL_CR3_NO_STACK at entry_SYSCALL_64,
+and it does a good job.
+
+(cherry picked from Change-Id: I46d06167615aa8d628eed9972125ab2faca93f05)
+
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/kaiser.h | 6 +++---
+ arch/x86/kernel/entry_64.S | 7 ++++---
+ arch/x86/mm/kaiser.c | 11 +----------
+ 3 files changed, 8 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -25,7 +25,8 @@
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), \reg
+-orq x86_cr3_pcid_noflush, \reg
++/* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ALTERNATIVE "", "bts $63, \reg", X86_FEATURE_PCID
+ movq \reg, %cr3
+ .endm
+
+@@ -39,7 +40,7 @@ movq \reg, %cr3
+ movq %cr3, \reg
+ orq PER_CPU_VAR(x86_cr3_pcid_user), \reg
+ js 9f
+-/* FLUSH this time, reset to NOFLUSH for next time (if PCID enabled) */
++/* If PCID enabled, FLUSH this time, reset to NOFLUSH for next time */
+ movb \regb, PER_CPU_VAR(x86_cr3_pcid_user+7)
+ 9:
+ movq \reg, %cr3
+@@ -90,7 +91,6 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ */
+ DECLARE_PER_CPU_USER_MAPPED(unsigned long, unsafe_stack_register_backup);
+
+-extern unsigned long x86_cr3_pcid_noflush;
+ DECLARE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -331,7 +331,8 @@ ENTRY(save_paranoid)
+ jz 2f
+ orl $2, %ebx
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+- orq x86_cr3_pcid_noflush, %rax
++ /* If PCID enabled, set X86_CR3_PCID_NOFLUSH_BIT */
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ movq %rax, %cr3
+ 2:
+ #endif
+@@ -1603,7 +1604,7 @@ ENTRY(nmi)
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+- orq x86_cr3_pcid_noflush, %rax
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+@@ -1873,7 +1874,7 @@ end_repeat_nmi:
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+ /* If PCID enabled, NOFLUSH now and NOFLUSH on return */
+- orq x86_cr3_pcid_noflush, %rax
++ ALTERNATIVE "", "bts $63, %rax", X86_FEATURE_PCID
+ pushq %rax
+ /* mask off "user" bit of pgd address and 12 PCID bits: */
+ andq $(~(X86_CR3_PCID_ASID_MASK | KAISER_SHADOW_PGD_OFFSET)), %rax
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -35,7 +35,6 @@ DEFINE_PER_CPU_USER_MAPPED(unsigned long
+ * This is also handy because systems that do not support PCIDs
+ * just end up or'ing a 0 into their CR3, which does no harm.
+ */
+-unsigned long x86_cr3_pcid_noflush __read_mostly;
+ DEFINE_PER_CPU(unsigned long, x86_cr3_pcid_user);
+
+ /*
+@@ -361,10 +360,6 @@ void __init kaiser_init(void)
+ sizeof(gate_desc) * NR_VECTORS,
+ __PAGE_KERNEL);
+
+- kaiser_add_user_map_early(&x86_cr3_pcid_noflush,
+- sizeof(x86_cr3_pcid_noflush),
+- __PAGE_KERNEL);
+-
+ if (vsyscall_enabled())
+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR, PAGE_SIZE,
+ vsyscall_pgprot);
+@@ -441,18 +436,14 @@ pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp,
+
+ void kaiser_setup_pcid(void)
+ {
+- unsigned long kern_cr3 = 0;
+ unsigned long user_cr3 = KAISER_SHADOW_PGD_OFFSET;
+
+- if (this_cpu_has(X86_FEATURE_PCID)) {
+- kern_cr3 |= X86_CR3_PCID_KERN_NOFLUSH;
++ if (this_cpu_has(X86_FEATURE_PCID))
+ user_cr3 |= X86_CR3_PCID_USER_NOFLUSH;
+- }
+ /*
+ * These variables are used by the entry/exit
+ * code to change PCID and pgd and TLB flushing.
+ */
+- x86_cr3_pcid_noflush = kern_cr3;
+ this_cpu_write(x86_cr3_pcid_user, user_cr3);
+ }
+
diff --git a/debian/patches/bugfix/all/kpti/kpti-rename-to-page_table_isolation.patch b/debian/patches/bugfix/all/kpti/kpti-rename-to-page_table_isolation.patch
new file mode 100644
index 0000000..dcc1341
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kpti-rename-to-page_table_isolation.patch
@@ -0,0 +1,302 @@
+From: Kees Cook <keescook at chromium.org>
+Date: Thu, 4 Jan 2018 01:14:24 +0000
+Subject: KPTI: Rename to PAGE_TABLE_ISOLATION
+
+This renames CONFIG_KAISER to CONFIG_PAGE_TABLE_ISOLATION.
+
+Signed-off-by: Kees Cook <keescook at chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+[bwh: Backported to 3.16]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/boot/compressed/misc.h | 2 +-
+ arch/x86/include/asm/cpufeature.h | 2 +-
+ arch/x86/include/asm/kaiser.h | 12 ++++++------
+ arch/x86/include/asm/pgtable.h | 4 ++--
+ arch/x86/include/asm/pgtable_64.h | 4 ++--
+ arch/x86/include/asm/pgtable_types.h | 2 +-
+ arch/x86/include/asm/tlbflush.h | 2 +-
+ arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 ++--
+ arch/x86/kernel/entry_64.S | 12 ++++++------
+ arch/x86/kernel/head_64.S | 2 +-
+ arch/x86/mm/Makefile | 2 +-
+ include/linux/kaiser.h | 6 +++---
+ include/linux/percpu-defs.h | 2 +-
+ security/Kconfig | 2 +-
+ 14 files changed, 29 insertions(+), 29 deletions(-)
+
+--- a/arch/x86/boot/compressed/misc.h
++++ b/arch/x86/boot/compressed/misc.h
+@@ -9,7 +9,7 @@
+ */
+ #undef CONFIG_PARAVIRT
+ #undef CONFIG_PARAVIRT_SPINLOCKS
+-#undef CONFIG_KAISER
++#undef CONFIG_PAGE_TABLE_ISOLATION
+
+ #include <linux/linkage.h>
+ #include <linux/screen_info.h>
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -189,7 +189,7 @@
+ #define X86_FEATURE_INVPCID_SINGLE (7*32+10) /* Effectively INVPCID && CR4.PCIDE=1 */
+
+ /* Because the ALTERNATIVE scheme is for members of the X86_FEATURE club... */
+-#define X86_FEATURE_KAISER (7*32+31) /* "" CONFIG_KAISER w/o nokaiser */
++#define X86_FEATURE_KAISER (7*32+31) /* "" CONFIG_PAGE_TABLE_ISOLATION w/o nokaiser */
+
+ /* Virtualization flags: Linux defined, word 8 */
+ #define X86_FEATURE_TPR_SHADOW (8*32+ 0) /* Intel TPR Shadow */
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -20,7 +20,7 @@
+ #define KAISER_SHADOW_PGD_OFFSET 0x1000
+
+ #ifdef __ASSEMBLY__
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+
+ .macro _SWITCH_TO_KERNEL_CR3 reg
+ movq %cr3, \reg
+@@ -69,7 +69,7 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ 8:
+ .endm
+
+-#else /* CONFIG_KAISER */
++#else /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ .macro SWITCH_KERNEL_CR3
+ .endm
+@@ -78,11 +78,11 @@ movq PER_CPU_VAR(unsafe_stack_register_b
+ .macro SWITCH_KERNEL_CR3_NO_STACK
+ .endm
+
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ #else /* __ASSEMBLY__ */
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Upon kernel/user mode switch, it may happen that the address
+ * space has to be switched before the registers have been
+@@ -100,10 +100,10 @@ extern void __init kaiser_check_boottime
+ #else
+ #define kaiser_enabled 0
+ static inline void __init kaiser_check_boottime_disable(void) {}
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ /*
+- * Kaiser function prototypes are needed even when CONFIG_KAISER is not set,
++ * Kaiser function prototypes are needed even when CONFIG_PAGE_TABLE_ISOLATION is not set,
+ * so as to build with tests on kaiser_enabled instead of #ifdefs.
+ */
+
+--- a/arch/x86/include/asm/pgtable.h
++++ b/arch/x86/include/asm/pgtable.h
+@@ -17,7 +17,7 @@
+ #ifndef __ASSEMBLY__
+ #include <asm/x86_init.h>
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern int kaiser_enabled;
+ #else
+ #define kaiser_enabled 0
+@@ -860,7 +860,7 @@ static inline void pmdp_set_wrprotect(st
+ static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
+ {
+ memcpy(dst, src, count * sizeof(pgd_t));
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (kaiser_enabled) {
+ /* Clone the shadow pgd part as well */
+ memcpy(native_get_shadow_pgd(dst),
+--- a/arch/x86/include/asm/pgtable_64.h
++++ b/arch/x86/include/asm/pgtable_64.h
+@@ -106,7 +106,7 @@ static inline void native_pud_clear(pud_
+ native_set_pud(pud, native_make_pud(0));
+ }
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern pgd_t kaiser_set_shadow_pgd(pgd_t *pgdp, pgd_t pgd);
+
+ static inline pgd_t *native_get_shadow_pgd(pgd_t *pgdp)
+@@ -127,7 +127,7 @@ static inline pgd_t *native_get_shadow_p
+ BUILD_BUG_ON(1);
+ return NULL;
+ }
+-#endif /* CONFIG_KAISER */
++#endif /* CONFIG_PAGE_TABLE_ISOLATION */
+
+ static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd)
+ {
+--- a/arch/x86/include/asm/pgtable_types.h
++++ b/arch/x86/include/asm/pgtable_types.h
+@@ -136,7 +136,7 @@
+ #define X86_CR3_PCID_MASK (X86_CR3_PCID_NOFLUSH | X86_CR3_PCID_ASID_MASK)
+ #define X86_CR3_PCID_ASID_KERN (_AC(0x0,UL))
+
+-#if defined(CONFIG_KAISER) && defined(CONFIG_X86_64)
++#if defined(CONFIG_PAGE_TABLE_ISOLATION) && defined(CONFIG_X86_64)
+ /* Let X86_CR3_PCID_ASID_USER be usable for the X86_CR3_PCID_NOFLUSH bit */
+ #define X86_CR3_PCID_ASID_USER (_AC(0x80,UL))
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -68,7 +68,7 @@ static inline void invpcid_flush_all_non
+ * Declare a couple of kaiser interfaces here for convenience,
+ * to avoid the need for asm/kaiser.h in unexpected places.
+ */
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ extern int kaiser_enabled;
+ extern void kaiser_setup_pcid(void);
+ extern void kaiser_flush_tlb_on_return_to_user(void);
+--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
++++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
+@@ -262,7 +262,7 @@ static DEFINE_PER_CPU(void *, insn_buffe
+
+ static void *dsalloc(size_t size, gfp_t flags, int node)
+ {
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ unsigned int order = get_order(size);
+ struct page *page;
+ unsigned long addr;
+@@ -283,7 +283,7 @@ static void *dsalloc(size_t size, gfp_t
+
+ static void dsfree(const void *buffer, size_t size)
+ {
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ if (!buffer)
+ return;
+ kaiser_remove_mapping((unsigned long)buffer, size);
+--- a/arch/x86/kernel/entry_64.S
++++ b/arch/x86/kernel/entry_64.S
+@@ -318,7 +318,7 @@ ENTRY(save_paranoid)
+ SWAPGS
+ xorl %ebx,%ebx
+ 1:
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * We might have come in between a swapgs and a SWITCH_KERNEL_CR3
+ * on entry, or between a SWITCH_USER_CR3 and a swapgs on exit.
+@@ -1351,7 +1351,7 @@ ENTRY(paranoid_exit)
+ paranoid_kernel:
+ movq %r12, %rbx /* restore after paranoid_userspace */
+ TRACE_IRQS_IRETQ_DEBUG 0
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* No ALTERNATIVE for X86_FEATURE_KAISER: paranoid_entry sets %ebx */
+ testl $2, %ebx /* SWITCH_USER_CR3 needed? */
+ jz paranoid_exit_no_switch
+@@ -1599,7 +1599,7 @@ ENTRY(nmi)
+
+ movq %rsp, %rdi
+ movq $-1, %rsi
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+@@ -1613,7 +1613,7 @@ ENTRY(nmi)
+ #endif
+ call do_nmi
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. I know we return to
+ * kernel code that needs user CR3, but do we ever return
+@@ -1869,7 +1869,7 @@ end_repeat_nmi:
+ 1:
+ movq %rsp,%rdi
+ movq $-1,%rsi
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /* Unconditionally use kernel CR3 for do_nmi() */
+ /* %rax is saved above, so OK to clobber here */
+ ALTERNATIVE "jmp 2f", "movq %cr3, %rax", X86_FEATURE_KAISER
+@@ -1886,7 +1886,7 @@ end_repeat_nmi:
+ /* paranoidentry do_nmi, 0; without TRACE_IRQS_OFF */
+ call do_nmi
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Unconditionally restore CR3. We might be returning to
+ * kernel code that needs user CR3, like just just before
+--- a/arch/x86/kernel/head_64.S
++++ b/arch/x86/kernel/head_64.S
+@@ -441,7 +441,7 @@ early_idt_ripmsg:
+ .balign PAGE_SIZE; \
+ GLOBAL(name)
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ /*
+ * Each PGD needs to be 8k long and 8k aligned. We do not
+ * ever go out to userspace with these, so we do not
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -29,4 +29,4 @@ obj-$(CONFIG_ACPI_NUMA) += srat.o
+ obj-$(CONFIG_NUMA_EMU) += numa_emulation.o
+
+ obj-$(CONFIG_MEMTEST) += memtest.o
+-obj-$(CONFIG_KAISER) += kaiser.o
++obj-$(CONFIG_PAGE_TABLE_ISOLATION) += kaiser.o
+--- a/include/linux/kaiser.h
++++ b/include/linux/kaiser.h
+@@ -1,7 +1,7 @@
+ #ifndef _LINUX_KAISER_H
+ #define _LINUX_KAISER_H
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ #include <asm/kaiser.h>
+
+ static inline int kaiser_map_thread_stack(void *stack)
+@@ -24,7 +24,7 @@ static inline void kaiser_unmap_thread_s
+ #else
+
+ /*
+- * These stubs are used whenever CONFIG_KAISER is off, which
++ * These stubs are used whenever CONFIG_PAGE_TABLE_ISOLATION is off, which
+ * includes architectures that support KAISER, but have it disabled.
+ */
+
+@@ -48,5 +48,5 @@ static inline void kaiser_unmap_thread_s
+ {
+ }
+
+-#endif /* !CONFIG_KAISER */
++#endif /* !CONFIG_PAGE_TABLE_ISOLATION */
+ #endif /* _LINUX_KAISER_H */
+--- a/include/linux/percpu-defs.h
++++ b/include/linux/percpu-defs.h
+@@ -1,7 +1,7 @@
+ #ifndef _LINUX_PERCPU_DEFS_H
+ #define _LINUX_PERCPU_DEFS_H
+
+-#ifdef CONFIG_KAISER
++#ifdef CONFIG_PAGE_TABLE_ISOLATION
+ #define USER_MAPPED_SECTION "..user_mapped"
+ #else
+ #define USER_MAPPED_SECTION ""
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -30,7 +30,7 @@ config SECURITY
+
+ If you are unsure how to answer this question, answer N.
+
+-config KAISER
++config PAGE_TABLE_ISOLATION
+ bool "Remove the kernel mapping in user mode"
+ default y
+ depends on X86_64 && SMP
diff --git a/debian/patches/bugfix/all/kpti/kpti-report-when-enabled.patch b/debian/patches/bugfix/all/kpti/kpti-report-when-enabled.patch
new file mode 100644
index 0000000..11d6d34
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/kpti-report-when-enabled.patch
@@ -0,0 +1,44 @@
+From: Kees Cook <keescook at chromium.org>
+Date: Wed, 3 Jan 2018 10:18:01 -0800
+Subject: KPTI: Report when enabled
+
+Make sure dmesg reports when KPTI is enabled.
+
+Signed-off-by: Kees Cook <keescook at chromium.org>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/kaiser.c | 7 ++++++-
+ 1 file changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -12,6 +12,9 @@
+ #include <linux/ftrace.h>
+ #include <xen/xen.h>
+
++#undef pr_fmt
++#define pr_fmt(fmt) "Kernel/User page tables isolation: " fmt
++
+ extern struct mm_struct init_mm;
+
+ #include <asm/kaiser.h>
+@@ -303,7 +306,7 @@ enable:
+ return;
+
+ disable:
+- pr_info("Kernel/User page tables isolation: disabled\n");
++ pr_info("disabled\n");
+
+ silent_disable:
+ kaiser_enabled = 0;
+@@ -367,6 +370,8 @@ void __init kaiser_init(void)
+ if (vsyscall_enabled())
+ kaiser_add_user_map_early((void *)VSYSCALL_ADDR, PAGE_SIZE,
+ vsyscall_pgprot);
++
++ pr_info("enabled\n");
+ }
+
+ /* Add a mapping to the shadow mapping, and synchronize the mappings */
diff --git a/debian/patches/bugfix/all/kpti/mm-mmu_context-sched-core-fix-mmu_context.h-assumption.patch b/debian/patches/bugfix/all/kpti/mm-mmu_context-sched-core-fix-mmu_context.h-assumption.patch
new file mode 100644
index 0000000..7be8dec
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/mm-mmu_context-sched-core-fix-mmu_context.h-assumption.patch
@@ -0,0 +1,37 @@
+From: Ingo Molnar <mingo at kernel.org>
+Date: Thu, 28 Apr 2016 11:39:12 +0200
+Subject: mm/mmu_context, sched/core: Fix mmu_context.h assumption
+
+commit 8efd755ac2fe262d4c8d5c9bbe054bb67dae93da upstream.
+
+Some architectures (such as Alpha) rely on include/linux/sched.h definitions
+in their mmu_context.h files.
+
+So include sched.h before mmu_context.h.
+
+Cc: Andy Lutomirski <luto at kernel.org>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: linux-kernel at vger.kernel.org
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ mm/mmu_context.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/mm/mmu_context.c
++++ b/mm/mmu_context.c
+@@ -4,9 +4,9 @@
+ */
+
+ #include <linux/mm.h>
++#include <linux/sched.h>
+ #include <linux/mmu_context.h>
+ #include <linux/export.h>
+-#include <linux/sched.h>
+
+ #include <asm/mmu_context.h>
+
diff --git a/debian/patches/bugfix/all/kpti/sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch b/debian/patches/bugfix/all/kpti/sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch
new file mode 100644
index 0000000..d4cc8ff
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch
@@ -0,0 +1,73 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Tue, 26 Apr 2016 09:39:06 -0700
+Subject: sched/core: Add switch_mm_irqs_off() and use it in the scheduler
+
+commit f98db6013c557c216da5038d9c52045be55cd039 upstream.
+
+By default, this is the same thing as switch_mm().
+
+x86 will override it as an optimization.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/df401df47bdd6be3e389c6f1e3f5310d70e81b2c.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ include/linux/mmu_context.h | 7 +++++++
+ kernel/sched/core.c | 6 +++---
+ 2 files changed, 10 insertions(+), 3 deletions(-)
+
+--- a/include/linux/mmu_context.h
++++ b/include/linux/mmu_context.h
+@@ -1,9 +1,16 @@
+ #ifndef _LINUX_MMU_CONTEXT_H
+ #define _LINUX_MMU_CONTEXT_H
+
++#include <asm/mmu_context.h>
++
+ struct mm_struct;
+
+ void use_mm(struct mm_struct *mm);
+ void unuse_mm(struct mm_struct *mm);
+
++/* Architectures that care about IRQ state in switch_mm can override this. */
++#ifndef switch_mm_irqs_off
++# define switch_mm_irqs_off switch_mm
++#endif
++
+ #endif
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -32,7 +32,7 @@
+ #include <linux/init.h>
+ #include <linux/uaccess.h>
+ #include <linux/highmem.h>
+-#include <asm/mmu_context.h>
++#include <linux/mmu_context.h>
+ #include <linux/interrupt.h>
+ #include <linux/capability.h>
+ #include <linux/completion.h>
+@@ -2374,7 +2374,7 @@ context_switch(struct rq *rq, struct tas
+ atomic_inc(&oldmm->mm_count);
+ enter_lazy_tlb(oldmm, next);
+ } else
+- switch_mm(oldmm, mm, next);
++ switch_mm_irqs_off(oldmm, mm, next);
+
+ if (!prev->mm) {
+ prev->active_mm = NULL;
+@@ -4850,7 +4850,7 @@ void idle_task_exit(void)
+ BUG_ON(cpu_online(smp_processor_id()));
+
+ if (mm != &init_mm) {
+- switch_mm(mm, &init_mm, current);
++ switch_mm_irqs_off(mm, &init_mm, current);
+ finish_arch_post_lock_switch();
+ }
+ mmdrop(mm);
diff --git a/debian/patches/bugfix/all/kpti/sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch b/debian/patches/bugfix/all/kpti/sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch
new file mode 100644
index 0000000..9fbbf4b
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch
@@ -0,0 +1,41 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Fri, 9 Jun 2017 11:49:15 -0700
+Subject: sched/core: Idle_task_exit() shouldn't use switch_mm_irqs_off()
+
+commit 252d2a4117bc181b287eeddf848863788da733ae upstream.
+
+idle_task_exit() can be called with IRQs on x86 on and therefore
+should use switch_mm(), not switch_mm_irqs_off().
+
+This doesn't seem to cause any problems right now, but it will
+confuse my upcoming TLB flush changes. Nonetheless, I think it
+should be backported because it's trivial. There won't be any
+meaningful performance impact because idle_task_exit() is only
+used when offlining a CPU.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Cc: Borislav Petkov <bp at suse.de>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: stable at vger.kernel.org
+Fixes: f98db6013c55 ("sched/core: Add switch_mm_irqs_off() and use it in the scheduler")
+Link: http://lkml.kernel.org/r/ca3d1a9fa93a0b49f5a8ff729eda3640fb6abdf9.1497034141.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ kernel/sched/core.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/kernel/sched/core.c
++++ b/kernel/sched/core.c
+@@ -4850,7 +4850,7 @@ void idle_task_exit(void)
+ BUG_ON(cpu_online(smp_processor_id()));
+
+ if (mm != &init_mm) {
+- switch_mm_irqs_off(mm, &init_mm, current);
++ switch_mm(mm, &init_mm, current);
+ finish_arch_post_lock_switch();
+ }
+ mmdrop(mm);
diff --git a/debian/patches/bugfix/all/kpti/x86-alternatives-add-instruction-padding.patch b/debian/patches/bugfix/all/kpti/x86-alternatives-add-instruction-padding.patch
new file mode 100644
index 0000000..092088c
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-alternatives-add-instruction-padding.patch
@@ -0,0 +1,424 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Sat, 27 Dec 2014 10:41:52 +0100
+Subject: x86/alternatives: Add instruction padding
+
+commit 4332195c5615bf748624094ce4ff6797e475024d upstream.
+
+Up until now we have always paid attention to make sure the length of
+the new instruction replacing the old one is at least less or equal to
+the length of the old instruction. If the new instruction is longer, at
+the time it replaces the old instruction it will overwrite the beginning
+of the next instruction in the kernel image and cause your pants to
+catch fire.
+
+So instead of having to pay attention, teach the alternatives framework
+to pad shorter old instructions with NOPs at buildtime - but only in the
+case when
+
+ len(old instruction(s)) < len(new instruction(s))
+
+and add nothing in the >= case. (In that case we do add_nops() when
+patching).
+
+This way the alternatives user shouldn't have to care about instruction
+sizes and simply use the macros.
+
+Add asm ALTERNATIVE* flavor macros too, while at it.
+
+Also, we need to save the pad length in a separate struct alt_instr
+member for NOP optimization and the way to do that reliably is to carry
+the pad length instead of trying to detect whether we're looking at
+single-byte NOPs or at pathological instruction offsets like e9 90 90 90
+90, for example, which is a valid instruction.
+
+Thanks to Michael Matz for the great help with toolchain questions.
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/alternative-asm.h | 43 +++++++++++++++++++++-
+ arch/x86/include/asm/alternative.h | 65 +++++++++++++++++++++-------------
+ arch/x86/include/asm/cpufeature.h | 22 ++++++++----
+ arch/x86/include/asm/smap.h | 4 +--
+ arch/x86/kernel/alternative.c | 6 ++--
+ arch/x86/kernel/entry_32.S | 2 +-
+ arch/x86/lib/clear_page_64.S | 4 +--
+ arch/x86/lib/copy_page_64.S | 2 +-
+ arch/x86/lib/copy_user_64.S | 4 +--
+ arch/x86/lib/memcpy_64.S | 8 ++---
+ arch/x86/lib/memmove_64.S | 2 +-
+ arch/x86/lib/memset_64.S | 8 ++---
+ 12 files changed, 118 insertions(+), 52 deletions(-)
+
+--- a/arch/x86/include/asm/alternative-asm.h
++++ b/arch/x86/include/asm/alternative-asm.h
+@@ -18,12 +18,53 @@
+ .endm
+ #endif
+
+-.macro altinstruction_entry orig alt feature orig_len alt_len
++.macro altinstruction_entry orig alt feature orig_len alt_len pad_len
+ .long \orig - .
+ .long \alt - .
+ .word \feature
+ .byte \orig_len
+ .byte \alt_len
++ .byte \pad_len
++.endm
++
++.macro ALTERNATIVE oldinstr, newinstr, feature
++140:
++ \oldinstr
++141:
++ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
++142:
++
++ .pushsection .altinstructions,"a"
++ altinstruction_entry 140b,143f,\feature,142b-140b,144f-143f,142b-141b
++ .popsection
++
++ .pushsection .altinstr_replacement,"ax"
++143:
++ \newinstr
++144:
++ .popsection
++.endm
++
++.macro ALTERNATIVE_2 oldinstr, newinstr1, feature1, newinstr2, feature2
++140:
++ \oldinstr
++141:
++ .skip -(((144f-143f)-(141b-140b)) > 0) * ((144f-143f)-(141b-140b)),0x90
++ .skip -(((145f-144f)-(144f-143f)-(141b-140b)) > 0) * ((145f-144f)-(144f-143f)-(141b-140b)),0x90
++142:
++
++ .pushsection .altinstructions,"a"
++ altinstruction_entry 140b,143f,\feature1,142b-140b,144f-143f,142b-141b
++ altinstruction_entry 140b,144f,\feature2,142b-140b,145f-144f,142b-141b
++ .popsection
++
++ .pushsection .altinstr_replacement,"ax"
++143:
++ \newinstr1
++144:
++ \newinstr2
++145:
++ .popsection
+ .endm
+
+ #endif /* __ASSEMBLY__ */
+--- a/arch/x86/include/asm/alternative.h
++++ b/arch/x86/include/asm/alternative.h
+@@ -48,8 +48,9 @@ struct alt_instr {
+ s32 repl_offset; /* offset to replacement instruction */
+ u16 cpuid; /* cpuid bit set for replacement */
+ u8 instrlen; /* length of original instruction */
+- u8 replacementlen; /* length of new instruction, <= instrlen */
+-};
++ u8 replacementlen; /* length of new instruction */
++ u8 padlen; /* length of build-time padding */
++} __packed;
+
+ extern void alternative_instructions(void);
+ extern void apply_alternatives(struct alt_instr *start, struct alt_instr *end);
+@@ -76,50 +77,61 @@ static inline int alternatives_text_rese
+ }
+ #endif /* CONFIG_SMP */
+
+-#define OLDINSTR(oldinstr) "661:\n\t" oldinstr "\n662:\n"
++#define b_replacement(num) "664"#num
++#define e_replacement(num) "665"#num
+
+-#define b_replacement(number) "663"#number
+-#define e_replacement(number) "664"#number
++#define alt_end_marker "663"
++#define alt_slen "662b-661b"
++#define alt_pad_len alt_end_marker"b-662b"
++#define alt_total_slen alt_end_marker"b-661b"
++#define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f"
++
++#define __OLDINSTR(oldinstr, num) \
++ "661:\n\t" oldinstr "\n662:\n" \
++ ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \
++ "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n"
++
++#define OLDINSTR(oldinstr, num) \
++ __OLDINSTR(oldinstr, num) \
++ alt_end_marker ":\n"
+
+-#define alt_slen "662b-661b"
+-#define alt_rlen(number) e_replacement(number)"f-"b_replacement(number)"f"
++/*
++ * Pad the second replacement alternative with additional NOPs if it is
++ * additionally longer than the first replacement alternative.
++ */
++#define OLDINSTR_2(oldinstr, num1, num2) \
++ __OLDINSTR(oldinstr, num1) \
++ ".skip -(((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)) > 0) * " \
++ "((" alt_rlen(num2) ")-(" alt_rlen(num1) ")-(662b-661b)),0x90\n" \
++ alt_end_marker ":\n"
+
+-#define ALTINSTR_ENTRY(feature, number) \
++#define ALTINSTR_ENTRY(feature, num) \
+ " .long 661b - .\n" /* label */ \
+- " .long " b_replacement(number)"f - .\n" /* new instruction */ \
++ " .long " b_replacement(num)"f - .\n" /* new instruction */ \
+ " .word " __stringify(feature) "\n" /* feature bit */ \
+- " .byte " alt_slen "\n" /* source len */ \
+- " .byte " alt_rlen(number) "\n" /* replacement len */
+-
+-#define DISCARD_ENTRY(number) /* rlen <= slen */ \
+- " .byte 0xff + (" alt_rlen(number) ") - (" alt_slen ")\n"
++ " .byte " alt_total_slen "\n" /* source len */ \
++ " .byte " alt_rlen(num) "\n" /* replacement len */ \
++ " .byte " alt_pad_len "\n" /* pad len */
+
+-#define ALTINSTR_REPLACEMENT(newinstr, feature, number) /* replacement */ \
+- b_replacement(number)":\n\t" newinstr "\n" e_replacement(number) ":\n\t"
++#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \
++ b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t"
+
+ /* alternative assembly primitive: */
+ #define ALTERNATIVE(oldinstr, newinstr, feature) \
+- OLDINSTR(oldinstr) \
++ OLDINSTR(oldinstr, 1) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature, 1) \
+ ".popsection\n" \
+- ".pushsection .discard,\"aw\", at progbits\n" \
+- DISCARD_ENTRY(1) \
+- ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr, feature, 1) \
+ ".popsection"
+
+ #define ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2)\
+- OLDINSTR(oldinstr) \
++ OLDINSTR_2(oldinstr, 1, 2) \
+ ".pushsection .altinstructions,\"a\"\n" \
+ ALTINSTR_ENTRY(feature1, 1) \
+ ALTINSTR_ENTRY(feature2, 2) \
+ ".popsection\n" \
+- ".pushsection .discard,\"aw\", at progbits\n" \
+- DISCARD_ENTRY(1) \
+- DISCARD_ENTRY(2) \
+- ".popsection\n" \
+ ".pushsection .altinstr_replacement, \"ax\"\n" \
+ ALTINSTR_REPLACEMENT(newinstr1, feature1, 1) \
+ ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \
+@@ -146,6 +158,9 @@ static inline int alternatives_text_rese
+ #define alternative(oldinstr, newinstr, feature) \
+ asm volatile (ALTERNATIVE(oldinstr, newinstr, feature) : : : "memory")
+
++#define alternative_2(oldinstr, newinstr1, feature1, newinstr2, feature2) \
++ asm volatile(ALTERNATIVE_2(oldinstr, newinstr1, feature1, newinstr2, feature2) ::: "memory")
++
+ /*
+ * Alternative inline assembly with input.
+ *
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -390,6 +390,7 @@ static __always_inline __pure bool __sta
+ " .word %P0\n" /* 1: do replace */
+ " .byte 2b - 1b\n" /* source len */
+ " .byte 0\n" /* replacement len */
++ " .byte 0\n" /* pad len */
+ ".previous\n"
+ /* skipping size check since replacement size = 0 */
+ : : "i" (X86_FEATURE_ALWAYS) : : t_warn);
+@@ -404,6 +405,7 @@ static __always_inline __pure bool __sta
+ " .word %P0\n" /* feature bit */
+ " .byte 2b - 1b\n" /* source len */
+ " .byte 0\n" /* replacement len */
++ " .byte 0\n" /* pad len */
+ ".previous\n"
+ /* skipping size check since replacement size = 0 */
+ : : "i" (bit) : : t_no);
+@@ -429,6 +431,7 @@ static __always_inline __pure bool __sta
+ " .word %P1\n" /* feature bit */
+ " .byte 2b - 1b\n" /* source len */
+ " .byte 4f - 3f\n" /* replacement len */
++ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .discard,\"aw\", at progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
+@@ -463,23 +466,28 @@ static __always_inline __pure bool _stat
+ */
+ asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
+ "2:\n"
++ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
++ "((5f-4f) - (2b-1b)),0x90\n"
++ "3:\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+- " .long 3f - .\n" /* repl offset */
++ " .long 4f - .\n" /* repl offset */
+ " .word %P1\n" /* always replace */
+- " .byte 2b - 1b\n" /* src len */
+- " .byte 4f - 3f\n" /* repl len */
++ " .byte 3b - 1b\n" /* src len */
++ " .byte 5f - 4f\n" /* repl len */
++ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+- "3: .byte 0xe9\n .long %l[t_no] - 2b\n"
+- "4:\n"
++ "4: .byte 0xe9\n .long %l[t_no] - 2b\n"
++ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+ " .long 1b - .\n" /* src offset */
+ " .long 0\n" /* no replacement */
+ " .word %P0\n" /* feature bit */
+- " .byte 2b - 1b\n" /* src len */
++ " .byte 3b - 1b\n" /* src len */
+ " .byte 0\n" /* repl len */
++ " .byte 0\n" /* pad len */
+ ".previous\n"
+ : : "i" (bit), "i" (X86_FEATURE_ALWAYS)
+ : : t_dynamic, t_no);
+@@ -499,6 +507,7 @@ static __always_inline __pure bool _stat
+ " .word %P2\n" /* always replace */
+ " .byte 2b - 1b\n" /* source len */
+ " .byte 4f - 3f\n" /* replacement len */
++ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .discard,\"aw\", at progbits\n"
+ " .byte 0xff + (4f-3f) - (2b-1b)\n" /* size check */
+@@ -513,6 +522,7 @@ static __always_inline __pure bool _stat
+ " .word %P1\n" /* feature bit */
+ " .byte 4b - 3b\n" /* src len */
+ " .byte 6f - 5f\n" /* repl len */
++ " .byte 0\n" /* pad len */
+ ".previous\n"
+ ".section .discard,\"aw\", at progbits\n"
+ " .byte 0xff + (6f-5f) - (4b-3b)\n" /* size check */
+--- a/arch/x86/include/asm/smap.h
++++ b/arch/x86/include/asm/smap.h
+@@ -33,7 +33,7 @@
+ 662: __ASM_CLAC ; \
+ .popsection ; \
+ .pushsection .altinstructions, "a" ; \
+- altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
++ altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3, 0 ; \
+ .popsection
+
+ #define ASM_STAC \
+@@ -42,7 +42,7 @@
+ 662: __ASM_STAC ; \
+ .popsection ; \
+ .pushsection .altinstructions, "a" ; \
+- altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3 ; \
++ altinstruction_entry 661b, 662b, X86_FEATURE_SMAP, 3, 3, 0 ; \
+ .popsection
+
+ #else /* CONFIG_X86_SMAP */
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -270,7 +270,6 @@ void __init_or_module apply_alternatives
+ for (a = start; a < end; a++) {
+ instr = (u8 *)&a->instr_offset + a->instr_offset;
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
+- BUG_ON(a->replacementlen > a->instrlen);
+ BUG_ON(a->instrlen > sizeof(insnbuf));
+ BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
+ if (!boot_cpu_has(a->cpuid))
+@@ -290,8 +289,9 @@ void __init_or_module apply_alternatives
+ DPRINTK("Fix CALL offset: 0x%x", *(s32 *)(insnbuf + 1));
+ }
+
+- add_nops(insnbuf + a->replacementlen,
+- a->instrlen - a->replacementlen);
++ if (a->instrlen > a->replacementlen)
++ add_nops(insnbuf + a->replacementlen,
++ a->instrlen - a->replacementlen);
+
+ text_poke_early(instr, insnbuf, a->instrlen);
+ }
+--- a/arch/x86/kernel/entry_32.S
++++ b/arch/x86/kernel/entry_32.S
+@@ -821,7 +821,7 @@ ENTRY(simd_coprocessor_error)
+ 661: pushl_cfi $do_general_protection
+ 662:
+ .section .altinstructions,"a"
+- altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f
++ altinstruction_entry 661b, 663f, X86_FEATURE_XMM, 662b-661b, 664f-663f, 0
+ .previous
+ .section .altinstr_replacement,"ax"
+ 663: pushl $do_simd_coprocessor_error
+--- a/arch/x86/lib/clear_page_64.S
++++ b/arch/x86/lib/clear_page_64.S
+@@ -67,7 +67,7 @@ ENDPROC(clear_page)
+ .previous
+ .section .altinstructions,"a"
+ altinstruction_entry clear_page,1b,X86_FEATURE_REP_GOOD,\
+- .Lclear_page_end-clear_page, 2b-1b
++ .Lclear_page_end-clear_page, 2b-1b, 0
+ altinstruction_entry clear_page,2b,X86_FEATURE_ERMS, \
+- .Lclear_page_end-clear_page,3b-2b
++ .Lclear_page_end-clear_page,3b-2b, 0
+ .previous
+--- a/arch/x86/lib/copy_page_64.S
++++ b/arch/x86/lib/copy_page_64.S
+@@ -106,5 +106,5 @@ ENDPROC(copy_page)
+ .previous
+ .section .altinstructions,"a"
+ altinstruction_entry copy_page, 1b, X86_FEATURE_REP_GOOD, \
+- .Lcopy_page_end-copy_page, 2b-1b
++ .Lcopy_page_end-copy_page, 2b-1b, 0
+ .previous
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -39,8 +39,8 @@
+ .previous
+
+ .section .altinstructions,"a"
+- altinstruction_entry 0b,2b,\feature1,5,5
+- altinstruction_entry 0b,3b,\feature2,5,5
++ altinstruction_entry 0b,2b,\feature1,5,5,0
++ altinstruction_entry 0b,3b,\feature2,5,5,0
+ .previous
+ .endm
+
+--- a/arch/x86/lib/memcpy_64.S
++++ b/arch/x86/lib/memcpy_64.S
+@@ -199,8 +199,8 @@ ENDPROC(__memcpy)
+ * only outcome...
+ */
+ .section .altinstructions, "a"
+- altinstruction_entry memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
+- .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c
+- altinstruction_entry memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
+- .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e
++ altinstruction_entry __memcpy,.Lmemcpy_c,X86_FEATURE_REP_GOOD,\
++ .Lmemcpy_e-.Lmemcpy_c,.Lmemcpy_e-.Lmemcpy_c,0
++ altinstruction_entry __memcpy,.Lmemcpy_c_e,X86_FEATURE_ERMS, \
++ .Lmemcpy_e_e-.Lmemcpy_c_e,.Lmemcpy_e_e-.Lmemcpy_c_e,0
+ .previous
+--- a/arch/x86/lib/memmove_64.S
++++ b/arch/x86/lib/memmove_64.S
+@@ -218,6 +218,6 @@ ENTRY(memmove)
+ altinstruction_entry .Lmemmove_begin_forward, \
+ .Lmemmove_begin_forward_efs,X86_FEATURE_ERMS, \
+ .Lmemmove_end_forward-.Lmemmove_begin_forward, \
+- .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs
++ .Lmemmove_end_forward_efs-.Lmemmove_begin_forward_efs,0
+ .previous
+ ENDPROC(memmove)
+--- a/arch/x86/lib/memset_64.S
++++ b/arch/x86/lib/memset_64.S
+@@ -147,8 +147,8 @@ ENDPROC(__memset)
+ * feature to implement the right patch order.
+ */
+ .section .altinstructions,"a"
+- altinstruction_entry memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
+- .Lfinal-memset,.Lmemset_e-.Lmemset_c
+- altinstruction_entry memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
+- .Lfinal-memset,.Lmemset_e_e-.Lmemset_c_e
++ altinstruction_entry __memset,.Lmemset_c,X86_FEATURE_REP_GOOD,\
++ .Lfinal-__memset,.Lmemset_e-.Lmemset_c,0
++ altinstruction_entry __memset,.Lmemset_c_e,X86_FEATURE_ERMS, \
++ .Lfinal-__memset,.Lmemset_e_e-.Lmemset_c_e,0
+ .previous
diff --git a/debian/patches/bugfix/all/kpti/x86-alternatives-cleanup-dprintk-macro.patch b/debian/patches/bugfix/all/kpti/x86-alternatives-cleanup-dprintk-macro.patch
new file mode 100644
index 0000000..17bc17a
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-alternatives-cleanup-dprintk-macro.patch
@@ -0,0 +1,99 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Tue, 30 Dec 2014 20:27:09 +0100
+Subject: x86/alternatives: Cleanup DPRINTK macro
+
+commit db477a3386dee183130916d6bbf21f5828b0b2e2 upstream.
+
+Make it pass __func__ implicitly. Also, dump info about each replacing
+we're doing. Fixup comments and style while at it.
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/alternative.c | 41 +++++++++++++++++++++++++----------------
+ 1 file changed, 25 insertions(+), 16 deletions(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -52,10 +52,10 @@ static int __init setup_noreplace_paravi
+ __setup("noreplace-paravirt", setup_noreplace_paravirt);
+ #endif
+
+-#define DPRINTK(fmt, ...) \
+-do { \
+- if (debug_alternative) \
+- printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
++#define DPRINTK(fmt, args...) \
++do { \
++ if (debug_alternative) \
++ printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
+ } while (0)
+
+ /*
+@@ -243,12 +243,13 @@ extern struct alt_instr __alt_instructio
+ extern s32 __smp_locks[], __smp_locks_end[];
+ void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+-/* Replace instructions with better alternatives for this CPU type.
+- This runs before SMP is initialized to avoid SMP problems with
+- self modifying code. This implies that asymmetric systems where
+- APs have less capabilities than the boot processor are not handled.
+- Tough. Make sure you disable such features by hand. */
+-
++/*
++ * Replace instructions with better alternatives for this CPU type. This runs
++ * before SMP is initialized to avoid SMP problems with self modifying code.
++ * This implies that asymmetric systems where APs have less capabilities than
++ * the boot processor are not handled. Tough. Make sure you disable such
++ * features by hand.
++ */
+ void __init_or_module apply_alternatives(struct alt_instr *start,
+ struct alt_instr *end)
+ {
+@@ -256,10 +257,10 @@ void __init_or_module apply_alternatives
+ u8 *instr, *replacement;
+ u8 insnbuf[MAX_PATCH_LEN];
+
+- DPRINTK("%s: alt table %p -> %p\n", __func__, start, end);
++ DPRINTK("alt table %p -> %p", start, end);
+ /*
+ * The scan order should be from start to end. A later scanned
+- * alternative code can overwrite a previous scanned alternative code.
++ * alternative code can overwrite previously scanned alternative code.
+ * Some kernel functions (e.g. memcpy, memset, etc) use this order to
+ * patch code.
+ *
+@@ -275,11 +276,19 @@ void __init_or_module apply_alternatives
+ if (!boot_cpu_has(a->cpuid))
+ continue;
+
++ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
++ a->cpuid >> 5,
++ a->cpuid & 0x1f,
++ instr, a->instrlen,
++ replacement, a->replacementlen);
++
+ memcpy(insnbuf, replacement, a->replacementlen);
+
+ /* 0xe8 is a relative jump; fix the offset. */
+- if (*insnbuf == 0xe8 && a->replacementlen == 5)
+- *(s32 *)(insnbuf + 1) += replacement - instr;
++ if (*insnbuf == 0xe8 && a->replacementlen == 5) {
++ *(s32 *)(insnbuf + 1) += replacement - instr;
++ DPRINTK("Fix CALL offset: 0x%x", *(s32 *)(insnbuf + 1));
++ }
+
+ add_nops(insnbuf + a->replacementlen,
+ a->instrlen - a->replacementlen);
+@@ -371,8 +380,8 @@ void __init_or_module alternatives_smp_m
+ smp->locks_end = locks_end;
+ smp->text = text;
+ smp->text_end = text_end;
+- DPRINTK("%s: locks %p -> %p, text %p -> %p, name %s\n",
+- __func__, smp->locks, smp->locks_end,
++ DPRINTK("locks %p -> %p, text %p -> %p, name %s\n",
++ smp->locks, smp->locks_end,
+ smp->text, smp->text_end, smp->name);
+
+ list_add_tail(&smp->next, &smp_alt_modules);
diff --git a/debian/patches/bugfix/all/kpti/x86-alternatives-make-jmps-more-robust.patch b/debian/patches/bugfix/all/kpti/x86-alternatives-make-jmps-more-robust.patch
new file mode 100644
index 0000000..9a52bd6
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-alternatives-make-jmps-more-robust.patch
@@ -0,0 +1,284 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Mon, 5 Jan 2015 13:48:41 +0100
+Subject: x86/alternatives: Make JMPs more robust
+
+commit 48c7a2509f9e237d8465399d9cdfe487d3212a23 upstream.
+
+Up until now we had to pay attention to relative JMPs in alternatives
+about how their relative offset gets computed so that the jump target
+is still correct. Or, as it is the case for near CALLs (opcode e8), we
+still have to go and readjust the offset at patching time.
+
+What is more, the static_cpu_has_safe() facility had to forcefully
+generate 5-byte JMPs since we couldn't rely on the compiler to generate
+properly sized ones so we had to force the longest ones. Worse than
+that, sometimes it would generate a replacement JMP which is longer than
+the original one, thus overwriting the beginning of the next instruction
+at patching time.
+
+So, in order to alleviate all that and make using JMPs more
+straight-forward we go and pad the original instruction in an
+alternative block with NOPs at build time, should the replacement(s) be
+longer. This way, alternatives users shouldn't pay special attention
+so that original and replacement instruction sizes are fine but the
+assembler would simply add padding where needed and not do anything
+otherwise.
+
+As a second aspect, we go and recompute JMPs at patching time so that we
+can try to make 5-byte JMPs into two-byte ones if possible. If not, we
+still have to recompute the offsets as the replacement JMP gets put far
+away in the .altinstr_replacement section leading to a wrong offset if
+copied verbatim.
+
+For example, on a locally generated kernel image
+
+ old insn VA: 0xffffffff810014bd, CPU feat: X86_FEATURE_ALWAYS, size: 2
+ __switch_to:
+ ffffffff810014bd: eb 21 jmp ffffffff810014e0
+ repl insn: size: 5
+ ffffffff81d0b23c: e9 b1 62 2f ff jmpq ffffffff810014f2
+
+gets corrected to a 2-byte JMP:
+
+ apply_alternatives: feat: 3*32+21, old: (ffffffff810014bd, len: 2), repl: (ffffffff81d0b23c, len: 5)
+ alt_insn: e9 b1 62 2f ff
+ recompute_jumps: next_rip: ffffffff81d0b241, tgt_rip: ffffffff810014f2, new_displ: 0x00000033, ret len: 2
+ converted to: eb 33 90 90 90
+
+and a 5-byte JMP:
+
+ old insn VA: 0xffffffff81001516, CPU feat: X86_FEATURE_ALWAYS, size: 2
+ __switch_to:
+ ffffffff81001516: eb 30 jmp ffffffff81001548
+ repl insn: size: 5
+ ffffffff81d0b241: e9 10 63 2f ff jmpq ffffffff81001556
+
+gets shortened into a two-byte one:
+
+ apply_alternatives: feat: 3*32+21, old: (ffffffff81001516, len: 2), repl: (ffffffff81d0b241, len: 5)
+ alt_insn: e9 10 63 2f ff
+ recompute_jumps: next_rip: ffffffff81d0b246, tgt_rip: ffffffff81001556, new_displ: 0x0000003e, ret len: 2
+ converted to: eb 3e 90 90 90
+
+... and so on.
+
+This leads to a net win of around
+
+40ish replacements * 3 bytes savings =~ 120 bytes of I$
+
+on an AMD guest, which means some savings of precious instruction cache
+bandwidth. The padding for the shorter 2-byte JMPs consists of
+single-byte NOPs, which smart microarchitectures can discard at decode
+time, thus freeing up execution bandwidth.
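As a rough illustration of the size decision recompute_jump() makes, here is a small userspace sketch. The helper names are mine, not the kernel's, and the fit test is simplified to a plain signed-byte range check (the kernel splits the positive and negative displacement cases):

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

/*
 * Sketch: after accounting for the JMP's own length, does the
 * displacement still fit in the signed 8-bit field of a short JMP
 * (opcode 0xeb)?
 */
static bool fits_two_byte_jmp(int32_t n_dspl)
{
	int32_t d = n_dspl - 2;		/* rel8 is relative to insn end */

	return d >= -128 && d <= 127;
}

/* Emit the patched bytes: a short JMP plus single-byte NOP padding
 * when the displacement fits, otherwise a near JMP with a 32-bit
 * displacement. Returns the JMP length (2 or 5). */
static int encode_jmp(uint8_t buf[5], int32_t n_dspl)
{
	if (fits_two_byte_jmp(n_dspl)) {
		buf[0] = 0xeb;			/* JMP rel8 */
		buf[1] = (uint8_t)(int8_t)(n_dspl - 2);
		memset(buf + 2, 0x90, 3);	/* NOP padding */
		return 2;
	}
	buf[0] = 0xe9;				/* JMP rel32 */
	int32_t d = n_dspl - 5;
	memcpy(buf + 1, &d, sizeof(d));
	return 5;
}
```

A displacement of 0x35 thus becomes the two bytes eb 33 followed by three NOPs, matching the shape of the logs above.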
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/cpufeature.h | 10 +---
+ arch/x86/kernel/alternative.c | 103 ++++++++++++++++++++++++++++++++++++--
+ arch/x86/lib/copy_user_64.S | 11 ++--
+ 3 files changed, 105 insertions(+), 19 deletions(-)
+
+--- a/arch/x86/include/asm/cpufeature.h
++++ b/arch/x86/include/asm/cpufeature.h
+@@ -458,13 +458,7 @@ static __always_inline __pure bool __sta
+ static __always_inline __pure bool _static_cpu_has_safe(u16 bit)
+ {
+ #ifdef CC_HAVE_ASM_GOTO
+-/*
+- * We need to spell the jumps to the compiler because, depending on the offset,
+- * the replacement jump can be bigger than the original jump, and this we cannot
+- * have. Thus, we force the jump to the widest, 4-byte, signed relative
+- * offset even though the last would often fit in less bytes.
+- */
+- asm_volatile_goto("1: .byte 0xe9\n .long %l[t_dynamic] - 2f\n"
++ asm_volatile_goto("1: jmp %l[t_dynamic]\n"
+ "2:\n"
+ ".skip -(((5f-4f) - (2b-1b)) > 0) * "
+ "((5f-4f) - (2b-1b)),0x90\n"
+@@ -478,7 +472,7 @@ static __always_inline __pure bool _stat
+ " .byte 3b - 2b\n" /* pad len */
+ ".previous\n"
+ ".section .altinstr_replacement,\"ax\"\n"
+- "4: .byte 0xe9\n .long %l[t_no] - 2b\n"
++ "4: jmp %l[t_no]\n"
+ "5:\n"
+ ".previous\n"
+ ".section .altinstructions,\"a\"\n"
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -58,6 +58,21 @@ do { \
+ printk(KERN_DEBUG "%s: " fmt "\n", __func__, ##args); \
+ } while (0)
+
++#define DUMP_BYTES(buf, len, fmt, args...) \
++do { \
++ if (unlikely(debug_alternative)) { \
++ int j; \
++ \
++ if (!(len)) \
++ break; \
++ \
++ printk(KERN_DEBUG fmt, ##args); \
++ for (j = 0; j < (len) - 1; j++) \
++ printk(KERN_CONT "%02hhx ", buf[j]); \
++ printk(KERN_CONT "%02hhx\n", buf[j]); \
++ } \
++} while (0)
++
+ /*
+ * Each GENERIC_NOPX is of X bytes, and defined as an array of bytes
+ * that correspond to that nop. Getting from one nop to the next, we
+@@ -244,6 +259,71 @@ extern s32 __smp_locks[], __smp_locks_en
+ void *text_poke_early(void *addr, const void *opcode, size_t len);
+
+ /*
++ * Are we looking at a near JMP with a 1 or 4-byte displacement.
++ */
++static inline bool is_jmp(const u8 opcode)
++{
++ return opcode == 0xeb || opcode == 0xe9;
++}
++
++static void __init_or_module
++recompute_jump(struct alt_instr *a, u8 *orig_insn, u8 *repl_insn, u8 *insnbuf)
++{
++ u8 *next_rip, *tgt_rip;
++ s32 n_dspl, o_dspl;
++ int repl_len;
++
++ if (a->replacementlen != 5)
++ return;
++
++ o_dspl = *(s32 *)(insnbuf + 1);
++
++ /* next_rip of the replacement JMP */
++ next_rip = repl_insn + a->replacementlen;
++ /* target rip of the replacement JMP */
++ tgt_rip = next_rip + o_dspl;
++ n_dspl = tgt_rip - orig_insn;
++
++ DPRINTK("target RIP: %p, new_displ: 0x%x", tgt_rip, n_dspl);
++
++ if (tgt_rip - orig_insn >= 0) {
++ if (n_dspl - 2 <= 127)
++ goto two_byte_jmp;
++ else
++ goto five_byte_jmp;
++ /* negative offset */
++ } else {
++ if (((n_dspl - 2) & 0xff) == (n_dspl - 2))
++ goto two_byte_jmp;
++ else
++ goto five_byte_jmp;
++ }
++
++two_byte_jmp:
++ n_dspl -= 2;
++
++ insnbuf[0] = 0xeb;
++ insnbuf[1] = (s8)n_dspl;
++ add_nops(insnbuf + 2, 3);
++
++ repl_len = 2;
++ goto done;
++
++five_byte_jmp:
++ n_dspl -= 5;
++
++ insnbuf[0] = 0xe9;
++ *(s32 *)&insnbuf[1] = n_dspl;
++
++ repl_len = 5;
++
++done:
++
++ DPRINTK("final displ: 0x%08x, JMP 0x%lx",
++ n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
++}
++
++/*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+ * This implies that asymmetric systems where APs have less capabilities than
+@@ -268,6 +348,8 @@ void __init_or_module apply_alternatives
+ * order.
+ */
+ for (a = start; a < end; a++) {
++ int insnbuf_sz = 0;
++
+ instr = (u8 *)&a->instr_offset + a->instr_offset;
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
+ BUG_ON(a->instrlen > sizeof(insnbuf));
+@@ -281,24 +363,35 @@ void __init_or_module apply_alternatives
+ instr, a->instrlen,
+ replacement, a->replacementlen);
+
++ DUMP_BYTES(instr, a->instrlen, "%p: old_insn: ", instr);
++ DUMP_BYTES(replacement, a->replacementlen, "%p: rpl_insn: ", replacement);
++
+ memcpy(insnbuf, replacement, a->replacementlen);
++ insnbuf_sz = a->replacementlen;
+
+ /* 0xe8 is a relative jump; fix the offset. */
+ if (*insnbuf == 0xe8 && a->replacementlen == 5) {
+ *(s32 *)(insnbuf + 1) += replacement - instr;
+- DPRINTK("Fix CALL offset: 0x%x", *(s32 *)(insnbuf + 1));
++ DPRINTK("Fix CALL offset: 0x%x, CALL 0x%lx",
++ *(s32 *)(insnbuf + 1),
++ (unsigned long)instr + *(s32 *)(insnbuf + 1) + 5);
+ }
+
+- if (a->instrlen > a->replacementlen)
++ if (a->replacementlen && is_jmp(replacement[0]))
++ recompute_jump(a, instr, replacement, insnbuf);
++
++ if (a->instrlen > a->replacementlen) {
+ add_nops(insnbuf + a->replacementlen,
+ a->instrlen - a->replacementlen);
++ insnbuf_sz += a->instrlen - a->replacementlen;
++ }
++ DUMP_BYTES(insnbuf, insnbuf_sz, "%p: final_insn: ", instr);
+
+- text_poke_early(instr, insnbuf, a->instrlen);
++ text_poke_early(instr, insnbuf, insnbuf_sz);
+ }
+ }
+
+ #ifdef CONFIG_SMP
+-
+ static void alternatives_smp_lock(const s32 *start, const s32 *end,
+ u8 *text, u8 *text_end)
+ {
+@@ -449,7 +542,7 @@ int alternatives_text_reserved(void *sta
+
+ return 0;
+ }
+-#endif
++#endif /* CONFIG_SMP */
+
+ #ifdef CONFIG_PARAVIRT
+ void __init_or_module apply_paravirt(struct paravirt_patch_site *start,
+--- a/arch/x86/lib/copy_user_64.S
++++ b/arch/x86/lib/copy_user_64.S
+@@ -28,14 +28,13 @@
+ */
+ .macro ALTERNATIVE_JUMP feature1,feature2,orig,alt1,alt2
+ 0:
+- .byte 0xe9 /* 32bit jump */
+- .long \orig-1f /* by default jump to orig */
++ jmp \orig
+ 1:
+ .section .altinstr_replacement,"ax"
+-2: .byte 0xe9 /* near jump with 32bit immediate */
+- .long \alt1-1b /* offset */ /* or alternatively to alt1 */
+-3: .byte 0xe9 /* near jump with 32bit immediate */
+- .long \alt2-1b /* offset */ /* or alternatively to alt2 */
++2:
++ jmp \alt1
++3:
++ jmp \alt2
+ .previous
+
+ .section .altinstructions,"a"
diff --git a/debian/patches/bugfix/all/kpti/x86-alternatives-use-optimized-nops-for-padding.patch b/debian/patches/bugfix/all/kpti/x86-alternatives-use-optimized-nops-for-padding.patch
new file mode 100644
index 0000000..ed064b9
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-alternatives-use-optimized-nops-for-padding.patch
@@ -0,0 +1,50 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Sat, 10 Jan 2015 20:34:07 +0100
+Subject: x86/alternatives: Use optimized NOPs for padding
+
+commit 4fd4b6e5537cec5b56db0b22546dd439ebb26830 upstream.
+
+Alternatives now allow for an empty old instruction, in which case we
+pad the space with NOPs at assembly time. However, the optimal, longer
+NOPs should be used instead. Do that at patching time by adding
+alt_instr.padlen-sized NOPs at the old instruction address.
+
+Cc: Andy Lutomirski <luto at amacapital.net>
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/alternative.c | 14 +++++++++++++-
+ 1 file changed, 13 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/alternative.c
++++ b/arch/x86/kernel/alternative.c
+@@ -323,6 +323,14 @@ done:
+ n_dspl, (unsigned long)orig_insn + n_dspl + repl_len);
+ }
+
++static void __init_or_module optimize_nops(struct alt_instr *a, u8 *instr)
++{
++ add_nops(instr + (a->instrlen - a->padlen), a->padlen);
++
++ DUMP_BYTES(instr, a->instrlen, "%p: [%d:%d) optimized NOPs: ",
++ instr, a->instrlen - a->padlen, a->padlen);
++}
++
+ /*
+ * Replace instructions with better alternatives for this CPU type. This runs
+ * before SMP is initialized to avoid SMP problems with self modifying code.
+@@ -354,8 +362,12 @@ void __init_or_module apply_alternatives
+ replacement = (u8 *)&a->repl_offset + a->repl_offset;
+ BUG_ON(a->instrlen > sizeof(insnbuf));
+ BUG_ON(a->cpuid >= (NCAPINTS + NBUGINTS) * 32);
+- if (!boot_cpu_has(a->cpuid))
++ if (!boot_cpu_has(a->cpuid)) {
++ if (a->padlen > 1)
++ optimize_nops(a, instr);
++
+ continue;
++ }
+
+ DPRINTK("feat: %d*32+%d, old: (%p, len: %d), repl: (%p, len: %d)",
+ a->cpuid >> 5,
diff --git a/debian/patches/bugfix/all/kpti/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch b/debian/patches/bugfix/all/kpti/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
new file mode 100644
index 0000000..5d03599
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
@@ -0,0 +1,175 @@
+From: Tom Lendacky <thomas.lendacky at amd.com>
+Date: Mon, 17 Jul 2017 16:10:33 -0500
+Subject: x86/boot: Add early cmdline parsing for options with arguments
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit e505371dd83963caae1a37ead9524e8d997341be upstream.
+
+Add a cmdline_find_option() function to look for cmdline options that
+take arguments. The argument is returned in a supplied buffer and the
+argument length (regardless of whether it fits in the supplied buffer)
+is returned, with -1 indicating not found.
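For experimenting with the state machine outside the kernel, the __cmdline_find_option() code added below can be transliterated to userspace as follows (a sketch: the function name is shortened, and myisspace() is stubbed with the kernel's "anything <= space" convention):

```c
#include <assert.h>
#include <stddef.h>
#include <string.h>

static int myisspace(char c)
{
	return c <= ' ';	/* kernel convention: control chars too */
}

/* Find option=argument; the last instance on the command line wins.
 * Returns the argument length (even if truncated to bufsize), or -1
 * if the option was not found. */
static int find_option(const char *cmdline, int max_cmdline_size,
		       const char *option, char *buffer, int bufsize)
{
	char c;
	int pos = 0, len = -1;
	const char *opptr = NULL;
	char *bufptr = buffer;
	enum {
		st_wordstart = 0,	/* Start of word/after whitespace */
		st_wordcmp,		/* Comparing this word */
		st_wordskip,		/* Miscompare, skip */
		st_bufcpy,		/* Copying this to buffer */
	} state = st_wordstart;

	if (!cmdline)
		return -1;

	/* 'pos' guards against a non-NUL-terminated 'cmdline' */
	while (pos++ < max_cmdline_size) {
		c = *cmdline++;
		if (!c)
			break;

		switch (state) {
		case st_wordstart:
			if (myisspace(c))
				break;
			state = st_wordcmp;
			opptr = option;
			/* fall through */
		case st_wordcmp:
			if (c == '=' && !*opptr) {
				/* matched the whole option; copy arg */
				len = 0;
				bufptr = buffer;
				state = st_bufcpy;
				break;
			} else if (c == *opptr++) {
				break;		/* still matching */
			}
			state = st_wordskip;
			/* fall through */
		case st_wordskip:
			if (myisspace(c))
				state = st_wordstart;
			break;
		case st_bufcpy:
			if (myisspace(c))
				state = st_wordstart;
			else if (++len < bufsize)
				*bufptr++ = c;	/* leave room for NUL */
			break;
		}
	}

	if (bufsize)
		*bufptr = '\0';

	return len;
}
```

Note how a repeated option simply resets len and bufptr, which is what makes the last instance win.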
+
+Signed-off-by: Tom Lendacky <thomas.lendacky at amd.com>
+Reviewed-by: Thomas Gleixner <tglx at linutronix.de>
+Cc: Alexander Potapenko <glider at google.com>
+Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
+Cc: Andy Lutomirski <luto at kernel.org>
+Cc: Arnd Bergmann <arnd at arndb.de>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brijesh Singh <brijesh.singh at amd.com>
+Cc: Dave Young <dyoung at redhat.com>
+Cc: Dmitry Vyukov <dvyukov at google.com>
+Cc: Jonathan Corbet <corbet at lwn.net>
+Cc: Konrad Rzeszutek Wilk <konrad.wilk at oracle.com>
+Cc: Larry Woodman <lwoodman at redhat.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Matt Fleming <matt at codeblueprint.co.uk>
+Cc: Michael S. Tsirkin <mst at redhat.com>
+Cc: Paolo Bonzini <pbonzini at redhat.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Radim Krčmář <rkrcmar at redhat.com>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Toshimitsu Kani <toshi.kani at hpe.com>
+Cc: kasan-dev at googlegroups.com
+Cc: kvm at vger.kernel.org
+Cc: linux-arch at vger.kernel.org
+Cc: linux-doc at vger.kernel.org
+Cc: linux-efi at vger.kernel.org
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/36b5f97492a9745dce27682305f990fc20e5cf8a.1500319216.git.thomas.lendacky@amd.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/cmdline.h | 2 +
+ arch/x86/lib/cmdline.c | 105 +++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 107 insertions(+)
+
+--- a/arch/x86/include/asm/cmdline.h
++++ b/arch/x86/include/asm/cmdline.h
+@@ -2,5 +2,7 @@
+ #define _ASM_X86_CMDLINE_H
+
+ int cmdline_find_option_bool(const char *cmdline_ptr, const char *option);
++int cmdline_find_option(const char *cmdline_ptr, const char *option,
++ char *buffer, int bufsize);
+
+ #endif /* _ASM_X86_CMDLINE_H */
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -104,7 +104,112 @@ __cmdline_find_option_bool(const char *c
+ return 0; /* Buffer overrun */
+ }
+
++/*
++ * Find a non-boolean option (i.e. option=argument). In accordance with
++ * standard Linux practice, if this option is repeated, this returns the
++ * last instance on the command line.
++ *
++ * @cmdline: the cmdline string
++ * @max_cmdline_size: the maximum size of cmdline
++ * @option: option string to look for
++ * @buffer: memory buffer to return the option argument
++ * @bufsize: size of the supplied memory buffer
++ *
++ * Returns the length of the argument (regardless of if it was
++ * truncated to fit in the buffer), or -1 on not found.
++ */
++static int
++__cmdline_find_option(const char *cmdline, int max_cmdline_size,
++ const char *option, char *buffer, int bufsize)
++{
++ char c;
++ int pos = 0, len = -1;
++ const char *opptr = NULL;
++ char *bufptr = buffer;
++ enum {
++ st_wordstart = 0, /* Start of word/after whitespace */
++ st_wordcmp, /* Comparing this word */
++ st_wordskip, /* Miscompare, skip */
++ st_bufcpy, /* Copying this to buffer */
++ } state = st_wordstart;
++
++ if (!cmdline)
++ return -1; /* No command line */
++
++ /*
++ * This 'pos' check ensures we do not overrun
++ * a non-NULL-terminated 'cmdline'
++ */
++ while (pos++ < max_cmdline_size) {
++ c = *(char *)cmdline++;
++ if (!c)
++ break;
++
++ switch (state) {
++ case st_wordstart:
++ if (myisspace(c))
++ break;
++
++ state = st_wordcmp;
++ opptr = option;
++ /* fall through */
++
++ case st_wordcmp:
++ if ((c == '=') && !*opptr) {
++ /*
++ * We matched all the way to the end of the
++ * option we were looking for, prepare to
++ * copy the argument.
++ */
++ len = 0;
++ bufptr = buffer;
++ state = st_bufcpy;
++ break;
++ } else if (c == *opptr++) {
++ /*
++ * We are currently matching, so continue
++ * to the next character on the cmdline.
++ */
++ break;
++ }
++ state = st_wordskip;
++ /* fall through */
++
++ case st_wordskip:
++ if (myisspace(c))
++ state = st_wordstart;
++ break;
++
++ case st_bufcpy:
++ if (myisspace(c)) {
++ state = st_wordstart;
++ } else {
++ /*
++ * Increment len, but don't overrun the
++ * supplied buffer and leave room for the
++ * NULL terminator.
++ */
++ if (++len < bufsize)
++ *bufptr++ = c;
++ }
++ break;
++ }
++ }
++
++ if (bufsize)
++ *bufptr = '\0';
++
++ return len;
++}
++
+ int cmdline_find_option_bool(const char *cmdline, const char *option)
+ {
+ return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
+ }
++
++int cmdline_find_option(const char *cmdline, const char *option, char *buffer,
++ int bufsize)
++{
++ return __cmdline_find_option(cmdline, COMMAND_LINE_SIZE, option,
++ buffer, bufsize);
++}
diff --git a/debian/patches/bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-matching-at-end.patch b/debian/patches/bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-matching-at-end.patch
new file mode 100644
index 0000000..9f1aa59
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-matching-at-end.patch
@@ -0,0 +1,120 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Tue, 22 Dec 2015 14:52:38 -0800
+Subject: x86/boot: Fix early command-line parsing when matching at end
+
+commit 02afeaae9843733a39cd9b11053748b2d1dc5ae7 upstream.
+
+The x86 early command line parsing in cmdline_find_option_bool() is
+buggy. If it matches a specified 'option' all the way to the end of the
+command-line, it will consider it a match.
+
+For instance,
+
+ cmdline = "foo";
+ cmdline_find_option_bool(cmdline, "fool");
+
+will return 1. This is particularly annoying since we have actual FPU
+options like "noxsave" and "noxsaves". So, command-line "foo bar noxsave"
+will match *BOTH* a "noxsave" and "noxsaves". (This turns out not to be
+an actual problem because "noxsave" implies "noxsaves", but it's still
+confusing.)
+
+To fix this, we simplify the code and stop tracking 'len'. 'len'
+was trying to indicate either the NULL terminator *OR* the end of a
+non-NULL-terminated command line at 'COMMAND_LINE_SIZE'. But, each of the
+three states is *already* checking 'cmdline' for a NULL terminator.
+
+We _only_ need to check if we have overrun 'COMMAND_LINE_SIZE', and that
+we can do without keeping 'len' around.
+
+Also add some comments to clarify what is going on.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: fenghua.yu at intel.com
+Cc: yu-cheng.yu at intel.com
+Link: http://lkml.kernel.org/r/20151222225238.9AEB560C@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/lib/cmdline.c | 34 ++++++++++++++++++++++++----------
+ 1 file changed, 24 insertions(+), 10 deletions(-)
+
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -21,12 +21,14 @@ static inline int myisspace(u8 c)
+ * @option: option string to look for
+ *
+ * Returns the position of that @option (starts counting with 1)
+- * or 0 on not found.
++ * or 0 on not found. @option will only be found if it is found
++ * as an entire word in @cmdline. For instance, if @option="car"
++ * then a cmdline which contains "cart" will not match.
+ */
+ int cmdline_find_option_bool(const char *cmdline, const char *option)
+ {
+ char c;
+- int len, pos = 0, wstart = 0;
++ int pos = 0, wstart = 0;
+ const char *opptr = NULL;
+ enum {
+ st_wordstart = 0, /* Start of word/after whitespace */
+@@ -37,11 +39,14 @@ int cmdline_find_option_bool(const char
+ if (!cmdline)
+ return -1; /* No command line */
+
+- len = min_t(int, strlen(cmdline), COMMAND_LINE_SIZE);
+- if (!len)
++ if (!strlen(cmdline))
+ return 0;
+
+- while (len--) {
++ /*
++ * This 'pos' check ensures we do not overrun
++ * a non-NULL-terminated 'cmdline'
++ */
++ while (pos < COMMAND_LINE_SIZE) {
+ c = *(char *)cmdline++;
+ pos++;
+
+@@ -58,17 +63,26 @@ int cmdline_find_option_bool(const char
+ /* fall through */
+
+ case st_wordcmp:
+- if (!*opptr)
++ if (!*opptr) {
++ /*
++ * We matched all the way to the end of the
++ * option we were looking for. If the
++ * command-line has a space _or_ ends, then
++ * we matched!
++ */
+ if (!c || myisspace(c))
+ return wstart;
+ else
+ state = st_wordskip;
+- else if (!c)
++ } else if (!c) {
++ /*
++ * Hit the NULL terminator on the end of
++ * cmdline.
++ */
+ return 0;
+- else if (c != *opptr++)
++ } else if (c != *opptr++) {
+ state = st_wordskip;
+- else if (!len) /* last word and is matching */
+- return wstart;
++ }
+ break;
+
+ case st_wordskip:
diff --git a/debian/patches/bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-partial-word-matches.patch b/debian/patches/bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-partial-word-matches.patch
new file mode 100644
index 0000000..a66f451
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-partial-word-matches.patch
@@ -0,0 +1,101 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Tue, 22 Dec 2015 14:52:39 -0800
+Subject: x86/boot: Fix early command-line parsing when partial word matches
+
+commit abcdc1c694fa4055323cbec1cde4c2cb6b68398c upstream.
+
+cmdline_find_option_bool() keeps track of position in two strings:
+
+ 1. the command-line
+ 2. the option we are searching for in the command-line
+
+We plow through each character in the command-line one at a time, always
+moving forward. We move forward in the option ('opptr') when we match
+characters in 'cmdline'. We reset the 'opptr' only when we go in to the
+'st_wordstart' state.
+
+But, if we fail to match an option because we see a space
+(state=st_wordcmp, *opptr='\0',c=' '), we set state='st_wordskip' and
+'break', moving to the next character. But, that move to the next
+character is the one *after* the ' '. This means that we will miss a
+'st_wordstart' state.
+
+For instance, if we have
+
+ cmdline = "foo fool";
+
+and are searching for "fool", we have:
+
+ "fool"
+ opptr = ----^
+
+ "foo fool"
+ c = --------^
+
+We see that 'l' != ' ', set state=st_wordskip, break, and then move 'c', so:
+
+ "foo fool"
+ c = ---------^
+
+and are still in state=st_wordskip. We will stay in wordskip until we
+have skipped "fool", thus missing the option we were looking for. This
+*only* happens when you have a partially-matching word followed by a
+matching one.
+
+To fix this, we always fall *into* the 'st_wordskip' state when we set
+it.
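The behaviour after this fix can be checked with a userspace transliteration of the resulting cmdline_find_option_bool() (a sketch: myisspace() is stubbed, and COMMAND_LINE_SIZE is replaced by a local constant):

```c
#include <assert.h>
#include <stddef.h>

#define MAX_CMDLINE_SIZE 256	/* stand-in for COMMAND_LINE_SIZE */

static int myisspace(char c)
{
	return c <= ' ';	/* kernel convention: control chars too */
}

/* With the fix applied, a miscompare falls *into* st_wordskip in the
 * same iteration, so a word start right after the mismatch is never
 * missed. Returns the 1-based position of the match, or 0. */
static int find_option_bool(const char *cmdline, const char *option)
{
	char c;
	int pos = 0, wstart = 0;
	const char *opptr = NULL;
	enum {
		st_wordstart = 0,	/* Start of word/after whitespace */
		st_wordcmp,		/* Comparing this word */
		st_wordskip,		/* Miscompare, skip */
	} state = st_wordstart;

	if (!cmdline)
		return -1;

	while (pos < MAX_CMDLINE_SIZE) {
		c = *cmdline++;
		pos++;

		switch (state) {
		case st_wordstart:
			if (!c)
				return 0;
			else if (myisspace(c))
				break;
			state = st_wordcmp;
			opptr = option;
			wstart = pos;
			/* fall through */
		case st_wordcmp:
			if (!*opptr) {
				if (!c || myisspace(c))
					return wstart;	/* whole word matched */
			} else if (!c) {
				return 0;
			} else if (c == *opptr++) {
				break;		/* still matching */
			}
			state = st_wordskip;
			/* fall through */
		case st_wordskip:
			if (!c)
				return 0;
			else if (myisspace(c))
				state = st_wordstart;
			break;
		}
	}
	return 0;			/* buffer overrun */
}
```

With this, "fool" is found in "foo fool" (at position 5), and "noxsave" no longer matches inside "noxsaves".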
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: fenghua.yu at intel.com
+Cc: yu-cheng.yu at intel.com
+Link: http://lkml.kernel.org/r/20151222225239.8E1DCA58@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/lib/cmdline.c | 18 +++++++++++++-----
+ 1 file changed, 13 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -72,18 +72,26 @@ int cmdline_find_option_bool(const char
+ */
+ if (!c || myisspace(c))
+ return wstart;
+- else
+- state = st_wordskip;
++ /*
++ * We hit the end of the option, but _not_
++ * the end of a word on the cmdline. Not
++ * a match.
++ */
+ } else if (!c) {
+ /*
+ * Hit the NULL terminator on the end of
+ * cmdline.
+ */
+ return 0;
+- } else if (c != *opptr++) {
+- state = st_wordskip;
++ } else if (c == *opptr++) {
++ /*
++ * We are currently matching, so continue
++ * to the next character on the cmdline.
++ */
++ break;
+ }
+- break;
++ state = st_wordskip;
++ /* fall through */
+
+ case st_wordskip:
+ if (!c)
diff --git a/debian/patches/bugfix/all/kpti/x86-boot-pass-in-size-to-early-cmdline-parsing.patch b/debian/patches/bugfix/all/kpti/x86-boot-pass-in-size-to-early-cmdline-parsing.patch
new file mode 100644
index 0000000..503163f
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-boot-pass-in-size-to-early-cmdline-parsing.patch
@@ -0,0 +1,60 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Tue, 22 Dec 2015 14:52:43 -0800
+Subject: x86/boot: Pass in size to early cmdline parsing
+
+commit 8c0517759a1a100a8b83134cf3c7f254774aaeba upstream.
+
+We will use this in a few patches to implement tests for early parsing.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+[ Aligned args properly. ]
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: fenghua.yu at intel.com
+Cc: yu-cheng.yu at intel.com
+Link: http://lkml.kernel.org/r/20151222225243.5CC47EB6@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/lib/cmdline.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -25,7 +25,9 @@ static inline int myisspace(u8 c)
+ * as an entire word in @cmdline. For instance, if @option="car"
+ * then a cmdline which contains "cart" will not match.
+ */
+-int cmdline_find_option_bool(const char *cmdline, const char *option)
++static int
++__cmdline_find_option_bool(const char *cmdline, int max_cmdline_size,
++ const char *option)
+ {
+ char c;
+ int pos = 0, wstart = 0;
+@@ -43,7 +45,7 @@ int cmdline_find_option_bool(const char
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
+ */
+- while (pos < COMMAND_LINE_SIZE) {
++ while (pos < max_cmdline_size) {
+ c = *(char *)cmdline++;
+ pos++;
+
+@@ -101,3 +103,8 @@ int cmdline_find_option_bool(const char
+
+ return 0; /* Buffer overrun */
+ }
++
++int cmdline_find_option_bool(const char *cmdline, const char *option)
++{
++ return __cmdline_find_option_bool(cmdline, COMMAND_LINE_SIZE, option);
++}
diff --git a/debian/patches/bugfix/all/kpti/x86-boot-simplify-early-command-line-parsing.patch b/debian/patches/bugfix/all/kpti/x86-boot-simplify-early-command-line-parsing.patch
new file mode 100644
index 0000000..3c10d8f
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-boot-simplify-early-command-line-parsing.patch
@@ -0,0 +1,52 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Tue, 22 Dec 2015 14:52:41 -0800
+Subject: x86/boot: Simplify early command line parsing
+
+commit 4de07ea481361b08fe13735004dafae862482d38 upstream.
+
+__cmdline_find_option_bool() tries to account for both NULL-terminated
+and non-NULL-terminated strings. It keeps 'pos' to look for the end of
+the buffer and also looks for '!c' in a bunch of places to look for NULL
+termination.
+
+But, it also calls strlen(). You can't call strlen on a
+non-NULL-terminated string.
+
+If !strlen(cmdline), then cmdline[0]=='\0'. In that case, we will go
+into the while() loop, set c='\0', hit st_wordstart, notice !c, and will
+immediately return 0.
+
+So, remove the strlen(). It is unnecessary and unsafe.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: fenghua.yu at intel.com
+Cc: yu-cheng.yu at intel.com
+Link: http://lkml.kernel.org/r/20151222225241.15365E43@viggo.jf.intel.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/lib/cmdline.c | 3 ---
+ 1 file changed, 3 deletions(-)
+
+--- a/arch/x86/lib/cmdline.c
++++ b/arch/x86/lib/cmdline.c
+@@ -39,9 +39,6 @@ int cmdline_find_option_bool(const char
+ if (!cmdline)
+ return -1; /* No command line */
+
+- if (!strlen(cmdline))
+- return 0;
+-
+ /*
+ * This 'pos' check ensures we do not overrun
+ * a non-NULL-terminated 'cmdline'
diff --git a/debian/patches/bugfix/all/kpti/x86-irq-do-not-substract-irq_tlb_count-from-irq_call_count.patch b/debian/patches/bugfix/all/kpti/x86-irq-do-not-substract-irq_tlb_count-from-irq_call_count.patch
new file mode 100644
index 0000000..f4f1cdb
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-irq-do-not-substract-irq_tlb_count-from-irq_call_count.patch
@@ -0,0 +1,103 @@
+From: Aaron Lu <aaron.lu at intel.com>
+Date: Thu, 11 Aug 2016 15:44:30 +0800
+Subject: x86/irq: Do not substract irq_tlb_count from irq_call_count
+
+commit 82ba4faca1bffad429f15c90c980ffd010366c25 upstream.
+
+Since commit:
+
+ 52aec3308db8 ("x86/tlb: replace INVALIDATE_TLB_VECTOR by CALL_FUNCTION_VECTOR")
+
+the TLB remote shootdown is done through call function vector. That
+commit didn't take care of irq_tlb_count, which a later commit:
+
+ fd0f5869724f ("x86: Distinguish TLB shootdown interrupts from other functions call interrupts")
+
+... tried to fix.
+
+The fix assumes every increase of irq_tlb_count has a corresponding
+increase of irq_call_count. So the irq_call_count is always bigger than
+irq_tlb_count and we could subtract irq_tlb_count from irq_call_count.
+
+Unfortunately this is not true for the smp_call_function_single() case.
+The IPI is only sent if the target CPU's call_single_queue is empty when
+adding a csd into it in generic_exec_single. That means if two threads
+are both adding flush tlb csds to the same CPU's call_single_queue, only
+one IPI is sent. In other words, the irq_call_count is incremented by 1
+but irq_tlb_count is incremented by 2. Over time, irq_tlb_count will be
+bigger than irq_call_count and the subtraction will produce a very large
+irq_call_count value due to overflow.
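The wraparound is easy to see in isolation. This sketch (hypothetical helper name) mirrors the subtraction the old arch_show_interrupts() display did:

```c
#include <assert.h>
#include <stdint.h>

/* Mirrors the old CAL display: irq_call_count - irq_tlb_count. With
 * unsigned arithmetic, once irq_tlb_count overtakes irq_call_count
 * the result wraps modulo 2^32 to a huge bogus value. */
static uint32_t displayed_call_count(uint32_t irq_call_count,
				     uint32_t irq_tlb_count)
{
	return irq_call_count - irq_tlb_count;
}
```

For example, 5 call-function interrupts against 7 TLB shootdowns displays as 4294967294 rather than a sensible count.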
+
+Considering that:
+
+ 1) it's not worth sending more IPIs for the sake of accurate counting of
+ irq_call_count in generic_exec_single();
+
+ 2) it's not easy to tell if the call function interrupt is for TLB
+ shootdown in __smp_call_function_single_interrupt().
+
+Not excluding TLB shootdown from the call function count seems to be
+the simplest fix and this patch just does that.
+
+This bug was found by LKP's cyclic performance regression tracking recently
+with the vm-scalability test suite. I have bisected to commit:
+
+ 3dec0ba0be6a ("mm/rmap: share the i_mmap_rwsem")
+
+This commit didn't do anything wrong but revealed the irq_call_count
+problem. IIUC, the commit makes rwc->remap_one in rmap_walk_file
+concurrent with multiple threads. When remap_one is try_to_unmap_one(),
+then multiple threads could queue flush TLB to the same CPU but only
+one IPI will be sent.
+
+Since the commit was added in Linux v3.19, the counting problem only
+shows up from v3.19 onwards.
+
+Signed-off-by: Aaron Lu <aaron.lu at intel.com>
+Cc: Alex Shi <alex.shi at linaro.org>
+Cc: Andy Lutomirski <luto at kernel.org>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Davidlohr Bueso <dave at stgolabs.net>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Huang Ying <ying.huang at intel.com>
+Cc: Josh Poimboeuf <jpoimboe at redhat.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: Tomoki Sekiyama <tomoki.sekiyama.qu at hitachi.com>
+Link: http://lkml.kernel.org/r/20160811074430.GA18163@aaronlu.sh.intel.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/hardirq.h | 4 ----
+ arch/x86/kernel/irq.c | 3 +--
+ 2 files changed, 1 insertion(+), 6 deletions(-)
+
+--- a/arch/x86/include/asm/hardirq.h
++++ b/arch/x86/include/asm/hardirq.h
+@@ -21,10 +21,6 @@ typedef struct {
+ #ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+- /*
+- * irq_tlb_count is double-counted in irq_call_count, so it must be
+- * subtracted from irq_call_count when displaying irq_call_count
+- */
+ unsigned int irq_tlb_count;
+ #endif
+ #ifdef CONFIG_X86_THERMAL_VECTOR
+--- a/arch/x86/kernel/irq.c
++++ b/arch/x86/kernel/irq.c
+@@ -96,8 +96,7 @@ int arch_show_interrupts(struct seq_file
+ seq_printf(p, " Rescheduling interrupts\n");
+ seq_printf(p, "%*s: ", prec, "CAL");
+ for_each_online_cpu(j)
+- seq_printf(p, "%10u ", irq_stats(j)->irq_call_count -
+- irq_stats(j)->irq_tlb_count);
++ seq_printf(p, "%10u ", irq_stats(j)->irq_call_count);
+ seq_printf(p, " Function call interrupts\n");
+ seq_printf(p, "%*s: ", prec, "TLB");
+ for_each_online_cpu(j)
diff --git a/debian/patches/bugfix/all/kpti/x86-kaiser-check-boottime-cmdline-params.patch b/debian/patches/bugfix/all/kpti/x86-kaiser-check-boottime-cmdline-params.patch
new file mode 100644
index 0000000..5a0580f
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-kaiser-check-boottime-cmdline-params.patch
@@ -0,0 +1,121 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Tue, 2 Jan 2018 14:19:48 +0100
+Subject: x86/kaiser: Check boottime cmdline params
+
+AMD (and possibly other vendors) are not affected by the leak
+KAISER is protecting against.
+
+Keep the "nopti" for traditional reasons and add pti=<on|off|auto>
+like upstream.
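The precedence the new kaiser_check_boottime_disable() implements can be sketched as a pure function (hypothetical name; the kernel parses with cmdline_find_option() and strncmp() rather than taking a pre-extracted argument):

```c
#include <assert.h>
#include <stdbool.h>
#include <string.h>

/* Decision order: an explicit pti= argument wins; "auto" skips the
 * "nopti" check and goes straight to the vendor default; otherwise
 * "nopti" disables; finally, KAISER defaults to off on AMD since AMD
 * is not affected by the leak. pti_arg is NULL when pti= is absent. */
static bool kaiser_wanted(const char *pti_arg, bool nopti, bool is_amd)
{
	if (pti_arg) {
		if (!strcmp(pti_arg, "on"))
			return true;
		if (!strcmp(pti_arg, "off"))
			return false;
		if (!strcmp(pti_arg, "auto"))
			return !is_amd;
	}
	if (nopti)
		return false;
	return !is_amd;
}
```

Note that pti=on enables KAISER even on AMD and even when nopti is also present, matching the goto flow below.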
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/kernel-parameters.txt | 6 ++++
+ arch/x86/mm/kaiser.c | 59 ++++++++++++++++++++++++++-----------
+ 2 files changed, 47 insertions(+), 18 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2752,6 +2752,12 @@ bytes respectively. Such letter suffixes
+ pt. [PARIDE]
+ See Documentation/blockdev/paride.txt.
+
++ pti= [X86_64]
++ Control KAISER user/kernel address space isolation:
++ on - enable
++ off - disable
++ auto - default setting
++
+ pty.legacy_count=
+ [KNL] Number of legacy pty's. Overwrites compiled-in
+ default number.
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -19,6 +19,7 @@ extern struct mm_struct init_mm;
+ #include <asm/pgalloc.h>
+ #include <asm/desc.h>
+ #include <asm/vsyscall.h>
++#include <asm/cmdline.h>
+
+ int kaiser_enabled __read_mostly = 1;
+ EXPORT_SYMBOL(kaiser_enabled); /* for inlined TLB flush functions */
+@@ -267,6 +268,43 @@ static void __init kaiser_init_all_pgds(
+ WARN_ON(__ret); \
+ } while (0)
+
++void __init kaiser_check_boottime_disable(void)
++{
++ bool enable = true;
++ char arg[5];
++ int ret;
++
++ ret = cmdline_find_option(boot_command_line, "pti", arg, sizeof(arg));
++ if (ret > 0) {
++ if (!strncmp(arg, "on", 2))
++ goto enable;
++
++ if (!strncmp(arg, "off", 3))
++ goto disable;
++
++ if (!strncmp(arg, "auto", 4))
++ goto skip;
++ }
++
++ if (cmdline_find_option_bool(boot_command_line, "nopti"))
++ goto disable;
++
++skip:
++ if (boot_cpu_data.x86_vendor == X86_VENDOR_AMD)
++ goto disable;
++
++enable:
++ if (enable)
++ setup_force_cpu_cap(X86_FEATURE_KAISER);
++
++ return;
++
++disable:
++ pr_info("Kernel/User page tables isolation: disabled\n");
++ kaiser_enabled = 0;
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
++}
++
+ /*
+ * If anything in here fails, we will likely die on one of the
+ * first kernel->user transitions and init will die. But, we
+@@ -278,12 +316,10 @@ void __init kaiser_init(void)
+ {
+ int cpu, idx;
+
+- if (!kaiser_enabled) {
+- setup_clear_cpu_cap(X86_FEATURE_KAISER);
+- return;
+- }
++ kaiser_check_boottime_disable();
+
+- setup_force_cpu_cap(X86_FEATURE_KAISER);
++ if (!kaiser_enabled)
++ return;
+
+ kaiser_init_all_pgds();
+
+@@ -431,16 +467,3 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+- /* nopti doesn't accept parameters */
+- if (s)
+- return -EINVAL;
+-
+- kaiser_enabled = 0;
+- pr_info("Kernel/User page tables isolation: disabled\n");
+-
+- return 0;
+-}
+-early_param("nopti", x86_nokaiser_setup);
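The decision flow the patch above introduces (a `pti=` argument taking precedence over `nopti`, with AMD opted out in auto mode because it is not affected by the leak) can be modelled as a small user-space sketch. The function name and parameters here are illustrative, not the kernel's API:

```c
#include <assert.h>
#include <stdbool.h>
#include <stddef.h>
#include <string.h>

/* Illustrative model of kaiser_check_boottime_disable(): returns true
 * if KAISER should be enabled.  "pti=on|off|auto" wins over "nopti";
 * an unrecognized argument falls through to the "nopti" check, and the
 * auto default disables isolation on unaffected (AMD) CPUs. */
static bool kaiser_decide(const char *pti_arg, bool nopti, bool vendor_amd)
{
	if (pti_arg) {
		if (!strcmp(pti_arg, "on"))
			return true;
		if (!strcmp(pti_arg, "off"))
			return false;
		if (!strcmp(pti_arg, "auto"))
			return !vendor_amd;	/* skips the nopti check */
	}
	if (nopti)
		return false;
	return !vendor_amd;			/* default: enable unless unaffected */
}
```

Note that `pti=auto` deliberately bypasses `nopti`, mirroring the `goto skip` in the patch.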
diff --git a/debian/patches/bugfix/all/kpti/x86-kaiser-move-feature-detection-up.patch b/debian/patches/bugfix/all/kpti/x86-kaiser-move-feature-detection-up.patch
new file mode 100644
index 0000000..7428f10
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-kaiser-move-feature-detection-up.patch
@@ -0,0 +1,77 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Mon, 25 Dec 2017 13:57:16 +0100
+Subject: x86/kaiser: Move feature detection up
+
+... before the first use of kaiser_enabled as otherwise funky
+things happen:
+
+ about to get started...
+ (XEN) d0v0 Unhandled page fault fault/trap [#14, ec=0000]
+ (XEN) Pagetable walk from ffff88022a449090:
+ (XEN) L4[0x110] = 0000000229e0e067 0000000000001e0e
+ (XEN) L3[0x008] = 0000000000000000 ffffffffffffffff
+ (XEN) domain_crash_sync called from entry.S: fault at ffff82d08033fd08
+ entry.o#create_bounce_frame+0x135/0x14d
+ (XEN) Domain 0 (vcpu#0) crashed on cpu#0:
+ (XEN) ----[ Xen-4.9.1_02-3.21 x86_64 debug=n Not tainted ]----
+ (XEN) CPU: 0
+ (XEN) RIP: e033:[<ffffffff81007460>]
+ (XEN) RFLAGS: 0000000000000286 EM: 1 CONTEXT: pv guest (d0v0)
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/kaiser.h | 2 ++
+ arch/x86/kernel/setup.c | 7 +++++++
+ arch/x86/mm/kaiser.c | 2 --
+ 3 files changed, 9 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/kaiser.h
++++ b/arch/x86/include/asm/kaiser.h
+@@ -96,8 +96,10 @@ DECLARE_PER_CPU(unsigned long, x86_cr3_p
+ extern char __per_cpu_user_mapped_start[], __per_cpu_user_mapped_end[];
+
+ extern int kaiser_enabled;
++extern void __init kaiser_check_boottime_disable(void);
+ #else
+ #define kaiser_enabled 0
++static inline void __init kaiser_check_boottime_disable(void) {}
+ #endif /* CONFIG_KAISER */
+
+ /*
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -110,6 +110,7 @@
+ #include <asm/mce.h>
+ #include <asm/alternative.h>
+ #include <asm/prom.h>
++#include <asm/kaiser.h>
+
+ /*
+ * max_low_pfn_mapped: highest direct mapped pfn under 4GB
+@@ -1019,6 +1020,12 @@ void __init setup_arch(char **cmdline_p)
+ */
+ init_hypervisor_platform();
+
++ /*
++ * This needs to happen right after XENPV is set on xen and
++ * kaiser_enabled is checked below in cleanup_highmap().
++ */
++ kaiser_check_boottime_disable();
++
+ x86_init.resources.probe_roms();
+
+ /* after parse_early_param, so could debug it */
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -321,8 +321,6 @@ void __init kaiser_init(void)
+ {
+ int cpu, idx;
+
+- kaiser_check_boottime_disable();
+-
+ if (!kaiser_enabled)
+ return;
+
diff --git a/debian/patches/bugfix/all/kpti/x86-kaiser-reenable-paravirt.patch b/debian/patches/bugfix/all/kpti/x86-kaiser-reenable-paravirt.patch
new file mode 100644
index 0000000..511cdb7
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-kaiser-reenable-paravirt.patch
@@ -0,0 +1,26 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Tue, 2 Jan 2018 14:19:49 +0100
+Subject: x86/kaiser: Reenable PARAVIRT
+
+Now that the required bits have been addressed, reenable
+PARAVIRT.
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ security/Kconfig | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/security/Kconfig
++++ b/security/Kconfig
+@@ -33,7 +33,7 @@ config SECURITY
+ config KAISER
+ bool "Remove the kernel mapping in user mode"
+ default y
+- depends on X86_64 && SMP && !PARAVIRT
++ depends on X86_64 && SMP
+ help
+ This enforces a strict kernel and user space isolation, in order
+ to close hardware side channels on kernel address information.
diff --git a/debian/patches/bugfix/all/kpti/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch b/debian/patches/bugfix/all/kpti/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
new file mode 100644
index 0000000..a242b7f
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
@@ -0,0 +1,95 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Tue, 2 Jan 2018 14:19:48 +0100
+Subject: x86/kaiser: Rename and simplify X86_FEATURE_KAISER handling
+
+Concentrate it in arch/x86/mm/kaiser.c and use the upstream string "nopti".
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Signed-off-by: Greg Kroah-Hartman <gregkh at linuxfoundation.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/kernel-parameters.txt | 2 +-
+ arch/x86/kernel/cpu/common.c | 18 ------------------
+ arch/x86/mm/kaiser.c | 20 +++++++++++++++++++-
+ 3 files changed, 20 insertions(+), 20 deletions(-)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2229,7 +2229,7 @@ bytes respectively. Such letter suffixes
+
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+- nokaiser [X86-64] Disable KAISER isolation of kernel from user.
++ nopti [X86-64] Disable KAISER isolation of kernel from user.
+
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -179,20 +179,6 @@ static int __init x86_pcid_setup(char *s
+ return 1;
+ }
+ __setup("nopcid", x86_pcid_setup);
+-
+-static int __init x86_nokaiser_setup(char *s)
+-{
+- /* nokaiser doesn't accept parameters */
+- if (s)
+- return -EINVAL;
+-#ifdef CONFIG_KAISER
+- kaiser_enabled = 0;
+- setup_clear_cpu_cap(X86_FEATURE_KAISER);
+- pr_info("nokaiser: KAISER feature disabled\n");
+-#endif
+- return 0;
+-}
+-early_param("nokaiser", x86_nokaiser_setup);
+ #endif
+
+ static int __init x86_noinvpcid_setup(char *s)
+@@ -733,10 +719,6 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
+ c->x86_power = cpuid_edx(0x80000007);
+
+ init_scattered_cpuid_features(c);
+-#ifdef CONFIG_KAISER
+- if (kaiser_enabled)
+- set_cpu_cap(c, X86_FEATURE_KAISER);
+-#endif
+ }
+
+ static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c)
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -278,8 +278,13 @@ void __init kaiser_init(void)
+ {
+ int cpu, idx;
+
+- if (!kaiser_enabled)
++ if (!kaiser_enabled) {
++ setup_clear_cpu_cap(X86_FEATURE_KAISER);
+ return;
++ }
++
++ setup_force_cpu_cap(X86_FEATURE_KAISER);
++
+ kaiser_init_all_pgds();
+
+ for_each_possible_cpu(cpu) {
+@@ -426,3 +431,16 @@ void kaiser_flush_tlb_on_return_to_user(
+ X86_CR3_PCID_USER_FLUSH | KAISER_SHADOW_PGD_OFFSET);
+ }
+ EXPORT_SYMBOL(kaiser_flush_tlb_on_return_to_user);
++
++static int __init x86_nokaiser_setup(char *s)
++{
++ /* nopti doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++
++ kaiser_enabled = 0;
++ pr_info("Kernel/User page tables isolation: disabled\n");
++
++ return 0;
++}
++early_param("nopti", x86_nokaiser_setup);
diff --git a/debian/patches/bugfix/all/kpti/x86-kvmclock-disable-use-from-vdso-if-kpti-is-enabled.patch b/debian/patches/bugfix/all/kpti/x86-kvmclock-disable-use-from-vdso-if-kpti-is-enabled.patch
new file mode 100644
index 0000000..d631f9e
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-kvmclock-disable-use-from-vdso-if-kpti-is-enabled.patch
@@ -0,0 +1,45 @@
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Fri, 5 Jan 2018 03:09:26 +0000
+Subject: x86: kvmclock: Disable use from vDSO if KPTI is enabled
+
+Currently the pvclock pages aren't being added to user-space page
+tables, and my attempt to fix this didn't work.
+
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/kvmclock.c | 5 +++++
+ arch/x86/mm/kaiser.c | 2 +-
+ 2 files changed, 6 insertions(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -24,6 +24,7 @@
+ #include <linux/percpu.h>
+ #include <linux/hardirq.h>
+ #include <linux/memblock.h>
++#include <linux/kaiser.h>
+
+ #include <asm/x86_init.h>
+ #include <asm/reboot.h>
+@@ -281,6 +282,10 @@ int __init kvm_setup_vsyscall_timeinfo(v
+ if (!hv_clock)
+ return 0;
+
++ /* FIXME: Need to add pvclock pages to user-space page tables */
++ if (kaiser_enabled)
++ return 0;
++
+ size = PAGE_ALIGN(sizeof(struct pvclock_vsyscall_time_info)*NR_CPUS);
+
+ preempt_disable();
+--- a/arch/x86/mm/kaiser.c
++++ b/arch/x86/mm/kaiser.c
+@@ -265,7 +265,7 @@ static void __init kaiser_init_all_pgds(
+ */
+ void __init kaiser_init(void)
+ {
+- int cpu;
++ int cpu, idx;
+
+ kaiser_init_all_pgds();
+
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch b/debian/patches/bugfix/all/kpti/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
new file mode 100644
index 0000000..7c8da71
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
@@ -0,0 +1,40 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Sun, 8 Oct 2017 21:53:05 -0700
+Subject: x86/mm/64: Fix reboot interaction with CR4.PCIDE
+
+commit 924c6b900cfdf376b07bccfd80e62b21914f8a5a upstream.
+
+Trying to reboot via real mode fails with PCID on: long mode cannot
+be exited while CR4.PCIDE is set. (No, I have no idea why, but the
+SDM and actual CPUs are in agreement here.) The result is a GPF and
+a hang instead of a reboot.
+
+I didn't catch this in testing because neither my computer nor my VM
+reboots this way. I can trigger it with reboot=bios, though.
+
+Fixes: 660da7c9228f ("x86/mm: Enable CR4.PCIDE on supported systems")
+Reported-and-tested-by: Steven Rostedt (VMware) <rostedt at goodmis.org>
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
+Cc: Borislav Petkov <bp at alien8.de>
+Link: https://lkml.kernel.org/r/f1e7d965998018450a7a70c2823873686a8b21c0.1507524746.git.luto@kernel.org
+Cc: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16: use clear_in_cr4()]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/reboot.c | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -92,6 +92,10 @@ void __noreturn machine_real_restart(uns
+ load_cr3(initial_page_table);
+ #else
+ write_cr3(real_mode_header->trampoline_pgd);
++
++ /* Exiting long mode will fail if CR4.PCIDE is set. */
++ if (static_cpu_has(X86_FEATURE_PCID))
++ clear_in_cr4(X86_CR4_PCIDE);
+ #endif
+
+ /* Jump to the identity-mapped low memory code */
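The fix above clears a single CR4 bit before dropping out of long mode. As a hedged illustration of the bit arithmetic only (the bit position 17 for `X86_CR4_PCIDE` is taken from the Intel SDM; the real `clear_in_cr4()` also writes the register back):

```c
#include <assert.h>
#include <stdint.h>

#define X86_CR4_PCIDE (1UL << 17)	/* enable PCID (Intel SDM vol. 3) */

/* Model of the masking step: long mode cannot be exited while PCIDE is
 * set, so the real-mode reboot path masks it off first. */
static uint64_t clear_pcide(uint64_t cr4)
{
	return cr4 & ~X86_CR4_PCIDE;
}
```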
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch b/debian/patches/bugfix/all/kpti/x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch
new file mode 100644
index 0000000..855303a
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch
@@ -0,0 +1,72 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Fri, 29 Jan 2016 11:42:58 -0800
+Subject: x86/mm: Add a 'noinvpcid' boot option to turn off INVPCID
+
+commit d12a72b844a49d4162f24cefdab30bed3f86730e upstream.
+
+This adds a chicken bit to turn off INVPCID in case something goes
+wrong. It's an early_param() because we do TLB flushes before we
+parse __setup() parameters.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof at suse.com>
+Cc: Oleg Nesterov <oleg at redhat.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: Toshi Kani <toshi.kani at hp.com>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/f586317ed1bc2b87aee652267e515b90051af385.1454096309.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/kernel-parameters.txt | 2 ++
+ arch/x86/kernel/cpu/common.c | 16 ++++++++++++++++
+ 2 files changed, 18 insertions(+)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2225,6 +2225,8 @@ bytes respectively. Such letter suffixes
+
+ nointroute [IA-64]
+
++ noinvpcid [X86] Disable the INVPCID cpu feature.
++
+ nojitter [IA-64] Disables jitter checking for ITC timers.
+
+ no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -163,6 +163,22 @@ static int __init x86_xsaveopt_setup(cha
+ }
+ __setup("noxsaveopt", x86_xsaveopt_setup);
+
++static int __init x86_noinvpcid_setup(char *s)
++{
++ /* noinvpcid doesn't accept parameters */
++ if (s)
++ return -EINVAL;
++
++ /* do not emit a message if the feature is not present */
++ if (!boot_cpu_has(X86_FEATURE_INVPCID))
++ return 0;
++
++ setup_clear_cpu_cap(X86_FEATURE_INVPCID);
++ pr_info("noinvpcid: INVPCID feature disabled\n");
++ return 0;
++}
++early_param("noinvpcid", x86_noinvpcid_setup);
++
+ #ifdef CONFIG_X86_32
+ static int cachesize_override = -1;
+ static int disable_x86_serial_nr = 1;
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-add-invpcid-helpers.patch b/debian/patches/bugfix/all/kpti/x86-mm-add-invpcid-helpers.patch
new file mode 100644
index 0000000..808e596
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-add-invpcid-helpers.patch
@@ -0,0 +1,91 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Fri, 29 Jan 2016 11:42:57 -0800
+Subject: x86/mm: Add INVPCID helpers
+
+commit 060a402a1ddb551455ee410de2eadd3349f2801b upstream.
+
+This adds helpers for each of the four currently-specified INVPCID
+modes.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof at suse.com>
+Cc: Oleg Nesterov <oleg at redhat.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: Toshi Kani <toshi.kani at hp.com>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/8a62b23ad686888cee01da134c91409e22064db9.1454096309.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 48 +++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 48 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -7,6 +7,54 @@
+ #include <asm/processor.h>
+ #include <asm/special_insns.h>
+
++static inline void __invpcid(unsigned long pcid, unsigned long addr,
++ unsigned long type)
++{
++ u64 desc[2] = { pcid, addr };
++
++ /*
++ * The memory clobber is because the whole point is to invalidate
++ * stale TLB entries and, especially if we're flushing global
++ * mappings, we don't want the compiler to reorder any subsequent
++ * memory accesses before the TLB flush.
++ *
++ * The hex opcode is invpcid (%ecx), %eax in 32-bit mode and
++ * invpcid (%rcx), %rax in long mode.
++ */
++ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
++ : : "m" (desc), "a" (type), "c" (desc) : "memory");
++}
++
++#define INVPCID_TYPE_INDIV_ADDR 0
++#define INVPCID_TYPE_SINGLE_CTXT 1
++#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
++#define INVPCID_TYPE_ALL_NON_GLOBAL 3
++
++/* Flush all mappings for a given pcid and addr, not including globals. */
++static inline void invpcid_flush_one(unsigned long pcid,
++ unsigned long addr)
++{
++ __invpcid(pcid, addr, INVPCID_TYPE_INDIV_ADDR);
++}
++
++/* Flush all mappings for a given PCID, not including globals. */
++static inline void invpcid_flush_single_context(unsigned long pcid)
++{
++ __invpcid(pcid, 0, INVPCID_TYPE_SINGLE_CTXT);
++}
++
++/* Flush all mappings, including globals, for all PCIDs. */
++static inline void invpcid_flush_all(void)
++{
++ __invpcid(0, 0, INVPCID_TYPE_ALL_INCL_GLOBAL);
++}
++
++/* Flush all mappings for all PCIDs except globals. */
++static inline void invpcid_flush_all_nonglobals(void)
++{
++ __invpcid(0, 0, INVPCID_TYPE_ALL_NON_GLOBAL);
++}
++
+ #ifdef CONFIG_PARAVIRT
+ #include <asm/paravirt.h>
+ #else
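All four helpers above funnel into one privileged instruction that reads a 128-bit in-memory descriptor: the PCID in the low bits of the first quadword, the linear address in the second. Since `invpcid` itself faults outside ring 0, this sketch only models the descriptor layout; the 12-bit PCID mask reflects the architectural field width and is an assumption not present in the kernel code, which passes the value through unmasked:

```c
#include <assert.h>
#include <stdint.h>

#define INVPCID_TYPE_INDIV_ADDR      0
#define INVPCID_TYPE_SINGLE_CTXT     1
#define INVPCID_TYPE_ALL_INCL_GLOBAL 2
#define INVPCID_TYPE_ALL_NON_GLOBAL  3

/* Build the two-quadword descriptor that invpcid reads from memory.
 * Types 2 and 3 ignore both fields, so the helpers pass zeroes there. */
static void invpcid_desc(uint64_t pcid, uint64_t addr, uint64_t desc[2])
{
	desc[0] = pcid & 0xfff;	/* PCID occupies bits 0-11 */
	desc[1] = addr;
}
```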
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch b/debian/patches/bugfix/all/kpti/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
new file mode 100644
index 0000000..f8f0c54
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
@@ -0,0 +1,71 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Thu, 29 Jun 2017 08:53:20 -0700
+Subject: x86/mm: Add the 'nopcid' boot option to turn off PCID
+
+commit 0790c9aad84901ca1bdc14746175549c8b5da215 upstream.
+The parameter is only present on x86_64 systems to save a few bytes,
+as PCID is always disabled on x86_32.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit at gmail.com>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Reviewed-by: Thomas Gleixner <tglx at linutronix.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Arjan van de Ven <arjan at linux.intel.com>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Mel Gorman <mgorman at suse.de>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/8bbb2e65bcd249a5f18bfb8128b4689f08ac2b60.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+[Hugh Dickins: Backported to 3.18:
+ - Documentation/admin-guide/kernel-parameters.txt (not in this tree)
+ - Documentation/kernel-parameters.txt (patched instead of that)
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/kernel-parameters.txt | 2 ++
+ arch/x86/kernel/cpu/common.c | 18 ++++++++++++++++++
+ 2 files changed, 20 insertions(+)
+
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -2261,6 +2261,8 @@ bytes respectively. Such letter suffixes
+ nopat [X86] Disable PAT (page attribute table extension of
+ pagetables) support.
+
++ nopcid [X86-64] Disable the PCID cpu feature.
++
+ norandmaps Don't use address space randomization. Equivalent to
+ echo 0 > /proc/sys/kernel/randomize_va_space
+
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -163,6 +163,24 @@ static int __init x86_xsaveopt_setup(cha
+ }
+ __setup("noxsaveopt", x86_xsaveopt_setup);
+
++#ifdef CONFIG_X86_64
++static int __init x86_pcid_setup(char *s)
++{
++ /* require an exact match without trailing characters */
++ if (strlen(s))
++ return 0;
++
++ /* do not emit a message if the feature is not present */
++ if (!boot_cpu_has(X86_FEATURE_PCID))
++ return 1;
++
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++ pr_info("nopcid: PCID feature disabled\n");
++ return 1;
++}
++__setup("nopcid", x86_pcid_setup);
++#endif
++
+ static int __init x86_noinvpcid_setup(char *s)
+ {
+ /* noinvpcid doesn't accept parameters */
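Note the guard style: unlike the `early_param()` handlers elsewhere in this series, `x86_pcid_setup()` demands an exact match (`strlen(s) == 0`) because `__setup()` hands any trailing characters to the callback. A trivial user-space model of that guard, with an illustrative name:

```c
#include <assert.h>
#include <stdbool.h>
#include <string.h>

/* Model of the "nopcid" __setup handler's guard: only the bare option,
 * with no trailing characters, counts as a match ("nopcidfoo" must not). */
static bool nopcid_matches(const char *trailing)
{
	return strlen(trailing) == 0;
}
```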
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-build-arch-x86-mm-tlb.c-even-on-smp.patch b/debian/patches/bugfix/all/kpti/x86-mm-build-arch-x86-mm-tlb.c-even-on-smp.patch
new file mode 100644
index 0000000..212c2d8
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-build-arch-x86-mm-tlb.c-even-on-smp.patch
@@ -0,0 +1,63 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Tue, 26 Apr 2016 09:39:07 -0700
+Subject: x86/mm: Build arch/x86/mm/tlb.c even on !SMP
+
+commit e1074888c326038340a1ada9129d679e661f2ea6 upstream.
+
+Currently all of the functions that live in tlb.c are inlined on
+!SMP builds. One can debate whether this is a good idea (in many
+respects the code in tlb.c is better than the inlined UP code).
+
+Regardless, I want to add code that needs to be built on UP and SMP
+kernels and relates to tlb flushing, so arrange for tlb.c to be
+compiled unconditionally.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/f0d778f0d828fc46e5d1946bca80f0aaf9abf032.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/Makefile | 3 +--
+ arch/x86/mm/tlb.c | 4 ++++
+ 2 files changed, 5 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/Makefile
++++ b/arch/x86/mm/Makefile
+@@ -1,5 +1,5 @@
+ obj-y := init.o init_$(BITS).o fault.o ioremap.o extable.o pageattr.o mmap.o \
+- pat.o pgtable.o physaddr.o gup.o setup_nx.o
++ pat.o pgtable.o physaddr.o gup.o setup_nx.o tlb.o
+
+ # Make sure __phys_addr has no stackprotector
+ nostackp := $(call cc-option, -fno-stack-protector)
+@@ -9,7 +9,6 @@ CFLAGS_setup_nx.o := $(nostackp)
+ CFLAGS_fault.o := -I$(src)/../include/asm/trace
+
+ obj-$(CONFIG_X86_PAT) += pat_rbtree.o
+-obj-$(CONFIG_SMP) += tlb.o
+
+ obj-$(CONFIG_X86_32) += pgtable_32.o iomap_32.o
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -31,6 +31,8 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
++#ifdef CONFIG_SMP
++
+ struct flush_tlb_info {
+ struct mm_struct *flush_mm;
+ unsigned long flush_start;
+@@ -330,3 +332,5 @@ static int __init create_tlb_single_page
+ return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
++
++#endif /* CONFIG_SMP */
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-clean-up-the-tlb-flushing-code.patch b/debian/patches/bugfix/all/kpti/x86-mm-clean-up-the-tlb-flushing-code.patch
new file mode 100644
index 0000000..934a97a
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-clean-up-the-tlb-flushing-code.patch
@@ -0,0 +1,93 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Thu, 31 Jul 2014 08:40:54 -0700
+Subject: x86/mm: Clean up the TLB flushing code
+
+commit 4995ab9cf512e9a6cc07dfd6b1d4e2fc48ce7fef upstream.
+
+The
+
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+
+line of code is not exactly the easiest to audit, especially when
+it ends up at two different indentation levels. This eliminates
+one of the copy-n-paste versions. It also gives us a unified
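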
+exit point for each path through this function. We need this in
+a minute for our tracepoint.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Link: http://lkml.kernel.org/r/20140731154054.44F1CDDC@viggo.jf.intel.com
+Acked-by: Rik van Riel <riel at redhat.com>
+Acked-by: Mel Gorman <mgorman at suse.de>
+Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
+Cc: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/tlb.c | 23 +++++++++++------------
+ 1 file changed, 11 insertions(+), 12 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -162,6 +162,7 @@ void flush_tlb_current_task(void)
+ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag)
+ {
++ bool need_flush_others_all = true;
+ unsigned long addr;
+ unsigned act_entries, tlb_entries = 0;
+ unsigned long nr_base_pages;
+@@ -171,7 +172,7 @@ void flush_tlb_mm_range(struct mm_struct
+ /* Synchronize with switch_mm. */
+ smp_mb();
+
+- goto flush_all;
++ goto out;
+ }
+
+ if (!current->mm) {
+@@ -180,13 +181,13 @@ void flush_tlb_mm_range(struct mm_struct
+ /* Synchronize with switch_mm. */
+ smp_mb();
+
+- goto flush_all;
++ goto out;
+ }
+
+ if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
+ || vmflag & VM_HUGETLB) {
+ local_flush_tlb();
+- goto flush_all;
++ goto out;
+ }
+
+ /* In modern CPU, last level tlb used for both data/ins */
+@@ -205,22 +206,20 @@ void flush_tlb_mm_range(struct mm_struct
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ } else {
++ need_flush_others_all = false;
+ /* flush range by one by one 'invlpg' */
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+ __flush_tlb_single(addr);
+ }
+-
+- if (cpumask_any_but(mm_cpumask(mm),
+- smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, start, end);
+- preempt_enable();
+- return;
+ }
+-
+-flush_all:
++out:
++ if (need_flush_others_all) {
++ start = 0UL;
++ end = TLB_FLUSH_ALL;
++ }
+ if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
++ flush_tlb_others(mm_cpumask(mm), mm, start, end);
+ preempt_enable();
+ }
+
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-disable-pcid-on-32-bit-kernels.patch b/debian/patches/bugfix/all/kpti/x86-mm-disable-pcid-on-32-bit-kernels.patch
new file mode 100644
index 0000000..f078b3c
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-disable-pcid-on-32-bit-kernels.patch
@@ -0,0 +1,63 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Thu, 29 Jun 2017 08:53:19 -0700
+Subject: x86/mm: Disable PCID on 32-bit kernels
+
+commit cba4671af7550e008f7a7835f06df0763825bf3e upstream.
+
+32-bit kernels on new hardware will see PCID in CPUID, but PCID can
+only be used in 64-bit mode. Rather than making all PCID code
+conditional, just disable the feature on 32-bit builds.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit at gmail.com>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Reviewed-by: Thomas Gleixner <tglx at linutronix.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Arjan van de Ven <arjan at linux.intel.com>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Mel Gorman <mgorman at suse.de>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/2e391769192a4d31b808410c383c6bf0734bc6ea.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/cpu/bugs.c | 8 ++++++++
+ arch/x86/kernel/cpu/common.c | 5 +++++
+ 2 files changed, 13 insertions(+)
+
+--- a/arch/x86/kernel/cpu/bugs.c
++++ b/arch/x86/kernel/cpu/bugs.c
+@@ -65,6 +65,14 @@ static void __init check_fpu(void)
+
+ void __init check_bugs(void)
+ {
++#ifdef CONFIG_X86_32
++ /*
++ * Regardless of whether PCID is enumerated, the SDM says
++ * that it can't be enabled in 32-bit mode.
++ */
++ setup_clear_cpu_cap(X86_FEATURE_PCID);
++#endif
++
+ identify_boot_cpu();
+ #ifndef CONFIG_SMP
+ pr_info("CPU: ");
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -1015,6 +1015,11 @@ void identify_secondary_cpu(struct cpuin
+ BUG_ON(c == &boot_cpu_data);
+ identify_cpu(c);
+ #ifdef CONFIG_X86_32
++ /*
++ * Regardless of whether PCID is enumerated, the SDM says
++ * that it can't be enabled in 32-bit mode.
++ */
++ clear_cpu_cap(c, X86_FEATURE_PCID);
+ enable_sep_cpu();
+ #endif
+ mtrr_ap_init();
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-enable-cr4.pcide-on-supported-systems.patch b/debian/patches/bugfix/all/kpti/x86-mm-enable-cr4.pcide-on-supported-systems.patch
new file mode 100644
index 0000000..39a7067
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-enable-cr4.pcide-on-supported-systems.patch
@@ -0,0 +1,109 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Thu, 29 Jun 2017 08:53:21 -0700
+Subject: x86/mm: Enable CR4.PCIDE on supported systems
+
+commit 660da7c9228f685b2ebe664f9fd69aaddcc420b5 upstream.
+
+We can use PCID if the CPU has PCID and PGE and we're not on Xen.
+
+By itself, this has no effect. A followup patch will start using PCID.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Nadav Amit <nadav.amit at gmail.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Reviewed-by: Thomas Gleixner <tglx at linutronix.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Arjan van de Ven <arjan at linux.intel.com>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Juergen Gross <jgross at suse.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Mel Gorman <mgorman at suse.de>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/6327ecd907b32f79d5aa0d466f04503bbec5df88.1498751203.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+[Hugh Dickins: Backported to 3.18:
+ - arch/x86/xen/enlighten_pv.c (not in this tree)
+ - arch/x86/xen/enlighten.c (patched instead of that)]
+Signed-off-by: Hugh Dickins <hughd at google.com>
+[Borislav Petkov: Fix bad backport to disable PCID on Xen]
+[bwh: Backported to 3.16: use set_in_cr4()]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 8 ++++++++
+ arch/x86/kernel/cpu/common.c | 22 ++++++++++++++++++++++
+ arch/x86/xen/enlighten.c | 6 ++++++
+ 3 files changed, 36 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -123,6 +123,14 @@ static inline void __flush_tlb_all(void)
+ __flush_tlb_global();
+ else
+ __flush_tlb();
++
++ /*
++ * Note: if we somehow had PCID but not PGE, then this wouldn't work --
++ * we'd end up flushing kernel translations for the current ASID but
++ * we might fail to flush kernel translations for other cached ASIDs.
++ *
++ * To avoid this issue, we force PCID off if PGE is off.
++ */
+ }
+
+ static inline void __flush_tlb_one(unsigned long addr)
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -330,6 +330,25 @@ static __always_inline void setup_smap(s
+ }
+ }
+
++static void setup_pcid(struct cpuinfo_x86 *c)
++{
++ if (cpu_has(c, X86_FEATURE_PCID)) {
++ if (cpu_has(c, X86_FEATURE_PGE)) {
++ set_in_cr4(X86_CR4_PCIDE);
++ } else {
++ /*
++ * flush_tlb_all(), as currently implemented, won't
++ * work if PCID is on but PGE is not. Since that
++ * combination doesn't exist on real hardware, there's
++ * no reason to try to fully support it, but it's
++ * polite to avoid corrupting data if we're on
++ * an improperly configured VM.
++ */
++ clear_cpu_cap(c, X86_FEATURE_PCID);
++ }
++ }
++}
++
+ /*
+ * Some CPU features depend on higher CPUID levels, which may not always
+ * be available due to CPUID level capping or broken virtualization
+@@ -911,6 +930,9 @@ static void identify_cpu(struct cpuinfo_
+ setup_smep(c);
+ setup_smap(c);
+
++ /* Set up PCID */
++ setup_pcid(c);
++
+ /*
+ * The vendor-specific functions might have changed features.
+ * Now we do "generic changes."
+--- a/arch/x86/xen/enlighten.c
++++ b/arch/x86/xen/enlighten.c
+@@ -430,6 +430,12 @@ static void __init xen_init_cpuid_mask(v
+ ~((1 << X86_FEATURE_MTRR) | /* disable MTRR */
+ (1 << X86_FEATURE_ACC)); /* thermal monitoring */
+
++ /*
++ * Xen PV would need some work to support PCID: CR3 handling as well
++ * as xen_flush_tlb_others() would need updating.
++ */
++ cpuid_leaf1_ecx_mask &= ~(1 << (X86_FEATURE_PCID % 32)); /* disable PCID */
++
+ if (!xen_initial_domain())
+ cpuid_leaf1_edx_mask &=
+ ~((1 << X86_FEATURE_ACPI)); /* disable ACPI */
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-fix-invpcid-asm-constraint.patch b/debian/patches/bugfix/all/kpti/x86-mm-fix-invpcid-asm-constraint.patch
new file mode 100644
index 0000000..34c0abc
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-fix-invpcid-asm-constraint.patch
@@ -0,0 +1,66 @@
+From: Borislav Petkov <bp at suse.de>
+Date: Wed, 10 Feb 2016 15:51:16 +0100
+Subject: x86/mm: Fix INVPCID asm constraint
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+commit e2c7698cd61f11d4077fdb28148b2d31b82ac848 upstream.
+
+So we want to specify the dependency on both @pcid and @addr so that the
+compiler doesn't reorder accesses to them *before* the TLB flush. But
+for that to work, we need to express this properly in the inline asm and
+deref the whole desc array, not the pointer to it. See clwb() for an
+example.
+
+This fixes the build error on 32-bit:
+
+ arch/x86/include/asm/tlbflush.h: In function ‘__invpcid’:
+ arch/x86/include/asm/tlbflush.h:26:18: error: memory input 0 is not directly addressable
+
+which gcc4.7 caught but 5.x didn't. Which is strange. :-\
+
+Signed-off-by: Borislav Petkov <bp at suse.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof at suse.com>
+Cc: Michael Matz <matz at suse.de>
+Cc: Oleg Nesterov <oleg at redhat.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: Toshi Kani <toshi.kani at hp.com>
+Cc: linux-mm at kvack.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 4 ++--
+ 1 file changed, 2 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -10,7 +10,7 @@
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+ {
+- u64 desc[2] = { pcid, addr };
++ struct { u64 d[2]; } desc = { { pcid, addr } };
+
+ /*
+ * The memory clobber is because the whole point is to invalidate
+@@ -22,7 +22,7 @@ static inline void __invpcid(unsigned lo
+ * invpcid (%rcx), %rax in long mode.
+ */
+ asm volatile (".byte 0x66, 0x0f, 0x38, 0x82, 0x01"
+- : : "m" (desc), "a" (type), "c" (desc) : "memory");
++ : : "m" (desc), "a" (type), "c" (&desc) : "memory");
+ }
+
+ #define INVPCID_TYPE_INDIV_ADDR 0
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-fix-missed-global-tlb-flush-stat.patch b/debian/patches/bugfix/all/kpti/x86-mm-fix-missed-global-tlb-flush-stat.patch
new file mode 100644
index 0000000..3f6859a
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-fix-missed-global-tlb-flush-stat.patch
@@ -0,0 +1,72 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Thu, 31 Jul 2014 08:40:56 -0700
+Subject: x86/mm: Fix missed global TLB flush stat
+
+commit 9dfa6dee5355f200cf19528ca7c678ef4007cec5 upstream.
+
+If we take the
+
+ if (end == TLB_FLUSH_ALL || vmflag & VM_HUGETLB) {
+ local_flush_tlb();
+ goto out;
+ }
+
+path out of flush_tlb_mm_range(), we will have flushed the tlb,
+but not incremented NR_TLB_LOCAL_FLUSH_ALL. This unifies the
+way out of the function so that we always take a single path when
+doing a full tlb flush.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Link: http://lkml.kernel.org/r/20140731154056.FF763B76@viggo.jf.intel.com
+Acked-by: Rik van Riel <riel at redhat.com>
+Acked-by: Mel Gorman <mgorman at suse.de>
+Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/tlb.c | 15 +++++++--------
+ 1 file changed, 7 insertions(+), 8 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -165,8 +165,9 @@ unsigned long tlb_single_page_flush_ceil
+ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag)
+ {
+- int need_flush_others_all = 1;
+ unsigned long addr;
++ /* do a global flush by default */
++ unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+
+ preempt_disable();
+ if (current->active_mm != mm) {
+@@ -185,16 +186,14 @@ void flush_tlb_mm_range(struct mm_struct
+ goto out;
+ }
+
+- if (end == TLB_FLUSH_ALL || vmflag & VM_HUGETLB) {
+- local_flush_tlb();
+- goto out;
+- }
++ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
++ base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+
+- if ((end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
++ if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
++ base_pages_to_flush = TLB_FLUSH_ALL;
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ } else {
+- need_flush_others_all = 0;
+ /* flush range by one by one 'invlpg' */
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+@@ -202,7 +201,7 @@ void flush_tlb_mm_range(struct mm_struct
+ }
+ }
+ out:
+- if (need_flush_others_all) {
++ if (base_pages_to_flush == TLB_FLUSH_ALL) {
+ start = 0UL;
+ end = TLB_FLUSH_ALL;
+ }
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-fix-sparse-tlb_single_page_flush_ceiling-warning-and-make-the-variable-read-mostly.patch b/debian/patches/bugfix/all/kpti/x86-mm-fix-sparse-tlb_single_page_flush_ceiling-warning-and-make-the-variable-read-mostly.patch
new file mode 100644
index 0000000..4bf2b58
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-fix-sparse-tlb_single_page_flush_ceiling-warning-and-make-the-variable-read-mostly.patch
@@ -0,0 +1,42 @@
+From: Jeremiah Mahler <jmmahler at gmail.com>
+Date: Sat, 9 Aug 2014 00:38:33 -0700
+Subject: x86/mm: Fix sparse 'tlb_single_page_flush_ceiling' warning and make the variable read-mostly
+
+commit 86426851c38d3fe84dee34d7daa71d26c174d409 upstream.
+
+A sparse warning is generated about
+'tlb_single_page_flush_ceiling' not being declared.
+
+ arch/x86/mm/tlb.c:177:15: warning: symbol
+ 'tlb_single_page_flush_ceiling' was not declared. Should it be static?
+
+Since it isn't used anywhere outside this file, fix the warning
+by making it static.
+
+Also, optimize the use of this variable by adding the
+__read_mostly directive, as suggested by David Rientjes.
+
+Suggested-by: David Rientjes <rientjes at google.com>
+Signed-off-by: Jeremiah Mahler <jmmahler at gmail.com>
+Cc: Dave Hansen <dave.hansen at linux.intel.com>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Mel Gorman <mgorman at suse.de>
+Link: http://lkml.kernel.org/r/1407569913-4035-1-git-send-email-jmmahler@gmail.com
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/tlb.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -169,7 +169,7 @@ void flush_tlb_current_task(void)
+ *
+ * This is in units of pages.
+ */
+-unsigned long tlb_single_page_flush_ceiling = 33;
++static unsigned long tlb_single_page_flush_ceiling __read_mostly = 33;
+
+ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag)
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch b/debian/patches/bugfix/all/kpti/x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch
new file mode 100644
index 0000000..1cf32ad
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch
@@ -0,0 +1,54 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Fri, 29 Jan 2016 11:42:59 -0800
+Subject: x86/mm: If INVPCID is available, use it to flush global mappings
+
+commit d8bced79af1db6734f66b42064cc773cada2ce99 upstream.
+
+On my Skylake laptop, INVPCID function 2 (flush absolutely
+everything) takes about 376ns, whereas saving flags, twiddling
+CR4.PGE to flush global mappings, and restoring flags takes about
+539ns.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Andrey Ryabinin <aryabinin at virtuozzo.com>
+Cc: Andy Lutomirski <luto at amacapital.net>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at linux.intel.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Luis R. Rodriguez <mcgrof at suse.com>
+Cc: Oleg Nesterov <oleg at redhat.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: Toshi Kani <toshi.kani at hp.com>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/ed0ef62581c0ea9c99b9bf6df726015e96d44743.1454096309.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 9 +++++++++
+ 1 file changed, 9 insertions(+)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -90,6 +90,15 @@ static inline void __native_flush_tlb_gl
+ {
+ unsigned long flags;
+
++ if (static_cpu_has(X86_FEATURE_INVPCID)) {
++ /*
++ * Using INVPCID is considerably faster than a pair of writes
++ * to CR4 sandwiched inside an IRQ flag save/restore.
++ */
++ invpcid_flush_all();
++ return;
++ }
++
+ /*
+ * Read-modify-write to CR4 - protect it from preemption and
+ * from interrupts. (Use the raw variant because this code can
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-make-flush_tlb_mm_range-more-predictable.patch b/debian/patches/bugfix/all/kpti/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
new file mode 100644
index 0000000..b17e6c1
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
@@ -0,0 +1,77 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Sat, 22 Apr 2017 00:01:21 -0700
+Subject: x86/mm: Make flush_tlb_mm_range() more predictable
+
+commit ce27374fabf553153c3f53efcaa9bfab9216bd8c upstream.
+
+I'm about to rewrite the function almost completely, but first I
+want to get a functional change out of the way. Currently, if
+flush_tlb_mm_range() does not flush the local TLB at all, it will
+never do individual page flushes on remote CPUs. This seems to be
+an accident, and preserving it will be awkward. Let's change it
+first so that any regressions in the rewrite will be easier to
+bisect and so that the rewrite can attempt to change no visible
+behavior at all.
+
+The fix is simple: we can simply avoid short-circuiting the
+calculation of base_pages_to_flush.
+
+As a side effect, this also eliminates a potential corner case: if
+tlb_single_page_flush_ceiling == TLB_FLUSH_ALL, flush_tlb_mm_range()
+could have ended up flushing the entire address space one page at a
+time.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Acked-by: Dave Hansen <dave.hansen at intel.com>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Josh Poimboeuf <jpoimboe at redhat.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Michal Hocko <mhocko at suse.com>
+Cc: Nadav Amit <namit at vmware.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/4b29b771d9975aad7154c314534fec235618175a.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/tlb.c | 16 +++++++++++-----
+ 1 file changed, 11 insertions(+), 5 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -257,6 +257,12 @@ void flush_tlb_mm_range(struct mm_struct
+ unsigned long base_pages_to_flush = TLB_FLUSH_ALL;
+
+ preempt_disable();
++
++ if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
++ base_pages_to_flush = (end - start) >> PAGE_SHIFT;
++ if (base_pages_to_flush > tlb_single_page_flush_ceiling)
++ base_pages_to_flush = TLB_FLUSH_ALL;
++
+ if (current->active_mm != mm) {
+ /* Synchronize with switch_mm. */
+ smp_mb();
+@@ -273,11 +279,11 @@ void flush_tlb_mm_range(struct mm_struct
+ goto out;
+ }
+
+- if ((end != TLB_FLUSH_ALL) && !(vmflag & VM_HUGETLB))
+- base_pages_to_flush = (end - start) >> PAGE_SHIFT;
+-
+- if (base_pages_to_flush > tlb_single_page_flush_ceiling) {
+- base_pages_to_flush = TLB_FLUSH_ALL;
++ /*
++ * Both branches below are implicit full barriers (MOV to CR or
++ * INVLPG) that synchronize with switch_mm.
++ */
++ if (base_pages_to_flush == TLB_FLUSH_ALL) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ } else {
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-new-tunable-for-single-vs-full-tlb-flush.patch b/debian/patches/bugfix/all/kpti/x86-mm-new-tunable-for-single-vs-full-tlb-flush.patch
new file mode 100644
index 0000000..c29c84e
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-new-tunable-for-single-vs-full-tlb-flush.patch
@@ -0,0 +1,160 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Thu, 31 Jul 2014 08:41:01 -0700
+Subject: x86/mm: New tunable for single vs full TLB flush
+
+commit 2d040a1ce903ca5d6e7c983621fb29c6883c4c48 upstream.
+
+Most of the logic here is in the documentation file. Please take
+a look at it.
+
+I know we've come full-circle here back to a tunable, but this
+new one is *WAY* simpler. I challenge anyone to describe in one
+sentence how the old one worked. Here's the way the new one
+works:
+
+ If we are flushing more pages than the ceiling, we use
+ the full flush, otherwise we use per-page flushes.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Link: http://lkml.kernel.org/r/20140731154101.12B52CAF@viggo.jf.intel.com
+Acked-by: Rik van Riel <riel at redhat.com>
+Acked-by: Mel Gorman <mgorman at suse.de>
+Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ Documentation/x86/tlb.txt | 75 +++++++++++++++++++++++++++++++++++++++++++++++
+ arch/x86/mm/tlb.c | 46 +++++++++++++++++++++++++++++
+ 2 files changed, 121 insertions(+)
+ create mode 100644 Documentation/x86/tlb.txt
+
+--- /dev/null
++++ b/Documentation/x86/tlb.txt
+@@ -0,0 +1,75 @@
++When the kernel unmaps or modifies the attributes of a range of
++memory, it has two choices:
++ 1. Flush the entire TLB with a two-instruction sequence. This is
++ a quick operation, but it causes collateral damage: TLB entries
++ from areas other than the one we are trying to flush will be
++ destroyed and must be refilled later, at some cost.
++ 2. Use the invlpg instruction to invalidate a single page at a
++ time. This could potentially cost many more instructions, but
++ it is a much more precise operation, causing no collateral
++ damage to other TLB entries.
++
++Which method to use depends on a few things:
++ 1. The size of the flush being performed. A flush of the entire
++ address space is obviously better performed by flushing the
++ entire TLB than doing 2^48/PAGE_SIZE individual flushes.
++ 2. The contents of the TLB. If the TLB is empty, then there will
++ be no collateral damage caused by doing the global flush, and
++ all of the individual flushes will have ended up being wasted
++ work.
++ 3. The size of the TLB. The larger the TLB, the more collateral
++ damage we do with a full flush. So, the larger the TLB, the
++ more attractive an individual flush looks. Data and
++ instructions have separate TLBs, as do different page sizes.
++ 4. The microarchitecture. The TLB has become a multi-level
++ cache on modern CPUs, and the global flushes have become more
++ expensive relative to single-page flushes.
++
++There is obviously no way the kernel can know all these things,
++especially the contents of the TLB during a given flush. The
++sizes of the flushes will vary greatly depending on the workload as
++well. There is essentially no "right" point to choose.
++
++You may be doing too many individual invalidations if you see the
++invlpg instruction (or instructions _near_ it) show up high in
++profiles. If you believe that individual invalidations are being
++called too often, you can lower the tunable:
++
++ /sys/kernel/debug/x86/tlb_single_page_flush_ceiling
++
++This will cause us to do the global flush for more cases.
++Lowering it to 0 will disable the use of the individual flushes.
++Setting it to 1 is a very conservative setting and it should
++never need to be 0 under normal circumstances.
++
++Despite the fact that a single individual flush on x86 is
++guaranteed to flush a full 2MB [1], hugetlbfs always uses the full
++flushes. THP is treated exactly the same as normal memory.
++
++You might see invlpg inside of flush_tlb_mm_range() show up in
++profiles, or you can use the trace_tlb_flush() tracepoints to
++determine how long the flush operations are taking.
++
++Essentially, you are balancing the cycles you spend doing invlpg
++with the cycles that you spend refilling the TLB later.
++
++You can measure how expensive TLB refills are by using
++performance counters and 'perf stat', like this:
++
++perf stat -e
++ cpu/event=0x8,umask=0x84,name=dtlb_load_misses_walk_duration/,
++ cpu/event=0x8,umask=0x82,name=dtlb_load_misses_walk_completed/,
++ cpu/event=0x49,umask=0x4,name=dtlb_store_misses_walk_duration/,
++ cpu/event=0x49,umask=0x2,name=dtlb_store_misses_walk_completed/,
++ cpu/event=0x85,umask=0x4,name=itlb_misses_walk_duration/,
++ cpu/event=0x85,umask=0x2,name=itlb_misses_walk_completed/
++
++That works on an IvyBridge-era CPU (i5-3320M). Different CPUs
++may have differently-named counters, but they should at least
++be there in some form. You can use pmu-tools 'ocperf list'
++(https://github.com/andikleen/pmu-tools) to find the right
++counters for a given CPU.
++
++1. A footnote in Intel's SDM "4.10.4.2 Recommended Invalidation"
++ says: "One execution of INVLPG is sufficient even for a page
++ with size greater than 4 KBytes."
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -275,3 +275,49 @@ void flush_tlb_kernel_range(unsigned lon
+ on_each_cpu(do_kernel_range_flush, &info, 1);
+ }
+ }
++
++static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
++ size_t count, loff_t *ppos)
++{
++ char buf[32];
++ unsigned int len;
++
++ len = sprintf(buf, "%ld\n", tlb_single_page_flush_ceiling);
++ return simple_read_from_buffer(user_buf, count, ppos, buf, len);
++}
++
++static ssize_t tlbflush_write_file(struct file *file,
++ const char __user *user_buf, size_t count, loff_t *ppos)
++{
++ char buf[32];
++ ssize_t len;
++ int ceiling;
++
++ len = min(count, sizeof(buf) - 1);
++ if (copy_from_user(buf, user_buf, len))
++ return -EFAULT;
++
++ buf[len] = '\0';
++ if (kstrtoint(buf, 0, &ceiling))
++ return -EINVAL;
++
++ if (ceiling < 0)
++ return -EINVAL;
++
++ tlb_single_page_flush_ceiling = ceiling;
++ return count;
++}
++
++static const struct file_operations fops_tlbflush = {
++ .read = tlbflush_read_file,
++ .write = tlbflush_write_file,
++ .llseek = default_llseek,
++};
++
++static int __init create_tlb_single_page_flush_ceiling(void)
++{
++ debugfs_create_file("tlb_single_page_flush_ceiling", S_IRUSR | S_IWUSR,
++ arch_debugfs_dir, NULL, &fops_tlbflush);
++ return 0;
++}
++late_initcall(create_tlb_single_page_flush_ceiling);
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch b/debian/patches/bugfix/all/kpti/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
new file mode 100644
index 0000000..935c8c8
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
@@ -0,0 +1,100 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Mon, 22 May 2017 15:30:01 -0700
+Subject: x86/mm: Reimplement flush_tlb_page() using flush_tlb_mm_range()
+
+commit ca6c99c0794875c6d1db6e22f246699691ab7e6b upstream.
+
+flush_tlb_page() was very similar to flush_tlb_mm_range() except that
+it had a couple of issues:
+
+ - It was missing an smp_mb() in the case where
+ current->active_mm != mm. (This is a longstanding bug reported by Nadav Amit)
+
+ - It was missing tracepoints and vm counter updates.
+
+The only reason that I can see for keeping it as a separate
+function is that it could avoid a few branches that
+flush_tlb_mm_range() needs in order to decide to flush just one page. This
+hardly seems worthwhile. If we decide we want to get rid of those
+branches again, a better way would be to introduce an
+__flush_tlb_mm_range() helper and make both flush_tlb_page() and
+flush_tlb_mm_range() use it.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Acked-by: Kees Cook <keescook at chromium.org>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Borislav Petkov <bpetkov at suse.de>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Mel Gorman <mgorman at suse.de>
+Cc: Michal Hocko <mhocko at suse.com>
+Cc: Nadav Amit <nadav.amit at gmail.com>
+Cc: Nadav Amit <namit at vmware.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: linux-mm at kvack.org
+Link: http://lkml.kernel.org/r/3cc3847cf888d8907577569b8bac3f01992ef8f9.1495492063.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 6 +++++-
+ arch/x86/mm/tlb.c | 27 ---------------------------
+ 2 files changed, 5 insertions(+), 28 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -227,11 +227,15 @@ static inline void flush_tlb_kernel_rang
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
++static inline void flush_tlb_page(struct vm_area_struct *vma, unsigned long a)
++{
++ flush_tlb_mm_range(vma->vm_mm, a, a + PAGE_SIZE, VM_NONE);
++}
++
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -303,33 +303,6 @@ out:
+ preempt_enable();
+ }
+
+-void flush_tlb_page(struct vm_area_struct *vma, unsigned long start)
+-{
+- struct mm_struct *mm = vma->vm_mm;
+-
+- preempt_disable();
+-
+- if (current->active_mm == mm) {
+- if (current->mm) {
+- /*
+- * Implicit full barrier (INVLPG) that synchronizes
+- * with switch_mm.
+- */
+- __flush_tlb_one(start);
+- } else {
+- leave_mm(smp_processor_id());
+-
+- /* Synchronize with switch_mm. */
+- smp_mb();
+- }
+- }
+-
+- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, start, start + PAGE_SIZE);
+-
+- preempt_enable();
+-}
+-
+ static void do_flush_tlb_all(void *info)
+ {
+ count_vm_tlb_event(NR_TLB_REMOTE_FLUSH_RECEIVED);
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch b/debian/patches/bugfix/all/kpti/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
new file mode 100644
index 0000000..fef8433
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
@@ -0,0 +1,97 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Sat, 22 Apr 2017 00:01:20 -0700
+Subject: x86/mm: Remove flush_tlb() and flush_tlb_current_task()
+
+commit 29961b59a51f8c6838a26a45e871a7ed6771809b upstream.
+
+I was trying to figure out how flush_tlb_current_task() would
+possibly work correctly if current->mm != current->active_mm, but I
+realized I could spare myself the effort: it has no callers except
+the unused flush_tlb() macro.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Josh Poimboeuf <jpoimboe at redhat.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Michal Hocko <mhocko at suse.com>
+Cc: Nadav Amit <namit at vmware.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/e52d64c11690f85e9f1d69d7b48cc2269cd2e94b.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/tlbflush.h | 9 ---------
+ arch/x86/mm/tlb.c | 16 ----------------
+ 2 files changed, 25 deletions(-)
+
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -135,7 +135,6 @@ static inline void __flush_tlb_one(unsig
+ /*
+ * TLB flushing:
+ *
+- * - flush_tlb() flushes the current mm struct TLBs
+ * - flush_tlb_all() flushes all processes TLBs
+ * - flush_tlb_mm(mm) flushes the specified mm context TLB's
+ * - flush_tlb_page(vma, vmaddr) flushes one page
+@@ -167,11 +166,6 @@ static inline void flush_tlb_all(void)
+ __flush_tlb_all();
+ }
+
+-static inline void flush_tlb(void)
+-{
+- __flush_tlb_up();
+-}
+-
+ static inline void local_flush_tlb(void)
+ {
+ __flush_tlb_up();
+@@ -233,14 +227,11 @@ static inline void flush_tlb_kernel_rang
+ flush_tlb_mm_range(vma->vm_mm, start, end, vma->vm_flags)
+
+ extern void flush_tlb_all(void);
+-extern void flush_tlb_current_task(void);
+ extern void flush_tlb_page(struct vm_area_struct *, unsigned long);
+ extern void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag);
+ extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
+
+-#define flush_tlb() flush_tlb_current_task()
+-
+ void native_flush_tlb_others(const struct cpumask *cpumask,
+ struct mm_struct *mm,
+ unsigned long start, unsigned long end);
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -237,22 +237,6 @@ void native_flush_tlb_others(const struc
+ smp_call_function_many(cpumask, flush_tlb_func, &info, 1);
+ }
+
+-void flush_tlb_current_task(void)
+-{
+- struct mm_struct *mm = current->mm;
+-
+- preempt_disable();
+-
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+-
+- /* This is an implicit full barrier that synchronizes with switch_mm. */
+- local_flush_tlb();
+-
+- if (cpumask_any_but(mm_cpumask(mm), smp_processor_id()) < nr_cpu_ids)
+- flush_tlb_others(mm_cpumask(mm), mm, 0UL, TLB_FLUSH_ALL);
+- preempt_enable();
+-}
+-
+ /*
+ * See Documentation/x86/tlb.txt for details. We choose 33
+ * because it is large enough to cover the vast majority (at
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch b/debian/patches/bugfix/all/kpti/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
new file mode 100644
index 0000000..df9d259
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
@@ -0,0 +1,259 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Sun, 28 May 2017 10:00:14 -0700
+Subject: x86/mm: Remove the UP asm/tlbflush.h code, always use the (formerly) SMP code
+
+commit ce4a4e565f5264909a18c733b864c3f74467f69e upstream.
+
+The UP asm/tlbflush.h generates somewhat nicer code than the SMP version.
+Aside from that, it's fallen quite a bit behind the SMP code:
+
+ - flush_tlb_mm_range() didn't flush individual pages if the range
+ was small.
+
+ - The lazy TLB code was much weaker. This usually wouldn't matter,
+ but, if a kernel thread flushed its lazy "active_mm" more than
+ once (due to reclaim or similar), it wouldn't be unlazied and
+ would instead pointlessly flush repeatedly.
+
+ - Tracepoints were missing.
+
+Aside from that, simply having the UP code around was a maintenance
+burden, since it means that any change to the TLB flush code had to
+make sure not to break it.
+
+Simplify everything by deleting the UP code.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Arjan van de Ven <arjan at linux.intel.com>
+Cc: Borislav Petkov <bpetkov at suse.de>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Mel Gorman <mgorman at suse.de>
+Cc: Michal Hocko <mhocko at suse.com>
+Cc: Nadav Amit <nadav.amit at gmail.com>
+Cc: Nadav Amit <namit at vmware.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Cc: linux-mm at kvack.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+[Hugh Dickins: Backported to 3.18]
+Signed-off-by: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/hardirq.h | 2 +-
+ arch/x86/include/asm/mmu.h | 6 ---
+ arch/x86/include/asm/mmu_context.h | 2 -
+ arch/x86/include/asm/tlbflush.h | 76 +-------------------------------------
+ arch/x86/mm/tlb.c | 17 +--------
+ 5 files changed, 4 insertions(+), 99 deletions(-)
+
+--- a/arch/x86/include/asm/hardirq.h
++++ b/arch/x86/include/asm/hardirq.h
+@@ -21,8 +21,8 @@ typedef struct {
+ #ifdef CONFIG_SMP
+ unsigned int irq_resched_count;
+ unsigned int irq_call_count;
+- unsigned int irq_tlb_count;
+ #endif
++ unsigned int irq_tlb_count;
+ #ifdef CONFIG_X86_THERMAL_VECTOR
+ unsigned int irq_thermal_count;
+ #endif
+--- a/arch/x86/include/asm/mmu.h
++++ b/arch/x86/include/asm/mmu.h
+@@ -20,12 +20,6 @@ typedef struct {
+ void __user *vdso;
+ } mm_context_t;
+
+-#ifdef CONFIG_SMP
+ void leave_mm(int cpu);
+-#else
+-static inline void leave_mm(int cpu)
+-{
+-}
+-#endif
+
+ #endif /* _ASM_X86_MMU_H */
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -68,10 +68,8 @@ void destroy_context(struct mm_struct *m
+
+ static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk)
+ {
+-#ifdef CONFIG_SMP
+ if (this_cpu_read(cpu_tlbstate.state) == TLBSTATE_OK)
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_LAZY);
+-#endif
+ }
+
+ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+--- a/arch/x86/include/asm/tlbflush.h
++++ b/arch/x86/include/asm/tlbflush.h
+@@ -6,6 +6,7 @@
+
+ #include <asm/processor.h>
+ #include <asm/special_insns.h>
++#include <asm/smp.h>
+
+ static inline void __invpcid(unsigned long pcid, unsigned long addr,
+ unsigned long type)
+@@ -146,79 +147,6 @@ static inline void __flush_tlb_one(unsig
+ * and page-granular flushes are available only on i486 and up.
+ */
+
+-#ifndef CONFIG_SMP
+-
+-/* "_up" is for UniProcessor.
+- *
+- * This is a helper for other header functions. *Not* intended to be called
+- * directly. All global TLB flushes need to either call this, or to bump the
+- * vm statistics themselves.
+- */
+-static inline void __flush_tlb_up(void)
+-{
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+- __flush_tlb();
+-}
+-
+-static inline void flush_tlb_all(void)
+-{
+- count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+- __flush_tlb_all();
+-}
+-
+-static inline void local_flush_tlb(void)
+-{
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm(struct mm_struct *mm)
+-{
+- if (mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_page(struct vm_area_struct *vma,
+- unsigned long addr)
+-{
+- if (vma->vm_mm == current->active_mm)
+- __flush_tlb_one(addr);
+-}
+-
+-static inline void flush_tlb_range(struct vm_area_struct *vma,
+- unsigned long start, unsigned long end)
+-{
+- if (vma->vm_mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void flush_tlb_mm_range(struct mm_struct *mm,
+- unsigned long start, unsigned long end, unsigned long vmflag)
+-{
+- if (mm == current->active_mm)
+- __flush_tlb_up();
+-}
+-
+-static inline void native_flush_tlb_others(const struct cpumask *cpumask,
+- struct mm_struct *mm,
+- unsigned long start,
+- unsigned long end)
+-{
+-}
+-
+-static inline void reset_lazy_tlbstate(void)
+-{
+-}
+-
+-static inline void flush_tlb_kernel_range(unsigned long start,
+- unsigned long end)
+-{
+- flush_tlb_all();
+-}
+-
+-#else /* SMP */
+-
+-#include <asm/smp.h>
+-
+ #define local_flush_tlb() __flush_tlb()
+
+ #define flush_tlb_mm(mm) flush_tlb_mm_range(mm, 0UL, TLB_FLUSH_ALL, 0UL)
+@@ -255,8 +183,6 @@ static inline void reset_lazy_tlbstate(v
+ this_cpu_write(cpu_tlbstate.active_mm, &init_mm);
+ }
+
+-#endif /* SMP */
+-
+ #ifndef CONFIG_PARAVIRT
+ #define flush_tlb_others(mask, mm, start, end) \
+ native_flush_tlb_others(mask, mm, start, end)
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -18,7 +18,7 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb
+ = { &init_mm, 0, };
+
+ /*
+- * Smarter SMP flushing macros.
++ * TLB flushing, formerly SMP-only
+ * c/o Linus Torvalds.
+ *
+ * These mean you can really definitely utterly forget about
+@@ -31,8 +31,6 @@ DEFINE_PER_CPU_SHARED_ALIGNED(struct tlb
+ * Implement flush IPI by CALL_FUNCTION_VECTOR, Alex Shi
+ */
+
+-#ifdef CONFIG_SMP
+-
+ struct flush_tlb_info {
+ struct mm_struct *flush_mm;
+ unsigned long flush_start;
+@@ -55,8 +53,6 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+
+-#endif /* CONFIG_SMP */
+-
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
+@@ -73,10 +69,8 @@ void switch_mm_irqs_off(struct mm_struct
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
+-#ifdef CONFIG_SMP
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+ cpumask_set_cpu(cpu, mm_cpumask(next));
+
+ /*
+@@ -115,9 +109,7 @@ void switch_mm_irqs_off(struct mm_struct
+ /* Load the LDT, if the LDT is different: */
+ if (unlikely(prev->context.ldt != next->context.ldt))
+ load_mm_ldt(next);
+- }
+-#ifdef CONFIG_SMP
+- else {
++ } else {
+ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+
+@@ -142,11 +134,8 @@ void switch_mm_irqs_off(struct mm_struct
+ load_mm_ldt(next);
+ }
+ }
+-#endif
+ }
+
+-#ifdef CONFIG_SMP
+-
+ /*
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
+@@ -387,5 +376,3 @@ static int __init create_tlb_single_page
+ return 0;
+ }
+ late_initcall(create_tlb_single_page_flush_ceiling);
+-
+-#endif /* CONFIG_SMP */
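The commit message above notes that the UP lazy-TLB code "wouldn't be unlazied and would instead pointlessly flush repeatedly." The behavior the retained (formerly SMP) code provides can be sketched as a hypothetical single-CPU user-space model — the names mirror the kernel's, but this is an illustrative sketch, not kernel code:

```c
/* User-space model of the lazy-TLB logic kept by this patch: the first
 * flush that hits a lazy CPU "leaves" the mm, and later flushes become
 * no-ops because the CPU is no longer in the mm's cpumask. */
#include <assert.h>
#include <stdbool.h>

#define TLBSTATE_OK   1
#define TLBSTATE_LAZY 2

static int tlb_state = TLBSTATE_OK;
static bool in_mm_cpumask = true;  /* CPU still listed in the mm's cpumask */
static int cr3_loads;              /* counts simulated page-table reloads */

/* context switch to a kernel thread: keep the mm, but mark it lazy */
static void enter_lazy_tlb(void)
{
    if (tlb_state == TLBSTATE_OK)
        tlb_state = TLBSTATE_LAZY;
}

/* a flush request only reaches this CPU while it is in the cpumask */
static void flush_request(void)
{
    if (!in_mm_cpumask)
        return;                    /* no flush IPI is even sent */
    if (tlb_state == TLBSTATE_LAZY) {
        /* leave_mm(): reload page tables once, drop out of the cpumask */
        cr3_loads++;
        in_mm_cpumask = false;
    } else {
        cr3_loads++;               /* normal flush for an active mm */
    }
}
```

In this model, repeated flush requests against a lazy mm cost one reload total, which is the "unlazied" behavior the UP code lacked.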
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-rip-out-complicated-out-of-date-buggy-tlb-flushing.patch b/debian/patches/bugfix/all/kpti/x86-mm-rip-out-complicated-out-of-date-buggy-tlb-flushing.patch
new file mode 100644
index 0000000..394797c
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-rip-out-complicated-out-of-date-buggy-tlb-flushing.patch
@@ -0,0 +1,284 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Thu, 31 Jul 2014 08:40:55 -0700
+Subject: x86/mm: Rip out complicated, out-of-date, buggy TLB flushing
+
+commit e9f4e0a9fe2723078b7a1a1169828dd46a7b2f9e upstream.
+
+I think the flush_tlb_mm_range() code that tries to tune the
+flush sizes based on the CPU needs to get ripped out for
+several reasons:
+
+1. It is obviously buggy. It uses mm->total_vm to judge the
+ task's footprint in the TLB. It should certainly be using
+ some measure of RSS, *NOT* ->total_vm since only resident
+ memory can populate the TLB.
+2. Haswell, and several other CPUs are missing from the
+ intel_tlb_flushall_shift_set() function. Thus, it has been
+ demonstrated to bitrot quickly in practice.
+3. It is plain wrong in my vm:
+ [ 0.037444] Last level iTLB entries: 4KB 0, 2MB 0, 4MB 0
+ [ 0.037444] Last level dTLB entries: 4KB 0, 2MB 0, 4MB 0
+ [ 0.037444] tlb_flushall_shift: 6
+ Which leads it to never use invlpg.
+4. The assumptions about TLB refill costs are wrong:
+ http://lkml.kernel.org/r/1337782555-8088-3-git-send-email-alex.shi@intel.com
+ (more on this in later patches)
+5. I can not reproduce the original data: https://lkml.org/lkml/2012/5/17/59
+ I believe the sample times were too short. Running the
+ benchmark in a loop yields times that vary quite a bit.
+
+Note that this leaves us with a static ceiling of 1 page. This
+is a conservative, dumb setting, and will be revised in a later
+patch.
+
+This also removes the code which attempts to predict whether we
+are flushing data or instructions. We expect instruction flushes
+to be relatively rare and not worth tuning for explicitly.
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Link: http://lkml.kernel.org/r/20140731154055.ABC88E89@viggo.jf.intel.com
+Acked-by: Rik van Riel <riel at redhat.com>
+Acked-by: Mel Gorman <mgorman at suse.de>
+Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/processor.h | 1 -
+ arch/x86/kernel/cpu/amd.c | 7 ----
+ arch/x86/kernel/cpu/common.c | 13 +-----
+ arch/x86/kernel/cpu/intel.c | 26 ------------
+ arch/x86/mm/tlb.c | 87 +++++-----------------------------------
+ 5 files changed, 13 insertions(+), 121 deletions(-)
+
+--- a/arch/x86/include/asm/processor.h
++++ b/arch/x86/include/asm/processor.h
+@@ -72,7 +72,6 @@ extern u16 __read_mostly tlb_lld_4k[NR_I
+ extern u16 __read_mostly tlb_lld_2m[NR_INFO];
+ extern u16 __read_mostly tlb_lld_4m[NR_INFO];
+ extern u16 __read_mostly tlb_lld_1g[NR_INFO];
+-extern s8 __read_mostly tlb_flushall_shift;
+
+ /*
+ * CPU type and hardware bug flags. Kept separately for each CPU.
+--- a/arch/x86/kernel/cpu/amd.c
++++ b/arch/x86/kernel/cpu/amd.c
+@@ -762,11 +762,6 @@ static unsigned int amd_size_cache(struc
+ }
+ #endif
+
+-static void cpu_set_tlb_flushall_shift(struct cpuinfo_x86 *c)
+-{
+- tlb_flushall_shift = 6;
+-}
+-
+ static void cpu_detect_tlb_amd(struct cpuinfo_x86 *c)
+ {
+ u32 ebx, eax, ecx, edx;
+@@ -814,8 +809,6 @@ static void cpu_detect_tlb_amd(struct cp
+ tlb_lli_2m[ENTRIES] = eax & mask;
+
+ tlb_lli_4m[ENTRIES] = tlb_lli_2m[ENTRIES] >> 1;
+-
+- cpu_set_tlb_flushall_shift(c);
+ }
+
+ static const struct cpu_dev amd_cpu_dev = {
+--- a/arch/x86/kernel/cpu/common.c
++++ b/arch/x86/kernel/cpu/common.c
+@@ -482,26 +482,17 @@ u16 __read_mostly tlb_lld_2m[NR_INFO];
+ u16 __read_mostly tlb_lld_4m[NR_INFO];
+ u16 __read_mostly tlb_lld_1g[NR_INFO];
+
+-/*
+- * tlb_flushall_shift shows the balance point in replacing cr3 write
+- * with multiple 'invlpg'. It will do this replacement when
+- * flush_tlb_lines <= active_lines/2^tlb_flushall_shift.
+- * If tlb_flushall_shift is -1, means the replacement will be disabled.
+- */
+-s8 __read_mostly tlb_flushall_shift = -1;
+-
+ void cpu_detect_tlb(struct cpuinfo_x86 *c)
+ {
+ if (this_cpu->c_detect_tlb)
+ this_cpu->c_detect_tlb(c);
+
+ printk(KERN_INFO "Last level iTLB entries: 4KB %d, 2MB %d, 4MB %d\n"
+- "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n"
+- "tlb_flushall_shift: %d\n",
++ "Last level dTLB entries: 4KB %d, 2MB %d, 4MB %d, 1GB %d\n",
+ tlb_lli_4k[ENTRIES], tlb_lli_2m[ENTRIES],
+ tlb_lli_4m[ENTRIES], tlb_lld_4k[ENTRIES],
+ tlb_lld_2m[ENTRIES], tlb_lld_4m[ENTRIES],
+- tlb_lld_1g[ENTRIES], tlb_flushall_shift);
++ tlb_lld_1g[ENTRIES]);
+ }
+
+ void detect_ht(struct cpuinfo_x86 *c)
+--- a/arch/x86/kernel/cpu/intel.c
++++ b/arch/x86/kernel/cpu/intel.c
+@@ -663,31 +663,6 @@ static void intel_tlb_lookup(const unsig
+ }
+ }
+
+-static void intel_tlb_flushall_shift_set(struct cpuinfo_x86 *c)
+-{
+- switch ((c->x86 << 8) + c->x86_model) {
+- case 0x60f: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */
+- case 0x616: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */
+- case 0x617: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */
+- case 0x61d: /* six-core 45 nm xeon "Dunnington" */
+- tlb_flushall_shift = -1;
+- break;
+- case 0x63a: /* Ivybridge */
+- tlb_flushall_shift = 2;
+- break;
+- case 0x61a: /* 45 nm nehalem, "Bloomfield" */
+- case 0x61e: /* 45 nm nehalem, "Lynnfield" */
+- case 0x625: /* 32 nm nehalem, "Clarkdale" */
+- case 0x62c: /* 32 nm nehalem, "Gulftown" */
+- case 0x62e: /* 45 nm nehalem-ex, "Beckton" */
+- case 0x62f: /* 32 nm Xeon E7 */
+- case 0x62a: /* SandyBridge */
+- case 0x62d: /* SandyBridge, "Romely-EP" */
+- default:
+- tlb_flushall_shift = 6;
+- }
+-}
+-
+ static void intel_detect_tlb(struct cpuinfo_x86 *c)
+ {
+ int i, j, n;
+@@ -712,7 +687,6 @@ static void intel_detect_tlb(struct cpui
+ for (j = 1 ; j < 16 ; j++)
+ intel_tlb_lookup(desc[j]);
+ }
+- intel_tlb_flushall_shift_set(c);
+ }
+
+ static const struct cpu_dev intel_cpu_dev = {
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -159,13 +159,14 @@ void flush_tlb_current_task(void)
+ preempt_enable();
+ }
+
++/* in units of pages */
++unsigned long tlb_single_page_flush_ceiling = 1;
++
+ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag)
+ {
+- bool need_flush_others_all = true;
++ int need_flush_others_all = 1;
+ unsigned long addr;
+- unsigned act_entries, tlb_entries = 0;
+- unsigned long nr_base_pages;
+
+ preempt_disable();
+ if (current->active_mm != mm) {
+@@ -184,29 +185,16 @@ void flush_tlb_mm_range(struct mm_struct
+ goto out;
+ }
+
+- if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1
+- || vmflag & VM_HUGETLB) {
++ if (end == TLB_FLUSH_ALL || vmflag & VM_HUGETLB) {
+ local_flush_tlb();
+ goto out;
+ }
+
+- /* In modern CPU, last level tlb used for both data/ins */
+- if (vmflag & VM_EXEC)
+- tlb_entries = tlb_lli_4k[ENTRIES];
+- else
+- tlb_entries = tlb_lld_4k[ENTRIES];
+-
+- /* Assume all of TLB entries was occupied by this task */
+- act_entries = tlb_entries >> tlb_flushall_shift;
+- act_entries = mm->total_vm > act_entries ? act_entries : mm->total_vm;
+- nr_base_pages = (end - start) >> PAGE_SHIFT;
+-
+- /* tlb_flushall_shift is on balance point, details in commit log */
+- if (nr_base_pages > act_entries) {
++ if ((end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ALL);
+ local_flush_tlb();
+ } else {
+- need_flush_others_all = false;
++ need_flush_others_all = 0;
+ /* flush range by one by one 'invlpg' */
+ for (addr = start; addr < end; addr += PAGE_SIZE) {
+ count_vm_tlb_event(NR_TLB_LOCAL_FLUSH_ONE);
+@@ -276,68 +264,15 @@ static void do_kernel_range_flush(void *
+
+ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
+ {
+- unsigned act_entries;
+- struct flush_tlb_info info;
+-
+- /* In modern CPU, last level tlb used for both data/ins */
+- act_entries = tlb_lld_4k[ENTRIES];
+
+ /* Balance as user space task's flush, a bit conservative */
+- if (end == TLB_FLUSH_ALL || tlb_flushall_shift == -1 ||
+- (end - start) >> PAGE_SHIFT > act_entries >> tlb_flushall_shift)
+-
++ if (end == TLB_FLUSH_ALL ||
++ (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE) {
+ on_each_cpu(do_flush_tlb_all, NULL, 1);
+- else {
++ } else {
++ struct flush_tlb_info info;
+ info.flush_start = start;
+ info.flush_end = end;
+ on_each_cpu(do_kernel_range_flush, &info, 1);
+ }
+ }
+-
+-#ifdef CONFIG_DEBUG_TLBFLUSH
+-static ssize_t tlbflush_read_file(struct file *file, char __user *user_buf,
+- size_t count, loff_t *ppos)
+-{
+- char buf[32];
+- unsigned int len;
+-
+- len = sprintf(buf, "%hd\n", tlb_flushall_shift);
+- return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+-}
+-
+-static ssize_t tlbflush_write_file(struct file *file,
+- const char __user *user_buf, size_t count, loff_t *ppos)
+-{
+- char buf[32];
+- ssize_t len;
+- s8 shift;
+-
+- len = min(count, sizeof(buf) - 1);
+- if (copy_from_user(buf, user_buf, len))
+- return -EFAULT;
+-
+- buf[len] = '\0';
+- if (kstrtos8(buf, 0, &shift))
+- return -EINVAL;
+-
+- if (shift < -1 || shift >= BITS_PER_LONG)
+- return -EINVAL;
+-
+- tlb_flushall_shift = shift;
+- return count;
+-}
+-
+-static const struct file_operations fops_tlbflush = {
+- .read = tlbflush_read_file,
+- .write = tlbflush_write_file,
+- .llseek = default_llseek,
+-};
+-
+-static int __init create_tlb_flushall_shift(void)
+-{
+- debugfs_create_file("tlb_flushall_shift", S_IRUSR | S_IWUSR,
+- arch_debugfs_dir, NULL, &fops_tlbflush);
+- return 0;
+-}
+-late_initcall(create_tlb_flushall_shift);
+-#endif
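The heuristic that replaces `tlb_flushall_shift` is a single comparison against a tunable ceiling. A hedged user-space sketch of that decision follows; the constants and the `want_full_flush` helper are illustrative, not the kernel's exact definitions:

```c
#include <assert.h>
#include <stdbool.h>

#define PAGE_SIZE     4096UL
#define TLB_FLUSH_ALL (~0UL)

/* the tunable this patch introduces, in pages (1 here; a later patch
 * in this series raises it to 33) */
static unsigned long tlb_single_page_flush_ceiling = 1;

/* true when a full TLB flush should be used instead of per-page invlpg */
static bool want_full_flush(unsigned long start, unsigned long end,
                            bool is_hugetlb)
{
    if (end == TLB_FLUSH_ALL || is_hugetlb)
        return true;
    return (end - start) > tlb_single_page_flush_ceiling * PAGE_SIZE;
}
```

With the later ceiling of 33, a 33-page range (libc's common M_TRIM_THRESHOLD trim) still uses per-page invlpg, while anything larger falls back to a full flush.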
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch b/debian/patches/bugfix/all/kpti/x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch
new file mode 100644
index 0000000..0a188a8
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch
@@ -0,0 +1,64 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Tue, 26 Apr 2016 09:39:09 -0700
+Subject: x86/mm, sched/core: Turn off IRQs in switch_mm()
+
+commit 078194f8e9fe3cf54c8fd8bded48a1db5bd8eb8a upstream.
+
+Potential races between switch_mm() and TLB-flush or LDT-flush IPIs
+could be very messy. AFAICT the code is currently okay, whether by
+accident or by careful design, but enabling PCID will make it
+considerably more complicated and will no longer be obviously safe.
+
+Fix it with a big hammer: run switch_mm() with IRQs off.
+
+To avoid a performance hit in the scheduler, we take advantage of
+our knowledge that the scheduler already has IRQs disabled when it
+calls switch_mm().
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/f19baf759693c9dcae64bbff76189db77cb13398.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/mmu_context.h | 4 ++++
+ arch/x86/mm/tlb.c | 10 ++++++++++
+ 2 files changed, 14 insertions(+)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -77,6 +77,10 @@ static inline void enter_lazy_tlb(struct
+ extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk);
+
++extern void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk);
++#define switch_mm_irqs_off switch_mm_irqs_off
++
+ #define activate_mm(prev, next) \
+ do { \
+ paravirt_activate_mm((prev), (next)); \
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -60,6 +60,16 @@ EXPORT_SYMBOL_GPL(leave_mm);
+ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+ struct task_struct *tsk)
+ {
++ unsigned long flags;
++
++ local_irq_save(flags);
++ switch_mm_irqs_off(prev, next, tsk);
++ local_irq_restore(flags);
++}
++
++void switch_mm_irqs_off(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk)
++{
+ unsigned cpu = smp_processor_id();
+
+ if (likely(prev != next)) {
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-sched-core-uninline-switch_mm.patch b/debian/patches/bugfix/all/kpti/x86-mm-sched-core-uninline-switch_mm.patch
new file mode 100644
index 0000000..51ce32b
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-sched-core-uninline-switch_mm.patch
@@ -0,0 +1,204 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Tue, 26 Apr 2016 09:39:08 -0700
+Subject: x86/mm, sched/core: Uninline switch_mm()
+
+commit 69c0319aabba45bcf33178916a2f06967b4adede upstream.
+
+It's fairly large and it has quite a few callers. This may also
+help untangle some headers down the road.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Reviewed-by: Borislav Petkov <bp at suse.de>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/54f3367803e7f80b2be62c8a21879aa74b1a5f57.1461688545.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Signed-off-by: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/include/asm/mmu_context.h | 79 +-----------------------------------
+ arch/x86/mm/tlb.c | 82 ++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 84 insertions(+), 77 deletions(-)
+
+--- a/arch/x86/include/asm/mmu_context.h
++++ b/arch/x86/include/asm/mmu_context.h
+@@ -74,83 +74,8 @@ static inline void enter_lazy_tlb(struct
+ #endif
+ }
+
+-static inline void switch_mm(struct mm_struct *prev, struct mm_struct *next,
+- struct task_struct *tsk)
+-{
+- unsigned cpu = smp_processor_id();
+-
+- if (likely(prev != next)) {
+-#ifdef CONFIG_SMP
+- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+- this_cpu_write(cpu_tlbstate.active_mm, next);
+-#endif
+- cpumask_set_cpu(cpu, mm_cpumask(next));
+-
+- /*
+- * Re-load page tables.
+- *
+- * This logic has an ordering constraint:
+- *
+- * CPU 0: Write to a PTE for 'next'
+- * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
+- * CPU 1: set bit 1 in next's mm_cpumask
+- * CPU 1: load from the PTE that CPU 0 writes (implicit)
+- *
+- * We need to prevent an outcome in which CPU 1 observes
+- * the new PTE value and CPU 0 observes bit 1 clear in
+- * mm_cpumask. (If that occurs, then the IPI will never
+- * be sent, and CPU 0's TLB will contain a stale entry.)
+- *
+- * The bad outcome can occur if either CPU's load is
+- * reordered before that CPU's store, so both CPUs must
+- * execute full barriers to prevent this from happening.
+- *
+- * Thus, switch_mm needs a full barrier between the
+- * store to mm_cpumask and any operation that could load
+- * from next->pgd. TLB fills are special and can happen
+- * due to instruction fetches or for no reason at all,
+- * and neither LOCK nor MFENCE orders them.
+- * Fortunately, load_cr3() is serializing and gives the
+- * ordering guarantee we need.
+- *
+- */
+- load_cr3(next->pgd);
+-
+- /* Stop flush ipis for the previous mm */
+- cpumask_clear_cpu(cpu, mm_cpumask(prev));
+-
+- /* Load the LDT, if the LDT is different: */
+- if (unlikely(prev->context.ldt != next->context.ldt))
+- load_mm_ldt(next);
+- }
+-#ifdef CONFIG_SMP
+- else {
+- this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
+- BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
+-
+- if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
+- /*
+- * On established mms, the mm_cpumask is only changed
+- * from irq context, from ptep_clear_flush() while in
+- * lazy tlb mode, and here. Irqs are blocked during
+- * schedule, protecting us from simultaneous changes.
+- */
+- cpumask_set_cpu(cpu, mm_cpumask(next));
+-
+- /*
+- * We were in lazy tlb mode and leave_mm disabled
+- * tlb flush IPI delivery. We must reload CR3
+- * to make sure to use no freed page tables.
+- *
+- * As above, load_cr3() is serializing and orders TLB
+- * fills with respect to the mm_cpumask write.
+- */
+- load_cr3(next->pgd);
+- load_mm_ldt(next);
+- }
+- }
+-#endif
+-}
++extern void switch_mm(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk);
+
+ #define activate_mm(prev, next) \
+ do { \
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -55,6 +55,88 @@ void leave_mm(int cpu)
+ }
+ EXPORT_SYMBOL_GPL(leave_mm);
+
++#endif /* CONFIG_SMP */
++
++void switch_mm(struct mm_struct *prev, struct mm_struct *next,
++ struct task_struct *tsk)
++{
++ unsigned cpu = smp_processor_id();
++
++ if (likely(prev != next)) {
++#ifdef CONFIG_SMP
++ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
++ this_cpu_write(cpu_tlbstate.active_mm, next);
++#endif
++ cpumask_set_cpu(cpu, mm_cpumask(next));
++
++ /*
++ * Re-load page tables.
++ *
++ * This logic has an ordering constraint:
++ *
++ * CPU 0: Write to a PTE for 'next'
++ * CPU 0: load bit 1 in mm_cpumask. if nonzero, send IPI.
++ * CPU 1: set bit 1 in next's mm_cpumask
++ * CPU 1: load from the PTE that CPU 0 writes (implicit)
++ *
++ * We need to prevent an outcome in which CPU 1 observes
++ * the new PTE value and CPU 0 observes bit 1 clear in
++ * mm_cpumask. (If that occurs, then the IPI will never
++ * be sent, and CPU 0's TLB will contain a stale entry.)
++ *
++ * The bad outcome can occur if either CPU's load is
++ * reordered before that CPU's store, so both CPUs must
++ * execute full barriers to prevent this from happening.
++ *
++ * Thus, switch_mm needs a full barrier between the
++ * store to mm_cpumask and any operation that could load
++ * from next->pgd. TLB fills are special and can happen
++ * due to instruction fetches or for no reason at all,
++ * and neither LOCK nor MFENCE orders them.
++ * Fortunately, load_cr3() is serializing and gives the
++ * ordering guarantee we need.
++ *
++ */
++ load_cr3(next->pgd);
++
++ /* Stop flush ipis for the previous mm */
++ cpumask_clear_cpu(cpu, mm_cpumask(prev));
++
++ /* Load the LDT, if the LDT is different: */
++ if (unlikely(prev->context.ldt != next->context.ldt))
++ load_mm_ldt(next);
++ }
++#ifdef CONFIG_SMP
++ else {
++ this_cpu_write(cpu_tlbstate.state, TLBSTATE_OK);
++ BUG_ON(this_cpu_read(cpu_tlbstate.active_mm) != next);
++
++ if (!cpumask_test_cpu(cpu, mm_cpumask(next))) {
++ /*
++ * On established mms, the mm_cpumask is only changed
++ * from irq context, from ptep_clear_flush() while in
++ * lazy tlb mode, and here. Irqs are blocked during
++ * schedule, protecting us from simultaneous changes.
++ */
++ cpumask_set_cpu(cpu, mm_cpumask(next));
++
++ /*
++ * We were in lazy tlb mode and leave_mm disabled
++ * tlb flush IPI delivery. We must reload CR3
++ * to make sure to use no freed page tables.
++ *
++ * As above, load_cr3() is serializing and orders TLB
++ * fills with respect to the mm_cpumask write.
++ */
++ load_cr3(next->pgd);
++ load_mm_ldt(next);
++ }
++ }
++#endif
++}
++
++#ifdef CONFIG_SMP
++
+ /*
+ * The flush IPI assumes that a thread switch happens in this order:
+ * [cpu0: the cpu that switches]
diff --git a/debian/patches/bugfix/all/kpti/x86-mm-set-tlb-flush-tunable-to-sane-value-33.patch b/debian/patches/bugfix/all/kpti/x86-mm-set-tlb-flush-tunable-to-sane-value-33.patch
new file mode 100644
index 0000000..7f22491
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-mm-set-tlb-flush-tunable-to-sane-value-33.patch
@@ -0,0 +1,268 @@
+From: Dave Hansen <dave.hansen at linux.intel.com>
+Date: Thu, 31 Jul 2014 08:41:03 -0700
+Subject: x86/mm: Set TLB flush tunable to sane value (33)
+
+commit a5102476a24bce364b74f1110005542a2c964103 upstream.
+
+This has been run through Intel's LKP tests across a wide range
+of modern systems and workloads and it wasn't shown to make a
+measurable performance difference positive or negative.
+
+Now that we have some shiny new tracepoints, we can actually
+figure out what the heck is going on.
+
+During a kernel compile, 60% of the flush_tlb_mm_range() calls
+are for a single page. It breaks down like this:
+
+ size percent percent<=
+ V V V
+GLOBAL: 2.20% 2.20% avg cycles: 2283
+ 1: 56.92% 59.12% avg cycles: 1276
+ 2: 13.78% 72.90% avg cycles: 1505
+ 3: 8.26% 81.16% avg cycles: 1880
+ 4: 7.41% 88.58% avg cycles: 2447
+ 5: 1.73% 90.31% avg cycles: 2358
+ 6: 1.32% 91.63% avg cycles: 2563
+ 7: 1.14% 92.77% avg cycles: 2862
+ 8: 0.62% 93.39% avg cycles: 3542
+ 9: 0.08% 93.47% avg cycles: 3289
+ 10: 0.43% 93.90% avg cycles: 3570
+ 11: 0.20% 94.10% avg cycles: 3767
+ 12: 0.08% 94.18% avg cycles: 3996
+ 13: 0.03% 94.20% avg cycles: 4077
+ 14: 0.02% 94.23% avg cycles: 4836
+ 15: 0.04% 94.26% avg cycles: 5699
+ 16: 0.06% 94.32% avg cycles: 5041
+ 17: 0.57% 94.89% avg cycles: 5473
+ 18: 0.02% 94.91% avg cycles: 5396
+ 19: 0.03% 94.95% avg cycles: 5296
+ 20: 0.02% 94.96% avg cycles: 6749
+ 21: 0.18% 95.14% avg cycles: 6225
+ 22: 0.01% 95.15% avg cycles: 6393
+ 23: 0.01% 95.16% avg cycles: 6861
+ 24: 0.12% 95.28% avg cycles: 6912
+ 25: 0.05% 95.32% avg cycles: 7190
+ 26: 0.01% 95.33% avg cycles: 7793
+ 27: 0.01% 95.34% avg cycles: 7833
+ 28: 0.01% 95.35% avg cycles: 8253
+ 29: 0.08% 95.42% avg cycles: 8024
+ 30: 0.03% 95.45% avg cycles: 9670
+ 31: 0.01% 95.46% avg cycles: 8949
+ 32: 0.01% 95.46% avg cycles: 9350
+ 33: 3.11% 98.57% avg cycles: 8534
+ 34: 0.02% 98.60% avg cycles: 10977
+ 35: 0.02% 98.62% avg cycles: 11400
+
+We get into diminishing returns pretty quickly. On pre-IvyBridge
+CPUs, we used to set the limit at 8 pages, and it was set at 128
+on IvyBridge. That 128 number looks pretty silly considering that
+less than 0.5% of the flushes are that large.
+
+The previous code tried to size this number based on the size of
+the TLB. Good idea, but it's error-prone, needs maintenance
+(which it didn't get up to now), and probably would not matter in
+practice much.
+
+Setting it to 33 means that we cover the mallopt
+M_TRIM_THRESHOLD, which is the most universally common size to do
+flushes.
+
+That's the short version. Here's the long one for why I chose 33:
+
+1. These numbers have a constant bias in the timestamps from the
+ tracing. Probably counts for a couple hundred cycles in each of
+ these tests, but it should be fairly _even_ across all of them.
+ The smallest delta between the tracepoints I have ever seen is
+ 335 cycles. This is one reason the cycles/page cost goes down in
+ general as the flushes get larger. The true cost is nearer to
+ 100 cycles.
+2. A full flush is more expensive than a single invlpg, but not
+ by much (single percentages).
+3. A dtlb miss is 17.1ns (~45 cycles) and an itlb miss is 13.0ns
+ (~34 cycles). At those rates, refilling the 512-entry dTLB takes
+ 22,000 cycles.
+4. 22,000 cycles is approximately the equivalent of doing 85
+ invlpg operations. But, the odds are that the TLB can
+ actually be filled up faster than that because TLB misses that
+ are close in time also tend to leverage the same caches.
+6. ~98% of flushes are <=33 pages. There are a lot of flushes of
+ 33 pages, probably because libc's M_TRIM_THRESHOLD is set to
+ 128k (32 pages)
+7. I've found no consistent data to support changing the IvyBridge
+ vs. SandyBridge tunable by a factor of 16
+
+I used the performance counters on this hardware (IvyBridge i5-3320M)
+to figure out the tlb miss costs:
+
+ocperf.py stat -e dtlb_load_misses.walk_duration,dtlb_load_misses.walk_completed,dtlb_store_misses.walk_duration,dtlb_store_misses.walk_completed,itlb_misses.walk_duration,itlb_misses.walk_completed,itlb.itlb_flush
+
+ 7,720,030,970 dtlb_load_misses_walk_duration [57.13%]
+ 169,856,353 dtlb_load_misses_walk_completed [57.15%]
+ 708,832,859 dtlb_store_misses_walk_duration [57.17%]
+ 19,346,823 dtlb_store_misses_walk_completed [57.17%]
+ 2,779,687,402 itlb_misses_walk_duration [57.15%]
+ 82,241,148 itlb_misses_walk_completed [57.13%]
+ 770,717 itlb_itlb_flush [57.11%]
+
+Show that a dtlb miss is 17.1ns (~45 cycles) and an itlb miss is 13.0ns
+(~34 cycles). At those rates, refilling the 512-entry dTLB takes
+22,000 cycles. On a SandyBridge system with more cores and larger
+caches, those are dtlb=13.4ns and itlb=9.5ns.
+
+cat perf.stat.txt | perl -pe 's/,//g'
+ | awk '/itlb_misses_walk_duration/ { icyc+=$1 }
+ /itlb_misses_walk_completed/ { imiss+=$1 }
+ /dtlb_.*_walk_duration/ { dcyc+=$1 }
+ /dtlb_.*.*completed/ { dmiss+=$1 }
+ END {print "itlb cyc/miss: ", icyc/imiss, " dtlb cyc/miss: ", dcyc/dmiss, " ----- ", icyc,imiss, dcyc,dmiss }
+
+On Westmere CPUs, the counters to use are: itlb_flush,itlb_misses.walk_cycles,itlb_misses.any,dtlb_misses.walk_cycles,dtlb_misses.any
+
+The assumptions that this code went in under:
+https://lkml.org/lkml/2012/6/12/119 say that a flush and a refill are
+about 100ns. Being generous, that is over by a factor of 6 on the
+refill side, although it is fairly close on the cost of an invlpg.
+An increase of a single invlpg operation seems to lengthen the flush
+range operation by about 200 cycles. Here is one example of the data
+collected for flushing 10 and 11 pages (full data are below):
+
+ 10: 0.43% 93.90% avg cycles: 3570 cycles/page: 357 samples: 4714
+ 11: 0.20% 94.10% avg cycles: 3767 cycles/page: 342 samples: 2145
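
The ~200-cycle increment can be read straight off those two rows:

```python
# Marginal cost of one extra invlpg, from the two trace rows above.
avg10, avg11 = 3570, 3767        # avg cycles for 10- and 11-page flushes

print(avg11 - avg10)             # ~200 extra cycles for one more page
print(avg10 // 10, avg11 // 11)  # cycles/page columns: 357 and 342
```
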
+
+How to generate this table:
+
+ echo 10000 > /sys/kernel/debug/tracing/buffer_size_kb
+ echo x86-tsc > /sys/kernel/debug/tracing/trace_clock
+ echo 'reason != 0' > /sys/kernel/debug/tracing/events/tlb/tlb_flush/filter
+ echo 1 > /sys/kernel/debug/tracing/events/tlb/tlb_flush/enable
+
+Pipe the trace output in to this script:
+
+ http://sr71.net/~dave/intel/201402-tlb/trace-time-diff-process.pl.txt
+
+Note that these data were gathered with the invlpg threshold set to
+150 pages. Only data points with >=50 samples were printed:
+
+Flush      % of     %<= this
+pages      flushes  size
+------------------------------------------------------------------------------
+ -1: 2.20% 2.20% avg cycles: 2283 cycles/page: xxxx samples: 23960
+ 1: 56.92% 59.12% avg cycles: 1276 cycles/page: 1276 samples: 620895
+ 2: 13.78% 72.90% avg cycles: 1505 cycles/page: 752 samples: 150335
+ 3: 8.26% 81.16% avg cycles: 1880 cycles/page: 626 samples: 90131
+ 4: 7.41% 88.58% avg cycles: 2447 cycles/page: 611 samples: 80877
+ 5: 1.73% 90.31% avg cycles: 2358 cycles/page: 471 samples: 18885
+ 6: 1.32% 91.63% avg cycles: 2563 cycles/page: 427 samples: 14397
+ 7: 1.14% 92.77% avg cycles: 2862 cycles/page: 408 samples: 12441
+ 8: 0.62% 93.39% avg cycles: 3542 cycles/page: 442 samples: 6721
+ 9: 0.08% 93.47% avg cycles: 3289 cycles/page: 365 samples: 917
+ 10: 0.43% 93.90% avg cycles: 3570 cycles/page: 357 samples: 4714
+ 11: 0.20% 94.10% avg cycles: 3767 cycles/page: 342 samples: 2145
+ 12: 0.08% 94.18% avg cycles: 3996 cycles/page: 333 samples: 864
+ 13: 0.03% 94.20% avg cycles: 4077 cycles/page: 313 samples: 289
+ 14: 0.02% 94.23% avg cycles: 4836 cycles/page: 345 samples: 236
+ 15: 0.04% 94.26% avg cycles: 5699 cycles/page: 379 samples: 390
+ 16: 0.06% 94.32% avg cycles: 5041 cycles/page: 315 samples: 643
+ 17: 0.57% 94.89% avg cycles: 5473 cycles/page: 321 samples: 6229
+ 18: 0.02% 94.91% avg cycles: 5396 cycles/page: 299 samples: 224
+ 19: 0.03% 94.95% avg cycles: 5296 cycles/page: 278 samples: 367
+ 20: 0.02% 94.96% avg cycles: 6749 cycles/page: 337 samples: 185
+ 21: 0.18% 95.14% avg cycles: 6225 cycles/page: 296 samples: 1964
+ 22: 0.01% 95.15% avg cycles: 6393 cycles/page: 290 samples: 83
+ 23: 0.01% 95.16% avg cycles: 6861 cycles/page: 298 samples: 61
+ 24: 0.12% 95.28% avg cycles: 6912 cycles/page: 288 samples: 1307
+ 25: 0.05% 95.32% avg cycles: 7190 cycles/page: 287 samples: 533
+ 26: 0.01% 95.33% avg cycles: 7793 cycles/page: 299 samples: 94
+ 27: 0.01% 95.34% avg cycles: 7833 cycles/page: 290 samples: 66
+ 28: 0.01% 95.35% avg cycles: 8253 cycles/page: 294 samples: 73
+ 29: 0.08% 95.42% avg cycles: 8024 cycles/page: 276 samples: 846
+ 30: 0.03% 95.45% avg cycles: 9670 cycles/page: 322 samples: 296
+ 31: 0.01% 95.46% avg cycles: 8949 cycles/page: 288 samples: 79
+ 32: 0.01% 95.46% avg cycles: 9350 cycles/page: 292 samples: 60
+ 33: 3.11% 98.57% avg cycles: 8534 cycles/page: 258 samples: 33936
+ 34: 0.02% 98.60% avg cycles: 10977 cycles/page: 322 samples: 268
+ 35: 0.02% 98.62% avg cycles: 11400 cycles/page: 325 samples: 177
+ 36: 0.01% 98.63% avg cycles: 11504 cycles/page: 319 samples: 161
+ 37: 0.02% 98.65% avg cycles: 11596 cycles/page: 313 samples: 182
+ 38: 0.02% 98.66% avg cycles: 11850 cycles/page: 311 samples: 195
+ 39: 0.01% 98.68% avg cycles: 12158 cycles/page: 311 samples: 128
+ 40: 0.01% 98.68% avg cycles: 11626 cycles/page: 290 samples: 78
+ 41: 0.04% 98.73% avg cycles: 11435 cycles/page: 278 samples: 477
+ 42: 0.01% 98.73% avg cycles: 12571 cycles/page: 299 samples: 74
+ 43: 0.01% 98.74% avg cycles: 12562 cycles/page: 292 samples: 78
+ 44: 0.01% 98.75% avg cycles: 12991 cycles/page: 295 samples: 108
+ 45: 0.01% 98.76% avg cycles: 13169 cycles/page: 292 samples: 78
+ 46: 0.02% 98.78% avg cycles: 12891 cycles/page: 280 samples: 261
+ 47: 0.01% 98.79% avg cycles: 13099 cycles/page: 278 samples: 67
+ 48: 0.01% 98.80% avg cycles: 13851 cycles/page: 288 samples: 77
+ 49: 0.01% 98.80% avg cycles: 13749 cycles/page: 280 samples: 66
+ 50: 0.01% 98.81% avg cycles: 13949 cycles/page: 278 samples: 73
+ 52: 0.00% 98.82% avg cycles: 14243 cycles/page: 273 samples: 52
+ 54: 0.01% 98.83% avg cycles: 15312 cycles/page: 283 samples: 87
+ 55: 0.01% 98.84% avg cycles: 15197 cycles/page: 276 samples: 109
+ 56: 0.02% 98.86% avg cycles: 15234 cycles/page: 272 samples: 208
+ 57: 0.00% 98.86% avg cycles: 14888 cycles/page: 261 samples: 53
+ 58: 0.01% 98.87% avg cycles: 15037 cycles/page: 259 samples: 59
+ 59: 0.01% 98.87% avg cycles: 15752 cycles/page: 266 samples: 63
+ 62: 0.00% 98.89% avg cycles: 16222 cycles/page: 261 samples: 54
+ 64: 0.02% 98.91% avg cycles: 17179 cycles/page: 268 samples: 248
+ 65: 0.12% 99.03% avg cycles: 18762 cycles/page: 288 samples: 1324
+ 85: 0.00% 99.10% avg cycles: 21649 cycles/page: 254 samples: 50
+ 127: 0.01% 99.18% avg cycles: 32397 cycles/page: 255 samples: 75
+ 128: 0.13% 99.31% avg cycles: 31711 cycles/page: 247 samples: 1466
+ 129: 0.18% 99.49% avg cycles: 33017 cycles/page: 255 samples: 1927
+ 181: 0.33% 99.84% avg cycles: 2489 cycles/page: 13 samples: 3547
+ 256: 0.05% 99.91% avg cycles: 2305 cycles/page: 9 samples: 550
+ 512: 0.03% 99.95% avg cycles: 2133 cycles/page: 4 samples: 304
+ 1512: 0.01% 99.99% avg cycles: 3038 cycles/page: 2 samples: 65
+
+Here are the tlb counters during a 10-second slice of a kernel compile
+on a SandyBridge system. It does better than the IvyBridge, probably
+due to its larger caches, since this was one of the 'X' extreme parts.
+
+ 10,873,007,282 dtlb_load_misses_walk_duration
+ 250,711,333 dtlb_load_misses_walk_completed
+ 1,212,395,865 dtlb_store_misses_walk_duration
+ 31,615,772 dtlb_store_misses_walk_completed
+ 5,091,010,274 itlb_misses_walk_duration
+ 163,193,511 itlb_misses_walk_completed
+ 1,321,980 itlb_itlb_flush
+
+ 10.008045158 seconds time elapsed
+
+itlb ns/miss: 9.45338 dtlb ns/miss: 12.9716
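
Those ns/miss figures are again walk_duration divided by walk_completed; a sketch, assuming a 3.3 GHz clock for this SandyBridge 'X' part (the clock is not given in the text, but 3.3 GHz reproduces the quoted values):

```python
# Reproduce the quoted ns/miss figures from the SandyBridge counters.
GHZ = 3.3  # assumed clock for this SandyBridge 'X' extreme part

dtlb_cyc = (10_873_007_282 + 1_212_395_865) / (250_711_333 + 31_615_772)
itlb_cyc = 5_091_010_274 / 163_193_511

print(f"itlb ns/miss: {itlb_cyc / GHZ:.5f} dtlb ns/miss: {dtlb_cyc / GHZ:.4f}")
```
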
+
+Signed-off-by: Dave Hansen <dave.hansen at linux.intel.com>
+Link: http://lkml.kernel.org/r/20140731154103.10C1115E@viggo.jf.intel.com
+Acked-by: Rik van Riel <riel at redhat.com>
+Acked-by: Mel Gorman <mgorman at suse.de>
+Signed-off-by: H. Peter Anvin <hpa at linux.intel.com>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/mm/tlb.c | 13 +++++++++++--
+ 1 file changed, 11 insertions(+), 2 deletions(-)
+
+--- a/arch/x86/mm/tlb.c
++++ b/arch/x86/mm/tlb.c
+@@ -159,8 +159,17 @@ void flush_tlb_current_task(void)
+ preempt_enable();
+ }
+
+-/* in units of pages */
+-unsigned long tlb_single_page_flush_ceiling = 1;
++/*
++ * See Documentation/x86/tlb.txt for details. We choose 33
++ * because it is large enough to cover the vast majority (at
++ * least 95%) of allocations, and is small enough that we are
++ * confident it will not cause too much overhead. Each single
++ * flush is about 100 ns, so this caps the maximum overhead at
++ * _about_ 3,000 ns.
++ *
++ * This is in units of pages.
++ */
++unsigned long tlb_single_page_flush_ceiling = 33;
+
+ void flush_tlb_mm_range(struct mm_struct *mm, unsigned long start,
+ unsigned long end, unsigned long vmflag)
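
The tunable above gates the choice made in flush_tlb_mm_range(); a minimal model of that decision (not the kernel code itself), using the rationale from the comment in the hunk:

```python
# Model of the flush_tlb_mm_range() heuristic set by this patch:
# per-page invlpg for small ranges, one full TLB flush beyond the
# ceiling.  33 covers ~95% of flush sizes observed above, and at
# ~100ns per invlpg caps the per-flush overhead at ~3,300ns.
TLB_SINGLE_PAGE_FLUSH_CEILING = 33  # in units of pages

def flush_strategy(nr_pages):
    if nr_pages <= TLB_SINGLE_PAGE_FLUSH_CEILING:
        return "invlpg"  # flush each page individually
    return "full"        # flush the whole TLB once

print(flush_strategy(33), flush_strategy(34))
```
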
diff --git a/debian/patches/bugfix/all/kpti/x86-paravirt-dont-patch-flush_tlb_single.patch b/debian/patches/bugfix/all/kpti/x86-paravirt-dont-patch-flush_tlb_single.patch
new file mode 100644
index 0000000..d108dd5
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-paravirt-dont-patch-flush_tlb_single.patch
@@ -0,0 +1,66 @@
+From: Thomas Gleixner <tglx at linutronix.de>
+Date: Mon, 4 Dec 2017 15:07:30 +0100
+Subject: x86/paravirt: Dont patch flush_tlb_single
+
+commit a035795499ca1c2bd1928808d1a156eda1420383 upstream.
+
+native_flush_tlb_single() will be changed with the upcoming
+PAGE_TABLE_ISOLATION feature. This requires it to contain more code
+than a single INVLPG instruction.
+
+Remove the paravirt patching for it.
+
+Signed-off-by: Thomas Gleixner <tglx at linutronix.de>
+Reviewed-by: Josh Poimboeuf <jpoimboe at redhat.com>
+Reviewed-by: Juergen Gross <jgross at suse.com>
+Acked-by: Peter Zijlstra <peterz at infradead.org>
+Cc: Andy Lutomirski <luto at kernel.org>
+Cc: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Borislav Petkov <bpetkov at suse.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Dave Hansen <dave.hansen at linux.intel.com>
+Cc: David Laight <David.Laight at aculab.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: Eduardo Valentin <eduval at amazon.com>
+Cc: Greg KH <gregkh at linuxfoundation.org>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Will Deacon <will.deacon at arm.com>
+Cc: aliguori at amazon.com
+Cc: daniel.gruss at iaik.tugraz.at
+Cc: hughd at google.com
+Cc: keescook at google.com
+Cc: linux-mm at kvack.org
+Cc: michael.schwarz at iaik.tugraz.at
+Cc: moritz.lipp at iaik.tugraz.at
+Cc: richard.fellner at student.tugraz.at
+Link: https://lkml.kernel.org/r/20171204150606.828111617@linutronix.de
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+[bwh: Backported to 3.16: adjust context]
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/paravirt_patch_64.c | 2 --
+ 1 file changed, 2 deletions(-)
+
+--- a/arch/x86/kernel/paravirt_patch_64.c
++++ b/arch/x86/kernel/paravirt_patch_64.c
+@@ -9,7 +9,6 @@ DEF_NATIVE(pv_irq_ops, save_fl, "pushfq;
+ DEF_NATIVE(pv_mmu_ops, read_cr2, "movq %cr2, %rax");
+ DEF_NATIVE(pv_mmu_ops, read_cr3, "movq %cr3, %rax");
+ DEF_NATIVE(pv_mmu_ops, write_cr3, "movq %rdi, %cr3");
+-DEF_NATIVE(pv_mmu_ops, flush_tlb_single, "invlpg (%rdi)");
+ DEF_NATIVE(pv_cpu_ops, clts, "clts");
+ DEF_NATIVE(pv_cpu_ops, wbinvd, "wbinvd");
+
+@@ -57,7 +56,6 @@ unsigned native_patch(u8 type, u16 clobb
+ PATCH_SITE(pv_mmu_ops, read_cr3);
+ PATCH_SITE(pv_mmu_ops, write_cr3);
+ PATCH_SITE(pv_cpu_ops, clts);
+- PATCH_SITE(pv_mmu_ops, flush_tlb_single);
+ PATCH_SITE(pv_cpu_ops, wbinvd);
+
+ patch_site:
diff --git a/debian/patches/bugfix/all/kpti/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch b/debian/patches/bugfix/all/kpti/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
new file mode 100644
index 0000000..ff3ea4d
--- /dev/null
+++ b/debian/patches/bugfix/all/kpti/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
@@ -0,0 +1,46 @@
+From: Andy Lutomirski <luto at kernel.org>
+Date: Sat, 22 Apr 2017 00:01:19 -0700
+Subject: x86/vm86/32: Switch to flush_tlb_mm_range() in mark_screen_rdonly()
+
+commit 9ccee2373f0658f234727700e619df097ba57023 upstream.
+
+mark_screen_rdonly() is the last remaining caller of flush_tlb().
+flush_tlb_mm_range() is potentially faster and isn't obsolete.
+
+Compile-tested only because I don't know whether software that uses
+this mechanism even exists.
+
+Signed-off-by: Andy Lutomirski <luto at kernel.org>
+Cc: Andrew Morton <akpm at linux-foundation.org>
+Cc: Borislav Petkov <bp at alien8.de>
+Cc: Brian Gerst <brgerst at gmail.com>
+Cc: Dave Hansen <dave.hansen at intel.com>
+Cc: Denys Vlasenko <dvlasenk at redhat.com>
+Cc: H. Peter Anvin <hpa at zytor.com>
+Cc: Josh Poimboeuf <jpoimboe at redhat.com>
+Cc: Linus Torvalds <torvalds at linux-foundation.org>
+Cc: Michal Hocko <mhocko at suse.com>
+Cc: Nadav Amit <namit at vmware.com>
+Cc: Peter Zijlstra <peterz at infradead.org>
+Cc: Rik van Riel <riel at redhat.com>
+Cc: Sasha Levin <sasha.levin at oracle.com>
+Cc: Thomas Gleixner <tglx at linutronix.de>
+Link: http://lkml.kernel.org/r/791a644076fc3577ba7f7b7cafd643cc089baa7d.1492844372.git.luto@kernel.org
+Signed-off-by: Ingo Molnar <mingo at kernel.org>
+Cc: Hugh Dickins <hughd at google.com>
+Signed-off-by: Ben Hutchings <ben at decadent.org.uk>
+---
+ arch/x86/kernel/vm86_32.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+--- a/arch/x86/kernel/vm86_32.c
++++ b/arch/x86/kernel/vm86_32.c
+@@ -194,7 +194,7 @@ static void mark_screen_rdonly(struct mm
+ pte_unmap_unlock(pte, ptl);
+ out:
+ up_write(&mm->mmap_sem);
+- flush_tlb();
++ flush_tlb_mm_range(mm, 0xA0000, 0xA0000 + 32*PAGE_SIZE, 0UL);
+ }
+
+
diff --git a/debian/patches/series b/debian/patches/series
index b21b02b..de82003 100644
--- a/debian/patches/series
+++ b/debian/patches/series
@@ -703,5 +703,57 @@ bugfix/all/crypto-hmac-require-that-the-underlying-hash-algorit.patch
bugfix/all/keys-add-missing-permission-check-for-request_key-de.patch
bugfix/x86/kvm-vmx-remove-i-o-port-0x80-bypass-on-intel-hosts.patch
bugfix/all/bluetooth-prevent-stack-info-leak-from-the-efs-element.patch
+bugfix/all/kpti/x86-mm-clean-up-the-tlb-flushing-code.patch
+bugfix/all/kpti/x86-mm-rip-out-complicated-out-of-date-buggy-tlb-flushing.patch
+bugfix/all/kpti/x86-mm-fix-missed-global-tlb-flush-stat.patch
+bugfix/all/kpti/x86-mm-new-tunable-for-single-vs-full-tlb-flush.patch
+bugfix/all/kpti/x86-mm-set-tlb-flush-tunable-to-sane-value-33.patch
+bugfix/all/kpti/x86-mm-fix-sparse-tlb_single_page_flush_ceiling-warning-and-make-the-variable-read-mostly.patch
+bugfix/all/kpti/x86-mm-add-invpcid-helpers.patch
+bugfix/all/kpti/x86-mm-fix-invpcid-asm-constraint.patch
+bugfix/all/kpti/x86-mm-add-a-noinvpcid-boot-option-to-turn-off-invpcid.patch
+bugfix/all/kpti/x86-mm-if-invpcid-is-available-use-it-to-flush-global-mappings.patch
+bugfix/all/kpti/mm-mmu_context-sched-core-fix-mmu_context.h-assumption.patch
+bugfix/all/kpti/sched-core-add-switch_mm_irqs_off-and-use-it-in-the-scheduler.patch
+bugfix/all/kpti/x86-mm-build-arch-x86-mm-tlb.c-even-on-smp.patch
+bugfix/all/kpti/x86-mm-sched-core-uninline-switch_mm.patch
+bugfix/all/kpti/x86-mm-sched-core-turn-off-irqs-in-switch_mm.patch
+bugfix/all/kpti/arm-hide-finish_arch_post_lock_switch-from-modules.patch
+bugfix/all/kpti/sched-core-idle_task_exit-shouldn-t-use-switch_mm_irqs_off.patch
+bugfix/all/kpti/x86-irq-do-not-substract-irq_tlb_count-from-irq_call_count.patch
+bugfix/all/kpti/x86-vm86-32-switch-to-flush_tlb_mm_range-in-mark_screen_rdonly.patch
+bugfix/all/kpti/x86-mm-remove-flush_tlb-and-flush_tlb_current_task.patch
+bugfix/all/kpti/x86-mm-make-flush_tlb_mm_range-more-predictable.patch
+bugfix/all/kpti/x86-mm-reimplement-flush_tlb_page-using-flush_tlb_mm_range.patch
+bugfix/all/kpti/x86-mm-remove-the-up-asm-tlbflush.h-code-always-use-the-formerly-smp-code.patch
+bugfix/all/kpti/x86-mm-disable-pcid-on-32-bit-kernels.patch
+bugfix/all/kpti/x86-mm-add-the-nopcid-boot-option-to-turn-off-pcid.patch
+bugfix/all/kpti/x86-mm-enable-cr4.pcide-on-supported-systems.patch
+bugfix/all/kpti/x86-mm-64-fix-reboot-interaction-with-cr4.pcide.patch
+bugfix/all/kpti/kaiser-kernel-address-isolation.patch
+bugfix/all/kpti/kaiser-set-_page_user-of-the-vsyscall-page.patch
+bugfix/all/kpti/x86-kvmclock-disable-use-from-vdso-if-kpti-is-enabled.patch
+bugfix/all/kpti/kaiser-alloc_ldt_struct-use-get_zeroed_page.patch
+bugfix/all/kpti/x86-alternatives-cleanup-dprintk-macro.patch
+bugfix/all/kpti/x86-alternatives-add-instruction-padding.patch
+bugfix/all/kpti/x86-alternatives-make-jmps-more-robust.patch
+bugfix/all/kpti/x86-alternatives-use-optimized-nops-for-padding.patch
+bugfix/all/kpti/kaiser-add-nokaiser-boot-option-using-alternative.patch
+bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-matching-at-end.patch
+bugfix/all/kpti/x86-boot-fix-early-command-line-parsing-when-partial-word-matches.patch
+bugfix/all/kpti/x86-boot-simplify-early-command-line-parsing.patch
+bugfix/all/kpti/x86-boot-pass-in-size-to-early-cmdline-parsing.patch
+bugfix/all/kpti/x86-boot-add-early-cmdline-parsing-for-options-with-arguments.patch
+bugfix/all/kpti/x86-kaiser-rename-and-simplify-x86_feature_kaiser-handling.patch
+bugfix/all/kpti/x86-kaiser-check-boottime-cmdline-params.patch
+bugfix/all/kpti/kaiser-use-alternative-instead-of-x86_cr3_pcid_noflush.patch
+bugfix/all/kpti/kaiser-asm-tlbflush.h-handle-nopge-at-lower-level.patch
+bugfix/all/kpti/kaiser-kaiser_flush_tlb_on_return_to_user-check-pcid.patch
+bugfix/all/kpti/x86-paravirt-dont-patch-flush_tlb_single.patch
+bugfix/all/kpti/x86-kaiser-reenable-paravirt.patch
+bugfix/all/kpti/kaiser-disabled-on-xen-pv.patch
+bugfix/all/kpti/x86-kaiser-move-feature-detection-up.patch
+bugfix/all/kpti/kpti-rename-to-page_table_isolation.patch
+bugfix/all/kpti/kpti-report-when-enabled.patch
# Fix ABI changes
--
Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/kernel/linux.git
More information about the Kernel-svn-changes
mailing list