[kernel] r16338 - in dists/sid/linux-2.6/debian: . patches/bugfix/all/stable patches/debian patches/features/all/vserver patches/features/all/xen patches/series
Ben Hutchings
benh at alioth.debian.org
Mon Sep 20 23:25:09 UTC 2010
Author: benh
Date: Mon Sep 20 23:25:04 2010
New Revision: 16338
Log:
Add stable 2.6.32.22
Revert one ABI-breaking change in sched.
Hide sched_class changes from genksyms; this structure is really private.
Revert all remaining sched changes for OpenVZ and VServer.
Update context for Xen-pvops.
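(Side note on the genksyms item above: a minimal, illustrative sketch of the
usual hiding trick follows. It is not the actual sched_class patch, and the
struct/member names are hypothetical. genksyms defines __GENKSYMS__ while it
computes symbol-version CRCs, so members guarded this way leave the recorded
ABI checksum unchanged even though the in-kernel layout differs.

struct example_private {
	int existing_member;
#ifndef __GENKSYMS__
	/*
	 * New member added by the update.  genksyms never parses it, so
	 * the CRCs of exported functions that reference this struct are
	 * computed over the old layout and the ABI check still passes.
	 */
	int new_member;
#endif
};
)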
Added:
dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.22.patch
dists/sid/linux-2.6/debian/patches/debian/revert-sched-2.6.32.22-changes.patch
dists/sid/linux-2.6/debian/patches/debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch
dists/sid/linux-2.6/debian/patches/debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch
dists/sid/linux-2.6/debian/patches/series/24-extra
- copied, changed from r16335, dists/sid/linux-2.6/debian/patches/series/23-extra
Deleted:
dists/sid/linux-2.6/debian/patches/series/23-extra
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch
dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
dists/sid/linux-2.6/debian/patches/series/24
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/changelog Mon Sep 20 23:25:04 2010 (r16338)
@@ -5,6 +5,9 @@
* scsi_dh_emc: Fix mode select request setup (Closes: #591540)
* snd-hda-codec-via: Fix syntax error when CONFIG_SND_HDA_POWER_SAVE is
disabled (Closes: #597043)
+ * Add stable 2.6.32.22:
+ - [openvz,vserver] Revert sched changes since they conflict with
+ these featuresets
[ Martin Michlmayr ]
* ARM: update mach types.
Added: dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.22.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.22.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,4236 @@
+diff --git a/Makefile b/Makefile
+index 3e7196f..1786938 100644
+diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
+index 2c1db77..a6c66f5 100644
+--- a/arch/arm/kernel/entry-common.S
++++ b/arch/arm/kernel/entry-common.S
+@@ -382,11 +382,13 @@ ENDPROC(sys_clone_wrapper)
+
+ sys_sigreturn_wrapper:
+ add r0, sp, #S_OFF
++ mov why, #0 @ prevent syscall restart handling
+ b sys_sigreturn
+ ENDPROC(sys_sigreturn_wrapper)
+
+ sys_rt_sigreturn_wrapper:
+ add r0, sp, #S_OFF
++ mov why, #0 @ prevent syscall restart handling
+ b sys_rt_sigreturn
+ ENDPROC(sys_rt_sigreturn_wrapper)
+
+diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h
+index dfcf75b..c8662cd 100644
+--- a/arch/ia64/include/asm/compat.h
++++ b/arch/ia64/include/asm/compat.h
+@@ -198,7 +198,7 @@ ptr_to_compat(void __user *uptr)
+ }
+
+ static __inline__ void __user *
+-compat_alloc_user_space (long len)
++arch_compat_alloc_user_space (long len)
+ {
+ struct pt_regs *regs = task_pt_regs(current);
+ return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len);
+diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
+index 6c89228..4a746ea 100644
+--- a/arch/ia64/kernel/msi_ia64.c
++++ b/arch/ia64/kernel/msi_ia64.c
+@@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(unsigned int irq,
+ if (irq_prepare_move(irq, cpu))
+ return -1;
+
+- read_msi_msg(irq, &msg);
++ get_cached_msi_msg(irq, &msg);
+
+ addr = msg.address_lo;
+ addr &= MSI_ADDR_DEST_ID_MASK;
+diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
+index fbbfb97..9ab2617 100644
+--- a/arch/ia64/sn/kernel/msi_sn.c
++++ b/arch/ia64/sn/kernel/msi_sn.c
+@@ -174,7 +174,7 @@ static int sn_set_msi_irq_affinity(unsigned int irq,
+ * Release XIO resources for the old MSI PCI address
+ */
+
+- read_msi_msg(irq, &msg);
++ get_cached_msi_msg(irq, &msg);
+ sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+ pdev = sn_pdev->pdi_linux_pcidev;
+ provider = SN_PCIDEV_BUSPROVIDER(pdev);
+diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
+index f58aed3..27505bd 100644
+--- a/arch/mips/include/asm/compat.h
++++ b/arch/mips/include/asm/compat.h
+@@ -144,7 +144,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = (struct pt_regs *)
+ ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1;
+diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
+index 7f32611..7c77fa9 100644
+--- a/arch/parisc/include/asm/compat.h
++++ b/arch/parisc/include/asm/compat.h
+@@ -146,7 +146,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static __inline__ void __user *compat_alloc_user_space(long len)
++static __inline__ void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = &current->thread.regs;
+ return (void __user *)regs->gr[30];
+diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
+index 4774c2f..8d0fff3 100644
+--- a/arch/powerpc/include/asm/compat.h
++++ b/arch/powerpc/include/asm/compat.h
+@@ -133,7 +133,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = current->thread.regs;
+ unsigned long usp = regs->gpr[1];
+diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
+index 01a0802..0c940d3 100644
+--- a/arch/s390/include/asm/compat.h
++++ b/arch/s390/include/asm/compat.h
+@@ -180,7 +180,7 @@ static inline int is_compat_task(void)
+
+ #endif
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ unsigned long stack;
+
+diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
+index 0e70625..612bb38 100644
+--- a/arch/sparc/include/asm/compat.h
++++ b/arch/sparc/include/asm/compat.h
+@@ -166,7 +166,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = current_thread_info()->kregs;
+ unsigned long usp = regs->u_regs[UREG_I6];
+diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
+index 5294d84..4edd8eb 100644
+--- a/arch/x86/ia32/ia32entry.S
++++ b/arch/x86/ia32/ia32entry.S
+@@ -50,7 +50,12 @@
+ /*
+ * Reload arg registers from stack in case ptrace changed them.
+ * We don't reload %eax because syscall_trace_enter() returned
+- * the value it wants us to use in the table lookup.
++ * the %rax value we should see. Instead, we just truncate that
++ * value to 32 bits again as we did on entry from user mode.
++ * If it's a new value set by user_regset during entry tracing,
++ * this matches the normal truncation of the user-mode value.
++ * If it's -1 to make us punt the syscall, then (u32)-1 is still
++ * an appropriately invalid value.
+ */
+ .macro LOAD_ARGS32 offset, _r9=0
+ .if \_r9
+@@ -60,6 +65,7 @@
+ movl \offset+48(%rsp),%edx
+ movl \offset+56(%rsp),%esi
+ movl \offset+64(%rsp),%edi
++ movl %eax,%eax /* zero extension */
+ .endm
+
+ .macro CFI_STARTPROC32 simple
+@@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ CFI_REMEMBER_STATE
+ jnz sysenter_tracesys
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja ia32_badsys
+ sysenter_do_call:
+ IA32_ARG_FIXUP
+@@ -195,7 +201,7 @@ sysexit_from_sys_call:
+ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja ia32_badsys
+ movl %ebx,%edi /* reload 1st syscall arg */
+ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
+@@ -248,7 +254,7 @@ sysenter_tracesys:
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
+ jmp sysenter_do_call
+ CFI_ENDPROC
+@@ -314,7 +320,7 @@ ENTRY(ia32_cstar_target)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ CFI_REMEMBER_STATE
+ jnz cstar_tracesys
+- cmpl $IA32_NR_syscalls-1,%eax
++ cmpq $IA32_NR_syscalls-1,%rax
+ ja ia32_badsys
+ cstar_do_call:
+ IA32_ARG_FIXUP 1
+@@ -367,7 +373,7 @@ cstar_tracesys:
+ LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ xchgl %ebp,%r9d
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
+ jmp cstar_do_call
+ END(ia32_cstar_target)
+@@ -425,7 +431,7 @@ ENTRY(ia32_syscall)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ jnz ia32_tracesys
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja ia32_badsys
+ ia32_do_call:
+ IA32_ARG_FIXUP
+@@ -444,7 +450,7 @@ ia32_tracesys:
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
+ jmp ia32_do_call
+ END(ia32_syscall)
+diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
+index 9a9c7bd..c8c9a74 100644
+--- a/arch/x86/include/asm/compat.h
++++ b/arch/x86/include/asm/compat.h
+@@ -204,7 +204,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = task_pt_regs(current);
+ return (void __user *)regs->sp - len;
+diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
+index c042729..1ca132f 100644
+--- a/arch/x86/include/asm/tsc.h
++++ b/arch/x86/include/asm/tsc.h
+@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cpu);
+ extern void check_tsc_sync_target(void);
+
+ extern int notsc_setup(char *);
++extern void save_sched_clock_state(void);
++extern void restore_sched_clock_state(void);
+
+ #endif /* _ASM_X86_TSC_H */
+diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
+index 1acd1c4..0da6495 100644
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -3338,7 +3338,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+
+ cfg = desc->chip_data;
+
+- read_msi_msg_desc(desc, &msg);
++ get_cached_msi_msg_desc(desc, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
+index 597683a..aaefa71 100644
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+ local_irq_restore(flags);
+ }
+
++static unsigned long long cyc2ns_suspend;
++
++void save_sched_clock_state(void)
++{
++ if (!sched_clock_stable)
++ return;
++
++ cyc2ns_suspend = sched_clock();
++}
++
++/*
++ * Even on processors with invariant TSC, TSC gets reset in some the
++ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
++ * arbitrary value (still sync'd across cpu's) during resume from such sleep
++ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
++ * that sched_clock() continues from the point where it was left off during
++ * suspend.
++ */
++void restore_sched_clock_state(void)
++{
++ unsigned long long offset;
++ unsigned long flags;
++ int cpu;
++
++ if (!sched_clock_stable)
++ return;
++
++ local_irq_save(flags);
++
++ __get_cpu_var(cyc2ns_offset) = 0;
++ offset = cyc2ns_suspend - sched_clock();
++
++ for_each_possible_cpu(cpu)
++ per_cpu(cyc2ns_offset, cpu) = offset;
++
++ local_irq_restore(flags);
++}
++
+ #ifdef CONFIG_CPU_FREQ
+
+ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
+index c41ad50..3130a4b 100644
+--- a/arch/x86/oprofile/nmi_int.c
++++ b/arch/x86/oprofile/nmi_int.c
+@@ -518,8 +518,13 @@ static int __init init_sysfs(void)
+ int error;
+
+ error = sysdev_class_register(&oprofile_sysclass);
+- if (!error)
+- error = sysdev_register(&device_oprofile);
++ if (error)
++ return error;
++
++ error = sysdev_register(&device_oprofile);
++ if (error)
++ sysdev_class_unregister(&oprofile_sysclass);
++
+ return error;
+ }
+
+@@ -530,8 +535,10 @@ static void exit_sysfs(void)
+ }
+
+ #else
+-#define init_sysfs() do { } while (0)
+-#define exit_sysfs() do { } while (0)
++
++static inline int init_sysfs(void) { return 0; }
++static inline void exit_sysfs(void) { }
++
+ #endif /* CONFIG_PM */
+
+ static int __init p4_init(char **cpu_type)
+@@ -645,6 +652,8 @@ int __init op_nmi_init(struct oprofile_operations *ops)
+ char *cpu_type = NULL;
+ int ret = 0;
+
++ using_nmi = 0;
++
+ if (!cpu_has_apic)
+ return -ENODEV;
+
+@@ -727,7 +736,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
+
+ mux_init(ops);
+
+- init_sysfs();
++ ret = init_sysfs();
++ if (ret)
++ return ret;
++
+ using_nmi = 1;
+ printk(KERN_INFO "oprofile: using NMI interrupt.\n");
+ return 0;
+diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
+index eeeb522..fa0f651 100644
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -112,6 +112,7 @@ static void __save_processor_state(struct saved_context *ctxt)
+ void save_processor_state(void)
+ {
+ __save_processor_state(&saved_context);
++ save_sched_clock_state();
+ }
+ #ifdef CONFIG_X86_32
+ EXPORT_SYMBOL(save_processor_state);
+@@ -253,6 +254,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
+ void restore_processor_state(void)
+ {
+ __restore_processor_state(&saved_context);
++ restore_sched_clock_state();
+ }
+ #ifdef CONFIG_X86_32
+ EXPORT_SYMBOL(restore_processor_state);
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index 6a96da6..0963cd6 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -5504,6 +5504,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
+ */
+ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
+ {
++ unsigned int ehi_flags = ATA_EHI_QUIET;
+ int rc;
+
+ /*
+@@ -5512,7 +5513,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
+ */
+ ata_lpm_enable(host);
+
+- rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1);
++ /*
++ * On some hardware, device fails to respond after spun down
++ * for suspend. As the device won't be used before being
++ * resumed, we don't need to touch the device. Ask EH to skip
++ * the usual stuff and proceed directly to suspend.
++ *
++ * http://thread.gmane.org/gmane.linux.ide/46764
++ */
++ if (mesg.event == PM_EVENT_SUSPEND)
++ ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY;
++
++ rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1);
+ if (rc == 0)
+ host->dev->power.power_state = mesg;
+ return rc;
+diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
+index e30b9e7..fa9bed0 100644
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -3149,6 +3149,10 @@ static int ata_eh_skip_recovery(struct ata_link *link)
+ if (link->flags & ATA_LFLAG_DISABLED)
+ return 1;
+
++ /* skip if explicitly requested */
++ if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
++ return 1;
++
+ /* thaw frozen port and recover failed devices */
+ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
+ return 0;
+diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
+index 6f5093b..cf41126 100644
+--- a/drivers/ata/sata_mv.c
++++ b/drivers/ata/sata_mv.c
+@@ -1879,19 +1879,25 @@ static void mv_bmdma_start(struct ata_queued_cmd *qc)
+ * LOCKING:
+ * Inherited from caller.
+ */
+-static void mv_bmdma_stop(struct ata_queued_cmd *qc)
++static void mv_bmdma_stop_ap(struct ata_port *ap)
+ {
+- struct ata_port *ap = qc->ap;
+ void __iomem *port_mmio = mv_ap_base(ap);
+ u32 cmd;
+
+ /* clear start/stop bit */
+ cmd = readl(port_mmio + BMDMA_CMD);
+- cmd &= ~ATA_DMA_START;
+- writelfl(cmd, port_mmio + BMDMA_CMD);
++ if (cmd & ATA_DMA_START) {
++ cmd &= ~ATA_DMA_START;
++ writelfl(cmd, port_mmio + BMDMA_CMD);
++
++ /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
++ ata_sff_dma_pause(ap);
++ }
++}
+
+- /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+- ata_sff_dma_pause(ap);
++static void mv_bmdma_stop(struct ata_queued_cmd *qc)
++{
++ mv_bmdma_stop_ap(qc->ap);
+ }
+
+ /**
+@@ -1915,8 +1921,21 @@ static u8 mv_bmdma_status(struct ata_port *ap)
+ reg = readl(port_mmio + BMDMA_STATUS);
+ if (reg & ATA_DMA_ACTIVE)
+ status = ATA_DMA_ACTIVE;
+- else
++ else if (reg & ATA_DMA_ERR)
+ status = (reg & ATA_DMA_ERR) | ATA_DMA_INTR;
++ else {
++ /*
++ * Just because DMA_ACTIVE is 0 (DMA completed),
++ * this does _not_ mean the device is "done".
++ * So we should not yet be signalling ATA_DMA_INTR
++ * in some cases. Eg. DSM/TRIM, and perhaps others.
++ */
++ mv_bmdma_stop_ap(ap);
++ if (ioread8(ap->ioaddr.altstatus_addr) & ATA_BUSY)
++ status = 0;
++ else
++ status = ATA_DMA_INTR;
++ }
+ return status;
+ }
+
+@@ -1976,6 +1995,9 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
+
+ switch (tf->protocol) {
+ case ATA_PROT_DMA:
++ if (tf->command == ATA_CMD_DSM)
++ return;
++ /* fall-thru */
+ case ATA_PROT_NCQ:
+ break; /* continue below */
+ case ATA_PROT_PIO:
+@@ -2075,6 +2097,8 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
+ if ((tf->protocol != ATA_PROT_DMA) &&
+ (tf->protocol != ATA_PROT_NCQ))
+ return;
++ if (tf->command == ATA_CMD_DSM)
++ return; /* use bmdma for this */
+
+ /* Fill in Gen IIE command request block */
+ if (!(tf->flags & ATA_TFLAG_WRITE))
+@@ -2270,6 +2294,12 @@ static unsigned int mv_qc_issue(struct ata_queued_cmd *qc)
+
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
++ if (qc->tf.command == ATA_CMD_DSM) {
++ if (!ap->ops->bmdma_setup) /* no bmdma on GEN_I */
++ return AC_ERR_OTHER;
++ break; /* use bmdma for this */
++ }
++ /* fall thru */
+ case ATA_PROT_NCQ:
+ mv_start_edma(ap, port_mmio, pp, qc->tf.protocol);
+ pp->req_idx = (pp->req_idx + 1) & MV_MAX_Q_DEPTH_MASK;
+diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
+index 08173fc..1b8745d 100644
+diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
+index 2680db7..c3aca5c 100644
+diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
+index 176a6df..3ada62b 100644
+diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
+index 4f5c733..79cc437 100644
+diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
+index 1ca6574..e9add5b 100644
+--- a/drivers/hid/usbhid/hid-core.c
++++ b/drivers/hid/usbhid/hid-core.c
+@@ -1000,16 +1000,6 @@ static int usbhid_start(struct hid_device *hid)
+ }
+ }
+
+- init_waitqueue_head(&usbhid->wait);
+- INIT_WORK(&usbhid->reset_work, hid_reset);
+- INIT_WORK(&usbhid->restart_work, __usbhid_restart_queues);
+- setup_timer(&usbhid->io_retry, hid_retry_timeout, (unsigned long) hid);
+-
+- spin_lock_init(&usbhid->lock);
+-
+- usbhid->intf = intf;
+- usbhid->ifnum = interface->desc.bInterfaceNumber;
+-
+ usbhid->urbctrl = usb_alloc_urb(0, GFP_KERNEL);
+ if (!usbhid->urbctrl) {
+ ret = -ENOMEM;
+@@ -1180,6 +1170,14 @@ static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *
+
+ hid->driver_data = usbhid;
+ usbhid->hid = hid;
++ usbhid->intf = intf;
++ usbhid->ifnum = interface->desc.bInterfaceNumber;
++
++ init_waitqueue_head(&usbhid->wait);
++ INIT_WORK(&usbhid->reset_work, hid_reset);
++ INIT_WORK(&usbhid->restart_work, __usbhid_restart_queues);
++ setup_timer(&usbhid->io_retry, hid_retry_timeout, (unsigned long) hid);
++ spin_lock_init(&usbhid->lock);
+
+ ret = hid_add_device(hid);
+ if (ret) {
+diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c
+index e2107e5..afebc34 100644
+--- a/drivers/hwmon/f75375s.c
++++ b/drivers/hwmon/f75375s.c
+@@ -79,7 +79,7 @@ I2C_CLIENT_INSMOD_2(f75373, f75375);
+ #define F75375_REG_PWM2_DROP_DUTY 0x6C
+
+ #define FAN_CTRL_LINEAR(nr) (4 + nr)
+-#define FAN_CTRL_MODE(nr) (5 + ((nr) * 2))
++#define FAN_CTRL_MODE(nr) (4 + ((nr) * 2))
+
+ /*
+ * Data structures and manipulation thereof
+@@ -298,7 +298,7 @@ static int set_pwm_enable_direct(struct i2c_client *client, int nr, int val)
+ return -EINVAL;
+
+ fanmode = f75375_read8(client, F75375_REG_FAN_TIMER);
+- fanmode = ~(3 << FAN_CTRL_MODE(nr));
++ fanmode &= ~(3 << FAN_CTRL_MODE(nr));
+
+ switch (val) {
+ case 0: /* Full speed */
+@@ -350,7 +350,7 @@ static ssize_t set_pwm_mode(struct device *dev, struct device_attribute *attr,
+
+ mutex_lock(&data->update_lock);
+ conf = f75375_read8(client, F75375_REG_CONFIG1);
+- conf = ~(1 << FAN_CTRL_LINEAR(nr));
++ conf &= ~(1 << FAN_CTRL_LINEAR(nr));
+
+ if (val == 0)
+ conf |= (1 << FAN_CTRL_LINEAR(nr)) ;
+diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c
+index f808d18..4f84d1a 100644
+--- a/drivers/hwmon/k8temp.c
++++ b/drivers/hwmon/k8temp.c
+@@ -143,6 +143,37 @@ static struct pci_device_id k8temp_ids[] = {
+
+ MODULE_DEVICE_TABLE(pci, k8temp_ids);
+
++static int __devinit is_rev_g_desktop(u8 model)
++{
++ u32 brandidx;
++
++ if (model < 0x69)
++ return 0;
++
++ if (model == 0xc1 || model == 0x6c || model == 0x7c)
++ return 0;
++
++ /*
++ * Differentiate between AM2 and ASB1.
++ * See "Constructing the processor Name String" in "Revision
++ * Guide for AMD NPT Family 0Fh Processors" (33610).
++ */
++ brandidx = cpuid_ebx(0x80000001);
++ brandidx = (brandidx >> 9) & 0x1f;
++
++ /* Single core */
++ if ((model == 0x6f || model == 0x7f) &&
++ (brandidx == 0x7 || brandidx == 0x9 || brandidx == 0xc))
++ return 0;
++
++ /* Dual core */
++ if (model == 0x6b &&
++ (brandidx == 0xb || brandidx == 0xc))
++ return 0;
++
++ return 1;
++}
++
+ static int __devinit k8temp_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+ {
+@@ -179,9 +210,7 @@ static int __devinit k8temp_probe(struct pci_dev *pdev,
+ "wrong - check erratum #141\n");
+ }
+
+- if ((model >= 0x69) &&
+- !(model == 0xc1 || model == 0x6c || model == 0x7c ||
+- model == 0x6b || model == 0x6f || model == 0x7f)) {
++ if (is_rev_g_desktop(model)) {
+ /*
+ * RevG desktop CPUs (i.e. no socket S1G1 or
+ * ASB1 parts) need additional offset,
+diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
+index 1df02d2..16f5ab2 100644
+--- a/drivers/input/serio/i8042.c
++++ b/drivers/input/serio/i8042.c
+@@ -1412,8 +1412,8 @@ static int __init i8042_init(void)
+
+ static void __exit i8042_exit(void)
+ {
+- platform_driver_unregister(&i8042_driver);
+ platform_device_unregister(i8042_platform_device);
++ platform_driver_unregister(&i8042_driver);
+ i8042_platform_exit();
+
+ panic_blink = NULL;
+diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
+index 91991b4..f43edfd 100644
+--- a/drivers/mmc/host/tmio_mmc.c
++++ b/drivers/mmc/host/tmio_mmc.c
+@@ -161,6 +161,7 @@ tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
+ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
+ {
+ struct mmc_data *data = host->data;
++ void *sg_virt;
+ unsigned short *buf;
+ unsigned int count;
+ unsigned long flags;
+@@ -170,8 +171,8 @@ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
+ return;
+ }
+
+- buf = (unsigned short *)(tmio_mmc_kmap_atomic(host, &flags) +
+- host->sg_off);
++ sg_virt = tmio_mmc_kmap_atomic(host->sg_ptr, &flags);
++ buf = (unsigned short *)(sg_virt + host->sg_off);
+
+ count = host->sg_ptr->length - host->sg_off;
+ if (count > data->blksz)
+@@ -188,7 +189,7 @@ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
+
+ host->sg_off += count;
+
+- tmio_mmc_kunmap_atomic(host, &flags);
++ tmio_mmc_kunmap_atomic(sg_virt, &flags);
+
+ if (host->sg_off == host->sg_ptr->length)
+ tmio_mmc_next_sg(host);
+diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
+index 9fa9985..ee8fa89 100644
+--- a/drivers/mmc/host/tmio_mmc.h
++++ b/drivers/mmc/host/tmio_mmc.h
+@@ -102,10 +102,7 @@
+
+ #define ack_mmc_irqs(host, i) \
+ do { \
+- u32 mask;\
+- mask = sd_ctrl_read32((host), CTL_STATUS); \
+- mask &= ~((i) & TMIO_MASK_IRQ); \
+- sd_ctrl_write32((host), CTL_STATUS, mask); \
++ sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
+ } while (0)
+
+
+@@ -200,19 +197,17 @@ static inline int tmio_mmc_next_sg(struct tmio_mmc_host *host)
+ return --host->sg_len;
+ }
+
+-static inline char *tmio_mmc_kmap_atomic(struct tmio_mmc_host *host,
++static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg,
+ unsigned long *flags)
+ {
+- struct scatterlist *sg = host->sg_ptr;
+-
+ local_irq_save(*flags);
+ return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+ }
+
+-static inline void tmio_mmc_kunmap_atomic(struct tmio_mmc_host *host,
++static inline void tmio_mmc_kunmap_atomic(void *virt,
+ unsigned long *flags)
+ {
+- kunmap_atomic(sg_page(host->sg_ptr), KM_BIO_SRC_IRQ);
++ kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+ local_irq_restore(*flags);
+ }
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 4fdfa2a..0f77aca 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1006,7 +1006,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
+ if (err < 0)
+ goto err_free_sk;
+
+- if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
++ if (!net_eq(dev_net(tun->dev), &init_net) ||
++ device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
+ device_create_file(&tun->dev->dev, &dev_attr_owner) ||
+ device_create_file(&tun->dev->dev, &dev_attr_group))
+ printk(KERN_ERR "Failed to create tun sysfs files\n");
+diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
+index ce166ae..2c4914a 100644
+--- a/drivers/net/wireless/ath/ath5k/base.c
++++ b/drivers/net/wireless/ath/ath5k/base.c
+@@ -1288,6 +1288,10 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
+ PCI_DMA_TODEVICE);
+
+ rate = ieee80211_get_tx_rate(sc->hw, info);
++ if (!rate) {
++ ret = -EINVAL;
++ goto err_unmap;
++ }
+
+ if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+ flags |= AR5K_TXDESC_NOACK;
+diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h
+index 4fe33f7..a5daa0d 100644
+--- a/drivers/net/wireless/ath/ath9k/eeprom.h
++++ b/drivers/net/wireless/ath/ath9k/eeprom.h
+@@ -60,7 +60,7 @@
+
+ #define SD_NO_CTL 0xE0
+ #define NO_CTL 0xff
+-#define CTL_MODE_M 7
++#define CTL_MODE_M 0xf
+ #define CTL_11A 0
+ #define CTL_11B 1
+ #define CTL_11G 2
+diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
+index c1dd857..21cf521 100644
+--- a/drivers/net/wireless/ath/regd.h
++++ b/drivers/net/wireless/ath/regd.h
+@@ -31,7 +31,6 @@ enum ctl_group {
+ #define NO_CTL 0xff
+ #define SD_NO_CTL 0xE0
+ #define NO_CTL 0xff
+-#define CTL_MODE_M 7
+ #define CTL_11A 0
+ #define CTL_11B 1
+ #define CTL_11G 2
+diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
+index 9d147de..0edd7b4 100644
+--- a/drivers/net/wireless/p54/txrx.c
++++ b/drivers/net/wireless/p54/txrx.c
+@@ -445,7 +445,7 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb)
+ }
+
+ if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
+- (!payload->status))
++ !(payload->status & P54_TX_FAILED))
+ info->flags |= IEEE80211_TX_STAT_ACK;
+ if (payload->status & P54_TX_PSM_CANCELLED)
+ info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
+diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
+index c9e2ae9..5c4df24 100644
+--- a/drivers/oprofile/buffer_sync.c
++++ b/drivers/oprofile/buffer_sync.c
+@@ -140,16 +140,6 @@ static struct notifier_block module_load_nb = {
+ .notifier_call = module_load_notify,
+ };
+
+-
+-static void end_sync(void)
+-{
+- end_cpu_work();
+- /* make sure we don't leak task structs */
+- process_task_mortuary();
+- process_task_mortuary();
+-}
+-
+-
+ int sync_start(void)
+ {
+ int err;
+@@ -157,7 +147,7 @@ int sync_start(void)
+ if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+- start_cpu_work();
++ mutex_lock(&buffer_mutex);
+
+ err = task_handoff_register(&task_free_nb);
+ if (err)
+@@ -172,7 +162,10 @@ int sync_start(void)
+ if (err)
+ goto out4;
+
++ start_cpu_work();
++
+ out:
++ mutex_unlock(&buffer_mutex);
+ return err;
+ out4:
+ profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+@@ -181,7 +174,6 @@ out3:
+ out2:
+ task_handoff_unregister(&task_free_nb);
+ out1:
+- end_sync();
+ free_cpumask_var(marked_cpus);
+ goto out;
+ }
+@@ -189,11 +181,20 @@ out1:
+
+ void sync_stop(void)
+ {
++ /* flush buffers */
++ mutex_lock(&buffer_mutex);
++ end_cpu_work();
+ unregister_module_notifier(&module_load_nb);
+ profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+ profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
+ task_handoff_unregister(&task_free_nb);
+- end_sync();
++ mutex_unlock(&buffer_mutex);
++ flush_scheduled_work();
++
++ /* make sure we don't leak task structs */
++ process_task_mortuary();
++ process_task_mortuary();
++
+ free_cpumask_var(marked_cpus);
+ }
+
+diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
+index 1f1f5a8..5e2ac4a 100644
+--- a/drivers/oprofile/cpu_buffer.c
++++ b/drivers/oprofile/cpu_buffer.c
+@@ -121,8 +121,6 @@ void end_cpu_work(void)
+
+ cancel_delayed_work(&b->work);
+ }
+-
+- flush_scheduled_work();
+ }
+
+ /*
+diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
+index f9cf317..0fb1d05 100644
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -195,6 +195,9 @@ void unmask_msi_irq(unsigned int irq)
+ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
++
++ BUG_ON(entry->dev->current_state != PCI_D0);
++
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+@@ -228,10 +231,32 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+ read_msi_msg_desc(desc, msg);
+ }
+
++void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
++{
++ struct msi_desc *entry = get_irq_desc_msi(desc);
++
++ /* Assert that the cache is valid, assuming that
++ * valid messages are not all-zeroes. */
++ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
++ entry->msg.data));
++
++ *msg = entry->msg;
++}
++
++void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
++{
++ struct irq_desc *desc = irq_to_desc(irq);
++
++ get_cached_msi_msg_desc(desc, msg);
++}
++
+ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
+- if (entry->msi_attrib.is_msix) {
++
++ if (entry->dev->current_state != PCI_D0) {
++ /* Don't touch the hardware now */
++ } else if (entry->msi_attrib.is_msix) {
+ void __iomem *base;
+ base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+diff --git a/drivers/power/apm_power.c b/drivers/power/apm_power.c
+index 936bae5..dc628cb 100644
+--- a/drivers/power/apm_power.c
++++ b/drivers/power/apm_power.c
+@@ -233,6 +233,7 @@ static int calculate_capacity(enum apm_source source)
+ empty_design_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN;
+ now_prop = POWER_SUPPLY_PROP_ENERGY_NOW;
+ avg_prop = POWER_SUPPLY_PROP_ENERGY_AVG;
++ break;
+ case SOURCE_VOLTAGE:
+ full_prop = POWER_SUPPLY_PROP_VOLTAGE_MAX;
+ empty_prop = POWER_SUPPLY_PROP_VOLTAGE_MIN;
+diff --git a/drivers/staging/hv/RingBuffer.c b/drivers/staging/hv/RingBuffer.c
+index f69ae33..3a38103 100644
+--- a/drivers/staging/hv/RingBuffer.c
++++ b/drivers/staging/hv/RingBuffer.c
+@@ -192,7 +192,7 @@ Description:
+ static inline u64
+ GetRingBufferIndices(RING_BUFFER_INFO* RingInfo)
+ {
+- return ((u64)RingInfo->RingBuffer->WriteIndex << 32) || RingInfo->RingBuffer->ReadIndex;
++ return (u64)RingInfo->RingBuffer->WriteIndex << 32;
+ }
+
+
+diff --git a/drivers/staging/hv/StorVscApi.h b/drivers/staging/hv/StorVscApi.h
+index 69c1406..3d8ff08 100644
+--- a/drivers/staging/hv/StorVscApi.h
++++ b/drivers/staging/hv/StorVscApi.h
+@@ -28,10 +28,10 @@
+ #include "VmbusApi.h"
+
+ /* Defines */
+-#define STORVSC_RING_BUFFER_SIZE (10*PAGE_SIZE)
++#define STORVSC_RING_BUFFER_SIZE (20*PAGE_SIZE)
+ #define BLKVSC_RING_BUFFER_SIZE (20*PAGE_SIZE)
+
+-#define STORVSC_MAX_IO_REQUESTS 64
++#define STORVSC_MAX_IO_REQUESTS 128
+
+ /*
+ * In Hyper-V, each port/path/target maps to 1 scsi host adapter. In
+diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c
+index 4c3c8bc..547261d 100644
+--- a/drivers/staging/hv/netvsc_drv.c
++++ b/drivers/staging/hv/netvsc_drv.c
+@@ -392,6 +392,9 @@ static const struct net_device_ops device_ops = {
+ .ndo_start_xmit = netvsc_start_xmit,
+ .ndo_get_stats = netvsc_get_stats,
+ .ndo_set_multicast_list = netvsc_set_multicast_list,
++ .ndo_change_mtu = eth_change_mtu,
++ .ndo_validate_addr = eth_validate_addr,
++ .ndo_set_mac_address = eth_mac_addr,
+ };
+
+ static int netvsc_probe(struct device *device)
+diff --git a/drivers/staging/hv/storvsc_drv.c b/drivers/staging/hv/storvsc_drv.c
+index d49dc21..2a4b147 100644
+--- a/drivers/staging/hv/storvsc_drv.c
++++ b/drivers/staging/hv/storvsc_drv.c
+@@ -532,7 +532,7 @@ static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl,
+
+ ASSERT(orig_sgl[i].offset + orig_sgl[i].length <= PAGE_SIZE);
+
+- if (j == 0)
++ if (bounce_addr == 0)
+ bounce_addr = (unsigned long)kmap_atomic(sg_page((&bounce_sgl[j])), KM_IRQ0);
+
+ while (srclen) {
+@@ -593,7 +593,7 @@ static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl,
+ destlen = orig_sgl[i].length;
+ ASSERT(orig_sgl[i].offset + orig_sgl[i].length <= PAGE_SIZE);
+
+- if (j == 0)
++ if (bounce_addr == 0)
+ bounce_addr = (unsigned long)kmap_atomic(sg_page((&bounce_sgl[j])), KM_IRQ0);
+
+ while (destlen) {
+@@ -652,6 +652,7 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+ unsigned int request_size = 0;
+ int i;
+ struct scatterlist *sgl;
++ unsigned int sg_count = 0;
+
+ DPRINT_ENTER(STORVSC_DRV);
+
+@@ -736,6 +737,7 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+ request->DataBuffer.Length = scsi_bufflen(scmnd);
+ if (scsi_sg_count(scmnd)) {
+ sgl = (struct scatterlist *)scsi_sglist(scmnd);
++ sg_count = scsi_sg_count(scmnd);
+
+ /* check if we need to bounce the sgl */
+ if (do_bounce_buffer(sgl, scsi_sg_count(scmnd)) != -1) {
+@@ -770,11 +772,12 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+ scsi_sg_count(scmnd));
+
+ sgl = cmd_request->bounce_sgl;
++ sg_count = cmd_request->bounce_sgl_count;
+ }
+
+ request->DataBuffer.Offset = sgl[0].offset;
+
+- for (i = 0; i < scsi_sg_count(scmnd); i++) {
++ for (i = 0; i < sg_count; i++) {
+ DPRINT_DBG(STORVSC_DRV, "sgl[%d] len %d offset %d \n",
+ i, sgl[i].length, sgl[i].offset);
+ request->DataBuffer.PfnArray[i] =
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 0e64037..e3017c4 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -971,7 +971,8 @@ static int acm_probe(struct usb_interface *intf,
+ }
+
+ if (!buflen) {
+- if (intf->cur_altsetting->endpoint->extralen &&
++ if (intf->cur_altsetting->endpoint &&
++ intf->cur_altsetting->endpoint->extralen &&
+ intf->cur_altsetting->endpoint->extra) {
+ dev_dbg(&intf->dev,
+ "Seeking extra descriptors on endpoint\n");
+@@ -1464,6 +1465,17 @@ err_out:
+ }
+
+ #endif /* CONFIG_PM */
++
++#define NOKIA_PCSUITE_ACM_INFO(x) \
++ USB_DEVICE_AND_INTERFACE_INFO(0x0421, x, \
++ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \
++ USB_CDC_ACM_PROTO_VENDOR)
++
++#define SAMSUNG_PCSUITE_ACM_INFO(x) \
++ USB_DEVICE_AND_INTERFACE_INFO(0x04e7, x, \
++ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \
++ USB_CDC_ACM_PROTO_VENDOR)
++
+ /*
+ * USB driver structure.
+ */
+@@ -1521,6 +1533,76 @@ static struct usb_device_id acm_ids[] = {
+ { USB_DEVICE(0x1bbb, 0x0003), /* Alcatel OT-I650 */
+ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+ },
++ { USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */
++ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
++ },
++
++ /* Nokia S60 phones expose two ACM channels. The first is
++ * a modem and is picked up by the standard AT-command
++ * information below. The second is 'vendor-specific' but
++ * is treated as a serial device at the S60 end, so we want
++ * to expose it on Linux too. */
++ { NOKIA_PCSUITE_ACM_INFO(0x042D), }, /* Nokia 3250 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04D8), }, /* Nokia 5500 Sport */
++ { NOKIA_PCSUITE_ACM_INFO(0x04C9), }, /* Nokia E50 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0419), }, /* Nokia E60 */
++ { NOKIA_PCSUITE_ACM_INFO(0x044D), }, /* Nokia E61 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0001), }, /* Nokia E61i */
++ { NOKIA_PCSUITE_ACM_INFO(0x0475), }, /* Nokia E62 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0508), }, /* Nokia E65 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0418), }, /* Nokia E70 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0425), }, /* Nokia N71 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0486), }, /* Nokia N73 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04DF), }, /* Nokia N75 */
++ { NOKIA_PCSUITE_ACM_INFO(0x000e), }, /* Nokia N77 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0445), }, /* Nokia N80 */
++ { NOKIA_PCSUITE_ACM_INFO(0x042F), }, /* Nokia N91 & N91 8GB */
++ { NOKIA_PCSUITE_ACM_INFO(0x048E), }, /* Nokia N92 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0420), }, /* Nokia N93 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04E6), }, /* Nokia N93i */
++ { NOKIA_PCSUITE_ACM_INFO(0x04B2), }, /* Nokia 5700 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0134), }, /* Nokia 6110 Navigator (China) */
++ { NOKIA_PCSUITE_ACM_INFO(0x046E), }, /* Nokia 6110 Navigator */
++ { NOKIA_PCSUITE_ACM_INFO(0x002f), }, /* Nokia 6120 classic & */
++ { NOKIA_PCSUITE_ACM_INFO(0x0088), }, /* Nokia 6121 classic */
++ { NOKIA_PCSUITE_ACM_INFO(0x00fc), }, /* Nokia 6124 classic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0042), }, /* Nokia E51 */
++ { NOKIA_PCSUITE_ACM_INFO(0x00b0), }, /* Nokia E66 */
++ { NOKIA_PCSUITE_ACM_INFO(0x00ab), }, /* Nokia E71 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0481), }, /* Nokia N76 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0007), }, /* Nokia N81 & N81 8GB */
++ { NOKIA_PCSUITE_ACM_INFO(0x0071), }, /* Nokia N82 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04F0), }, /* Nokia N95 & N95-3 NAM */
++ { NOKIA_PCSUITE_ACM_INFO(0x0070), }, /* Nokia N95 8GB */
++ { NOKIA_PCSUITE_ACM_INFO(0x00e9), }, /* Nokia 5320 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0099), }, /* Nokia 6210 Navigator, RM-367 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0128), }, /* Nokia 6210 Navigator, RM-419 */
++ { NOKIA_PCSUITE_ACM_INFO(0x008f), }, /* Nokia 6220 Classic */
++ { NOKIA_PCSUITE_ACM_INFO(0x00a0), }, /* Nokia 6650 */
++ { NOKIA_PCSUITE_ACM_INFO(0x007b), }, /* Nokia N78 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0094), }, /* Nokia N85 */
++ { NOKIA_PCSUITE_ACM_INFO(0x003a), }, /* Nokia N96 & N96-3 */
++ { NOKIA_PCSUITE_ACM_INFO(0x00e9), }, /* Nokia 5320 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0108), }, /* Nokia 5320 XpressMusic 2G */
++ { NOKIA_PCSUITE_ACM_INFO(0x01f5), }, /* Nokia N97, RM-505 */
++ { NOKIA_PCSUITE_ACM_INFO(0x02e3), }, /* Nokia 5230, RM-588 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0178), }, /* Nokia E63 */
++ { NOKIA_PCSUITE_ACM_INFO(0x010e), }, /* Nokia E75 */
++ { NOKIA_PCSUITE_ACM_INFO(0x02d9), }, /* Nokia 6760 Slide */
++ { NOKIA_PCSUITE_ACM_INFO(0x01d0), }, /* Nokia E52 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0223), }, /* Nokia E72 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0275), }, /* Nokia X6 */
++ { NOKIA_PCSUITE_ACM_INFO(0x026c), }, /* Nokia N97 Mini */
++ { NOKIA_PCSUITE_ACM_INFO(0x0154), }, /* Nokia 5800 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x04ce), }, /* Nokia E90 */
++ { NOKIA_PCSUITE_ACM_INFO(0x01d4), }, /* Nokia E55 */
++ { SAMSUNG_PCSUITE_ACM_INFO(0x6651), }, /* Samsung GTi8510 (INNOV8) */
++
++ /* NOTE: non-Nokia COMM/ACM/0xff is likely MSFT RNDIS... NOT a modem! */
++
++ /* control interfaces without any protocol set */
++ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
++ USB_CDC_PROTO_NONE) },
+
+ /* control interfaces with various AT-command sets */
+ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
+@@ -1536,7 +1618,6 @@ static struct usb_device_id acm_ids[] = {
+ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
+ USB_CDC_ACM_PROTO_AT_CDMA) },
+
+- /* NOTE: COMM/ACM/0xff is likely MSFT RNDIS ... NOT a modem!! */
+ { }
+ };
+
+diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
+index 48267bc..33ac6ac 100644
+--- a/drivers/usb/gadget/rndis.c
++++ b/drivers/usb/gadget/rndis.c
+@@ -291,9 +291,13 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
+ /* mandatory */
+ case OID_GEN_VENDOR_DESCRIPTION:
+ pr_debug("%s: OID_GEN_VENDOR_DESCRIPTION\n", __func__);
+- length = strlen (rndis_per_dev_params [configNr].vendorDescr);
+- memcpy (outbuf,
+- rndis_per_dev_params [configNr].vendorDescr, length);
++ if ( rndis_per_dev_params [configNr].vendorDescr ) {
++ length = strlen (rndis_per_dev_params [configNr].vendorDescr);
++ memcpy (outbuf,
++ rndis_per_dev_params [configNr].vendorDescr, length);
++ } else {
++ outbuf[0] = 0;
++ }
+ retval = 0;
+ break;
+
+diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c
+index 36f96da..ab26c2b 100644
+--- a/drivers/usb/host/ehci-ppc-of.c
++++ b/drivers/usb/host/ehci-ppc-of.c
+@@ -192,17 +192,19 @@ ehci_hcd_ppc_of_probe(struct of_device *op, const struct of_device_id *match)
+ }
+
+ rv = usb_add_hcd(hcd, irq, 0);
+- if (rv == 0)
+- return 0;
++ if (rv)
++ goto err_ehci;
++
++ return 0;
+
++err_ehci:
++ if (ehci->has_amcc_usb23)
++ iounmap(ehci->ohci_hcctrl_reg);
+ iounmap(hcd->regs);
+ err_ioremap:
+ irq_dispose_mapping(irq);
+ err_irq:
+ release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
+-
+- if (ehci->has_amcc_usb23)
+- iounmap(ehci->ohci_hcctrl_reg);
+ err_rmr:
+ usb_put_hcd(hcd);
+
+diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
+index 99bde5f..93c4923 100644
+--- a/drivers/usb/serial/cp210x.c
++++ b/drivers/usb/serial/cp210x.c
+@@ -90,6 +90,7 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x10C4, 0x8149) }, /* West Mountain Radio Computerized Battery Analyzer */
+ { USB_DEVICE(0x10C4, 0x814A) }, /* West Mountain Radio RIGblaster P&P */
+ { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */
++ { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */
+ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */
+ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */
+ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */
+@@ -111,6 +112,7 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
+ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */
+ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
++ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
+ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
+ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
+ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */
+@@ -124,14 +126,14 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
+ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
+ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
+- { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
+- { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
+- { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+- { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
+ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */
+ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */
+ { USB_DEVICE(0x16DC, 0x0012) }, /* W-IE-NE-R Plein & Baus GmbH MPOD Multi Channel Power Supply */
+ { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
++ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
++ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
++ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
++ { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
+ { } /* Terminating Entry */
+ };
+
+diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
+index 813ec3d..a7044b1 100644
+--- a/drivers/usb/serial/ftdi_sio.c
++++ b/drivers/usb/serial/ftdi_sio.c
+@@ -759,6 +759,14 @@ static struct usb_device_id id_table_combined [] = {
+ { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) },
+ { USB_DEVICE(IONICS_VID, IONICS_PLUGCOMPUTER_PID),
+ .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_24_MASTER_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_PC_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_USB_DMX_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MIDI_TIMECODE_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MINI_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MAXI_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MEDIA_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_WING_PID) },
+ { }, /* Optional parameter entry */
+ { } /* Terminating entry */
+ };
+diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
+index 52c3b68..30d3011 100644
+--- a/drivers/usb/serial/ftdi_sio_ids.h
++++ b/drivers/usb/serial/ftdi_sio_ids.h
+@@ -135,6 +135,18 @@
+ #define FTDI_NDI_AURORA_SCU_PID 0xDA74 /* NDI Aurora SCU */
+
+ /*
++ * ChamSys Limited (www.chamsys.co.uk) USB wing/interface product IDs
++ */
++#define FTDI_CHAMSYS_24_MASTER_WING_PID 0xDAF8
++#define FTDI_CHAMSYS_PC_WING_PID 0xDAF9
++#define FTDI_CHAMSYS_USB_DMX_PID 0xDAFA
++#define FTDI_CHAMSYS_MIDI_TIMECODE_PID 0xDAFB
++#define FTDI_CHAMSYS_MINI_WING_PID 0xDAFC
++#define FTDI_CHAMSYS_MAXI_WING_PID 0xDAFD
++#define FTDI_CHAMSYS_MEDIA_WING_PID 0xDAFE
++#define FTDI_CHAMSYS_WING_PID 0xDAFF
++
++/*
+ * Westrex International devices submitted by Cory Lee
+ */
+ #define FTDI_WESTREX_MODEL_777_PID 0xDC00 /* Model 777 */
+diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
+index a861cd2..cf79fb2 100644
+--- a/drivers/usb/serial/mos7840.c
++++ b/drivers/usb/serial/mos7840.c
+@@ -120,15 +120,20 @@
+ * by making a change here, in moschip_port_id_table, and in
+ * moschip_id_table_combined
+ */
+-#define USB_VENDOR_ID_BANDB 0x0856
+-#define BANDB_DEVICE_ID_USO9ML2_2 0xAC22
+-#define BANDB_DEVICE_ID_USO9ML2_4 0xAC24
+-#define BANDB_DEVICE_ID_US9ML2_2 0xAC29
+-#define BANDB_DEVICE_ID_US9ML2_4 0xAC30
+-#define BANDB_DEVICE_ID_USPTL4_2 0xAC31
+-#define BANDB_DEVICE_ID_USPTL4_4 0xAC32
+-#define BANDB_DEVICE_ID_USOPTL4_2 0xAC42
+-#define BANDB_DEVICE_ID_USOPTL4_4 0xAC44
++#define USB_VENDOR_ID_BANDB 0x0856
++#define BANDB_DEVICE_ID_USO9ML2_2 0xAC22
++#define BANDB_DEVICE_ID_USO9ML2_2P 0xBC00
++#define BANDB_DEVICE_ID_USO9ML2_4 0xAC24
++#define BANDB_DEVICE_ID_USO9ML2_4P 0xBC01
++#define BANDB_DEVICE_ID_US9ML2_2 0xAC29
++#define BANDB_DEVICE_ID_US9ML2_4 0xAC30
++#define BANDB_DEVICE_ID_USPTL4_2 0xAC31
++#define BANDB_DEVICE_ID_USPTL4_4 0xAC32
++#define BANDB_DEVICE_ID_USOPTL4_2 0xAC42
++#define BANDB_DEVICE_ID_USOPTL4_2P 0xBC02
++#define BANDB_DEVICE_ID_USOPTL4_4 0xAC44
++#define BANDB_DEVICE_ID_USOPTL4_4P 0xBC03
++#define BANDB_DEVICE_ID_USOPTL2_4 0xAC24
+
+ /* This driver also supports
+ * ATEN UC2324 device using Moschip MCS7840
+@@ -184,13 +189,18 @@ static struct usb_device_id moschip_port_id_table[] = {
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)},
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)},
+ {} /* terminating entry */
+@@ -200,13 +210,18 @@ static __devinitdata struct usb_device_id moschip_id_table_combined[] = {
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)},
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)},
+ {} /* terminating entry */
+@@ -280,12 +295,19 @@ static int mos7840_get_reg_sync(struct usb_serial_port *port, __u16 reg,
+ {
+ struct usb_device *dev = port->serial->dev;
+ int ret = 0;
++ u8 *buf;
++
++ buf = kmalloc(VENDOR_READ_LENGTH, GFP_KERNEL);
++ if (!buf)
++ return -ENOMEM;
+
+ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+- MCS_RD_RTYPE, 0, reg, val, VENDOR_READ_LENGTH,
++ MCS_RD_RTYPE, 0, reg, buf, VENDOR_READ_LENGTH,
+ MOS_WDR_TIMEOUT);
++ *val = buf[0];
+ dbg("mos7840_get_reg_sync offset is %x, return val %x", reg, *val);
+- *val = (*val) & 0x00ff;
++
++ kfree(buf);
+ return ret;
+ }
+
+@@ -338,6 +360,11 @@ static int mos7840_get_uart_reg(struct usb_serial_port *port, __u16 reg,
+ struct usb_device *dev = port->serial->dev;
+ int ret = 0;
+ __u16 Wval;
++ u8 *buf;
++
++ buf = kmalloc(VENDOR_READ_LENGTH, GFP_KERNEL);
++ if (!buf)
++ return -ENOMEM;
+
+ /* dbg("application number is %4x",
+ (((__u16)port->number - (__u16)(port->serial->minor))+1)<<8); */
+@@ -361,9 +388,11 @@ static int mos7840_get_uart_reg(struct usb_serial_port *port, __u16 reg,
+ }
+ }
+ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+- MCS_RD_RTYPE, Wval, reg, val, VENDOR_READ_LENGTH,
++ MCS_RD_RTYPE, Wval, reg, buf, VENDOR_READ_LENGTH,
+ MOS_WDR_TIMEOUT);
+- *val = (*val) & 0x00ff;
++ *val = buf[0];
++
++ kfree(buf);
+ return ret;
+ }
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 30e0467..a4dc7bf 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -106,6 +106,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
+ #define VALID_EVTCHN(chn) ((chn) != 0)
+
+ static struct irq_chip xen_dynamic_chip;
++static struct irq_chip xen_percpu_chip;
+
+ /* Constructor for packed IRQ information. */
+ static struct irq_info mk_unbound_info(void)
+@@ -362,7 +363,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
+ irq = find_unbound_irq();
+
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "event");
++ handle_edge_irq, "event");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_evtchn_info(evtchn);
+@@ -388,8 +389,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ if (irq < 0)
+ goto out;
+
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "ipi");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "ipi");
+
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+@@ -429,8 +430,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+
+ irq = find_unbound_irq();
+
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "virq");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "virq");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_virq_info(evtchn, virq);
+@@ -929,6 +930,16 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
+ .retrigger = retrigger_dynirq,
+ };
+
+static struct irq_chip xen_percpu_chip __read_mostly = {
++ .name = "xen-percpu",
++
++ .disable = disable_dynirq,
++ .mask = disable_dynirq,
++ .unmask = enable_dynirq,
++
++ .ack = ack_dynirq,
++};
++
+ void __init xen_init_IRQ(void)
+ {
+ int i;
+diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
+index c4e8353..42b60b0 100644
+--- a/fs/binfmt_misc.c
++++ b/fs/binfmt_misc.c
+@@ -723,7 +723,7 @@ static int __init init_misc_binfmt(void)
+ {
+ int err = register_filesystem(&bm_fs_type);
+ if (!err) {
+- err = register_binfmt(&misc_format);
++ err = insert_binfmt(&misc_format);
+ if (err)
+ unregister_filesystem(&bm_fs_type);
+ }
+diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
+index 51d9e33..650546f 100644
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -1158,6 +1158,14 @@ __acquires(&fc->lock)
+ }
+ }
+
++static void end_queued_requests(struct fuse_conn *fc)
++{
++ fc->max_background = UINT_MAX;
++ flush_bg_queue(fc);
++ end_requests(fc, &fc->pending);
++ end_requests(fc, &fc->processing);
++}
++
+ /*
+ * Abort all requests.
+ *
+@@ -1184,8 +1192,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
+ fc->connected = 0;
+ fc->blocked = 0;
+ end_io_requests(fc);
+- end_requests(fc, &fc->pending);
+- end_requests(fc, &fc->processing);
++ end_queued_requests(fc);
+ wake_up_all(&fc->waitq);
+ wake_up_all(&fc->blocked_waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+@@ -1200,8 +1207,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
+ if (fc) {
+ spin_lock(&fc->lock);
+ fc->connected = 0;
+- end_requests(fc, &fc->pending);
+- end_requests(fc, &fc->processing);
++ fc->blocked = 0;
++ end_queued_requests(fc);
++ wake_up_all(&fc->blocked_waitq);
+ spin_unlock(&fc->lock);
+ fuse_conn_put(fc);
+ }
+diff --git a/fs/nfs/client.c b/fs/nfs/client.c
+index 127ed5c..19cbbf7 100644
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -273,7 +273,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+ sin1->sin6_scope_id != sin2->sin6_scope_id)
+ return 0;
+
+- return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
++ return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
+ }
+ #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
+ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
+index 4c827d8..3fcb479 100644
+--- a/fs/ocfs2/inode.c
++++ b/fs/ocfs2/inode.c
+@@ -485,7 +485,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
+ OCFS2_BH_IGNORE_CACHE);
+ } else {
+ status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
+- if (!status)
++ /*
++ * If buffer is in jbd, then its checksum may not have been
++ * computed as yet.
++ */
++ if (!status && !buffer_jbd(bh))
+ status = ocfs2_validate_inode_block(osb->sb, bh);
+ }
+ if (status < 0) {
+diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
+index f5ea468..7118a38 100644
+--- a/fs/sysfs/file.c
++++ b/fs/sysfs/file.c
+@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
+ char *p;
+
+ p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
+- if (p)
++ if (!IS_ERR(p))
+ memmove(last_sysfs_file, p, strlen(p) + 1);
+
+ /* need attr_sd for attr and ops, its parent for kobj */
+diff --git a/include/linux/compat.h b/include/linux/compat.h
+index af931ee..cab23f2 100644
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -309,5 +309,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
+ asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
+ int flags, int mode);
+
++extern void __user *compat_alloc_user_space(unsigned long len);
++
+ #endif /* CONFIG_COMPAT */
+ #endif /* _LINUX_COMPAT_H */
+diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
+index a5740fc..a73454a 100644
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -21,8 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+-extern void cpuset_cpus_allowed_locked(struct task_struct *p,
+- struct cpumask *mask);
++extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+ void cpuset_init_current_mems_allowed(void);
+@@ -69,9 +68,6 @@ struct seq_file;
+ extern void cpuset_task_status_allowed(struct seq_file *m,
+ struct task_struct *task);
+
+-extern void cpuset_lock(void);
+-extern void cpuset_unlock(void);
+-
+ extern int cpuset_mem_spread_node(void);
+
+ static inline int cpuset_do_page_mem_spread(void)
+@@ -105,10 +101,11 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
+ {
+ cpumask_copy(mask, cpu_possible_mask);
+ }
+-static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
+- struct cpumask *mask)
++
++static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
+ {
+- cpumask_copy(mask, cpu_possible_mask);
++ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
++ return cpumask_any(cpu_active_mask);
+ }
+
+ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+@@ -157,9 +154,6 @@ static inline void cpuset_task_status_allowed(struct seq_file *m,
+ {
+ }
+
+-static inline void cpuset_lock(void) {}
+-static inline void cpuset_unlock(void) {}
+-
+ static inline int cpuset_mem_spread_node(void)
+ {
+ return 0;
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index b0f6d97..a069916 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -339,6 +339,7 @@ enum {
+ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */
+ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */
+ ATA_EHI_QUIET = (1 << 3), /* be quiet */
++ ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */
+
+ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */
+ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */
+diff --git a/include/linux/msi.h b/include/linux/msi.h
+index 6991ab5..91b05c1 100644
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -14,8 +14,10 @@ struct irq_desc;
+ extern void mask_msi_irq(unsigned int irq);
+ extern void unmask_msi_irq(unsigned int irq);
+ extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
++extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+ extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+ extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
++extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
+ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+
+ struct msi_desc {
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index cc24beb..957a25f 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void);
+
+
+ extern void calc_global_load(void);
+-extern u64 cpu_nr_migrations(int cpu);
+
+ extern unsigned long get_parent_ip(unsigned long addr);
+
+@@ -1001,6 +1000,7 @@ struct sched_domain {
+ char *name;
+ #endif
+
++ unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+@@ -1072,7 +1072,8 @@ struct sched_domain;
+ struct sched_class {
+ const struct sched_class *next;
+
+- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
++ bool head);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*yield_task) (struct rq *rq);
+
+@@ -1082,7 +1083,8 @@ struct sched_class {
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+
+ #ifdef CONFIG_SMP
+- int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
++ int (*select_task_rq)(struct rq *rq, struct task_struct *p,
++ int sd_flag, int flags);
+
+ unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, unsigned long max_load_move,
+@@ -1094,7 +1096,8 @@ struct sched_class {
+ enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+- void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
++ void (*task_waking) (struct rq *this_rq, struct task_struct *task);
++ void (*task_woken) (struct rq *this_rq, struct task_struct *task);
+
+ void (*set_cpus_allowed)(struct task_struct *p,
+ const struct cpumask *newmask);
+@@ -1105,7 +1108,7 @@ struct sched_class {
+
+ void (*set_curr_task) (struct rq *rq);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+- void (*task_new) (struct rq *rq, struct task_struct *p);
++ void (*task_fork) (struct task_struct *p);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+@@ -1114,10 +1117,11 @@ struct sched_class {
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
+
+- unsigned int (*get_rr_interval) (struct task_struct *task);
++ unsigned int (*get_rr_interval) (struct rq *rq,
++ struct task_struct *task);
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *p);
++ void (*moved_group) (struct task_struct *p, int on_rq);
+ #endif
+ };
+
+@@ -1178,7 +1182,6 @@ struct sched_entity {
+ u64 nr_failed_migrations_running;
+ u64 nr_failed_migrations_hot;
+ u64 nr_forced_migrations;
+- u64 nr_forced2_migrations;
+
+ u64 nr_wakeups;
+ u64 nr_wakeups_sync;
+@@ -1886,6 +1889,7 @@ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+ #ifdef CONFIG_HOTPLUG_CPU
++extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
+ extern void idle_task_exit(void);
+ #else
+ static inline void idle_task_exit(void) {}
+diff --git a/include/linux/topology.h b/include/linux/topology.h
+index 57e6357..5b81156 100644
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+- | 0*SD_SHARE_PKG_RESOURCES \
++ | 1*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_PREFER_SIBLING \
+ , \
+diff --git a/kernel/compat.c b/kernel/compat.c
+index 180d188..8bc5578 100644
+--- a/kernel/compat.c
++++ b/kernel/compat.c
+@@ -25,6 +25,7 @@
+ #include <linux/posix-timers.h>
+ #include <linux/times.h>
+ #include <linux/ptrace.h>
++#include <linux/module.h>
+
+ #include <asm/uaccess.h>
+
+@@ -1136,3 +1137,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
+
+ return 0;
+ }
++
++/*
++ * Allocate user-space memory for the duration of a single system call,
++ * in order to marshall parameters inside a compat thunk.
++ */
++void __user *compat_alloc_user_space(unsigned long len)
++{
++ void __user *ptr;
++
++ /* If len would occupy more than half of the entire compat space... */
++ if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
++ return NULL;
++
++ ptr = arch_compat_alloc_user_space(len);
++
++ if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
++ return NULL;
++
++ return ptr;
++}
++EXPORT_SYMBOL_GPL(compat_alloc_user_space);
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 291ac58..7e8b6ac 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -151,7 +151,7 @@ static inline void check_for_tasks(int cpu)
+
+ write_lock_irq(&tasklist_lock);
+ for_each_process(p) {
+- if (task_cpu(p) == cpu &&
++ if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
+@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
+ }
+
+ struct take_cpu_down_param {
++ struct task_struct *caller;
+ unsigned long mod;
+ void *hcpu;
+ };
+@@ -171,6 +172,7 @@ struct take_cpu_down_param {
+ static int __ref take_cpu_down(void *_param)
+ {
+ struct take_cpu_down_param *param = _param;
++ unsigned int cpu = (unsigned long)param->hcpu;
+ int err;
+
+ /* Ensure this CPU doesn't handle any more interrupts. */
+@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
++ if (task_cpu(param->caller) == cpu)
++ move_task_off_dead_cpu(cpu, param->caller);
+ /* Force idle task to run as soon as we yield: it should
+ immediately notice cpu is offline and die quickly. */
+ sched_idle_next();
+@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+ int err, nr_calls = 0;
+- cpumask_var_t old_allowed;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
++ .caller = current,
+ .mod = mod,
+ .hcpu = hcpu,
+ };
+@@ -205,10 +209,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
+- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
+- return -ENOMEM;
+-
+ cpu_hotplug_begin();
++ set_cpu_active(cpu, false);
+ err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+ hcpu, -1, &nr_calls);
+ if (err == NOTIFY_BAD) {
+@@ -223,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ goto out_release;
+ }
+
+- /* Ensure that we are not runnable on dying cpu */
+- cpumask_copy(old_allowed, &current->cpus_allowed);
+- set_cpus_allowed_ptr(current, cpu_active_mask);
+-
+ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ set_cpu_active(cpu, true);
+@@ -235,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ hcpu) == NOTIFY_BAD)
+ BUG();
+
+- goto out_allowed;
++ goto out_release;
+ }
+ BUG_ON(cpu_online(cpu));
+
+@@ -253,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+
+ check_for_tasks(cpu);
+
+-out_allowed:
+- set_cpus_allowed_ptr(current, old_allowed);
+ out_release:
+ cpu_hotplug_done();
+ if (!err) {
+@@ -262,7 +258,6 @@ out_release:
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
+- free_cpumask_var(old_allowed);
+ return err;
+ }
+
+@@ -280,18 +275,6 @@ int __ref cpu_down(unsigned int cpu)
+ goto out;
+ }
+
+- set_cpu_active(cpu, false);
+-
+- /*
+- * Make sure the all cpus did the reschedule and are not
+- * using stale version of the cpu_active_mask.
+- * This is not strictly necessary becuase stop_machine()
+- * that we run down the line already provides the required
+- * synchronization. But it's really a side effect and we do not
+- * want to depend on the innards of the stop_machine here.
+- */
+- synchronize_sched();
+-
+ err = _cpu_down(cpu, 0);
+
+ out:
+@@ -382,19 +365,12 @@ int disable_nonboot_cpus(void)
+ return error;
+ cpu_maps_update_begin();
+ first_cpu = cpumask_first(cpu_online_mask);
+- /* We take down all of the non-boot CPUs in one shot to avoid races
++ /*
++ * We take down all of the non-boot CPUs in one shot to avoid races
+ * with the userspace trying to use the CPU hotplug at the same time
+ */
+ cpumask_clear(frozen_cpus);
+
+- for_each_online_cpu(cpu) {
+- if (cpu == first_cpu)
+- continue;
+- set_cpu_active(cpu, false);
+- }
+-
+- synchronize_sched();
+-
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+diff --git a/kernel/cpuset.c b/kernel/cpuset.c
+index a81a910..b120fd0 100644
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2145,19 +2145,52 @@ void __init cpuset_init_smp(void)
+ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+ {
+ mutex_lock(&callback_mutex);
+- cpuset_cpus_allowed_locked(tsk, pmask);
++ task_lock(tsk);
++ guarantee_online_cpus(task_cs(tsk), pmask);
++ task_unlock(tsk);
+ mutex_unlock(&callback_mutex);
+ }
+
+-/**
+- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
+- * Must be called with callback_mutex held.
+- **/
+-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
++int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+ {
+- task_lock(tsk);
+- guarantee_online_cpus(task_cs(tsk), pmask);
+- task_unlock(tsk);
++ const struct cpuset *cs;
++ int cpu;
++
++ rcu_read_lock();
++ cs = task_cs(tsk);
++ if (cs)
++ cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
++ rcu_read_unlock();
++
++ /*
++ * We own tsk->cpus_allowed, nobody can change it under us.
++ *
++ * But we used cs && cs->cpus_allowed lockless and thus can
++ * race with cgroup_attach_task() or update_cpumask() and get
++ * the wrong tsk->cpus_allowed. However, both cases imply the
++ * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
++ * which takes task_rq_lock().
++ *
++ * If we are called after it dropped the lock we must see all
++ * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
++ * set any mask even if it is not right from task_cs() pov,
++ * the pending set_cpus_allowed_ptr() will fix things.
++ */
++
++ cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
++ if (cpu >= nr_cpu_ids) {
++ /*
++ * Either tsk->cpus_allowed is wrong (see above) or it
++ * is actually empty. The latter case is only possible
++ * if we are racing with remove_tasks_in_empty_cpuset().
++ * Like above we can temporary set any mask and rely on
++ * set_cpus_allowed_ptr() as synchronization point.
++ */
++ cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
++ cpu = cpumask_any(cpu_active_mask);
++ }
++
++ return cpu;
+ }
+
+ void cpuset_init_current_mems_allowed(void)
+@@ -2346,22 +2379,6 @@ int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
+ }
+
+ /**
+- * cpuset_lock - lock out any changes to cpuset structures
+- *
+- * The out of memory (oom) code needs to mutex_lock cpusets
+- * from being changed while it scans the tasklist looking for a
+- * task in an overlapping cpuset. Expose callback_mutex via this
+- * cpuset_lock() routine, so the oom code can lock it, before
+- * locking the task list. The tasklist_lock is a spinlock, so
+- * must be taken inside callback_mutex.
+- */
+-
+-void cpuset_lock(void)
+-{
+- mutex_lock(&callback_mutex);
+-}
+-
+-/**
+ * cpuset_unlock - release lock on cpuset changes
+ *
+ * Undo the lock taken in a previous cpuset_lock() call.
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 9f3b066..4bde56f 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1233,21 +1233,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
+
+- /*
+- * The task hasn't been attached yet, so its cpus_allowed mask will
+- * not be changed, nor will its assigned CPU.
+- *
+- * The cpus_allowed mask of the parent may have changed after it was
+- * copied first time - so re-copy it here, then check the child's CPU
+- * to ensure it is on a valid CPU (and if not, just force it back to
+- * parent's CPU). This avoids alot of nasty races.
+- */
+- p->cpus_allowed = current->cpus_allowed;
+- p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
+- if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+- !cpu_online(task_cpu(p))))
+- set_task_cpu(p, smp_processor_id());
+-
+ /* CLONE_PARENT re-uses the old parent */
+ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
+ p->real_parent = current->real_parent;
+diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
+index ef3c3f8..f83972b 100644
+--- a/kernel/gcov/fs.c
++++ b/kernel/gcov/fs.c
+@@ -33,10 +33,11 @@
+ * @children: child nodes
+ * @all: list head for list of all nodes
+ * @parent: parent node
+- * @info: associated profiling data structure if not a directory
+- * @ghost: when an object file containing profiling data is unloaded we keep a
+- * copy of the profiling data here to allow collecting coverage data
+- * for cleanup code. Such a node is called a "ghost".
++ * @loaded_info: array of pointers to profiling data sets for loaded object
++ * files.
++ * @num_loaded: number of profiling data sets for loaded object files.
++ * @unloaded_info: accumulated copy of profiling data sets for unloaded
++ * object files. Used only when gcov_persist=1.
+ * @dentry: main debugfs entry, either a directory or data file
+ * @links: associated symbolic links
+ * @name: data file basename
+@@ -51,10 +52,11 @@ struct gcov_node {
+ struct list_head children;
+ struct list_head all;
+ struct gcov_node *parent;
+- struct gcov_info *info;
+- struct gcov_info *ghost;
++ struct gcov_info **loaded_info;
++ struct gcov_info *unloaded_info;
+ struct dentry *dentry;
+ struct dentry **links;
++ int num_loaded;
+ char name[0];
+ };
+
+@@ -136,16 +138,37 @@ static const struct seq_operations gcov_seq_ops = {
+ };
+
+ /*
+- * Return the profiling data set for a given node. This can either be the
+- * original profiling data structure or a duplicate (also called "ghost")
+- * in case the associated object file has been unloaded.
++ * Return a profiling data set associated with the given node. This is
++ * either a data set for a loaded object file or a data set copy in case
++ * all associated object files have been unloaded.
+ */
+ static struct gcov_info *get_node_info(struct gcov_node *node)
+ {
+- if (node->info)
+- return node->info;
++ if (node->num_loaded > 0)
++ return node->loaded_info[0];
+
+- return node->ghost;
++ return node->unloaded_info;
++}
++
++/*
++ * Return a newly allocated profiling data set which contains the sum of
++ * all profiling data associated with the given node.
++ */
++static struct gcov_info *get_accumulated_info(struct gcov_node *node)
++{
++ struct gcov_info *info;
++ int i = 0;
++
++ if (node->unloaded_info)
++ info = gcov_info_dup(node->unloaded_info);
++ else
++ info = gcov_info_dup(node->loaded_info[i++]);
++ if (!info)
++ return NULL;
++ for (; i < node->num_loaded; i++)
++ gcov_info_add(info, node->loaded_info[i]);
++
++ return info;
+ }
+
+ /*
+@@ -163,9 +186,10 @@ static int gcov_seq_open(struct inode *inode, struct file *file)
+ mutex_lock(&node_lock);
+ /*
+ * Read from a profiling data copy to minimize reference tracking
+- * complexity and concurrent access.
++ * complexity and concurrent access and to keep accumulating multiple
++ * profiling data sets associated with one node simple.
+ */
+- info = gcov_info_dup(get_node_info(node));
++ info = get_accumulated_info(node);
+ if (!info)
+ goto out_unlock;
+ iter = gcov_iter_new(info);
+@@ -225,12 +249,25 @@ static struct gcov_node *get_node_by_name(const char *name)
+ return NULL;
+ }
+
++/*
++ * Reset all profiling data associated with the specified node.
++ */
++static void reset_node(struct gcov_node *node)
++{
++ int i;
++
++ if (node->unloaded_info)
++ gcov_info_reset(node->unloaded_info);
++ for (i = 0; i < node->num_loaded; i++)
++ gcov_info_reset(node->loaded_info[i]);
++}
++
+ static void remove_node(struct gcov_node *node);
+
+ /*
+ * write() implementation for gcov data files. Reset profiling data for the
+- * associated file. If the object file has been unloaded (i.e. this is
+- * a "ghost" node), remove the debug fs node as well.
++ * corresponding file. If all associated object files have been unloaded,
++ * remove the debug fs node as well.
+ */
+ static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
+ size_t len, loff_t *pos)
+@@ -245,10 +282,10 @@ static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
+ node = get_node_by_name(info->filename);
+ if (node) {
+ /* Reset counts or remove node for unloaded modules. */
+- if (node->ghost)
++ if (node->num_loaded == 0)
+ remove_node(node);
+ else
+- gcov_info_reset(node->info);
++ reset_node(node);
+ }
+ /* Reset counts for open file. */
+ gcov_info_reset(info);
+@@ -378,7 +415,10 @@ static void init_node(struct gcov_node *node, struct gcov_info *info,
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->children);
+ INIT_LIST_HEAD(&node->all);
+- node->info = info;
++ if (node->loaded_info) {
++ node->loaded_info[0] = info;
++ node->num_loaded = 1;
++ }
+ node->parent = parent;
+ if (name)
+ strcpy(node->name, name);
+@@ -394,9 +434,13 @@ static struct gcov_node *new_node(struct gcov_node *parent,
+ struct gcov_node *node;
+
+ node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL);
+- if (!node) {
+- pr_warning("out of memory\n");
+- return NULL;
++ if (!node)
++ goto err_nomem;
++ if (info) {
++ node->loaded_info = kcalloc(1, sizeof(struct gcov_info *),
++ GFP_KERNEL);
++ if (!node->loaded_info)
++ goto err_nomem;
+ }
+ init_node(node, info, name, parent);
+ /* Differentiate between gcov data file nodes and directory nodes. */
+@@ -416,6 +460,11 @@ static struct gcov_node *new_node(struct gcov_node *parent,
+ list_add(&node->all, &all_head);
+
+ return node;
++
++err_nomem:
++ kfree(node);
++ pr_warning("out of memory\n");
++ return NULL;
+ }
+
+ /* Remove symbolic links associated with node. */
+@@ -441,8 +490,9 @@ static void release_node(struct gcov_node *node)
+ list_del(&node->all);
+ debugfs_remove(node->dentry);
+ remove_links(node);
+- if (node->ghost)
+- gcov_info_free(node->ghost);
++ kfree(node->loaded_info);
++ if (node->unloaded_info)
++ gcov_info_free(node->unloaded_info);
+ kfree(node);
+ }
+
+@@ -477,7 +527,7 @@ static struct gcov_node *get_child_by_name(struct gcov_node *parent,
+
+ /*
+ * write() implementation for reset file. Reset all profiling data to zero
+- * and remove ghost nodes.
++ * and remove nodes for which all associated object files are unloaded.
+ */
+ static ssize_t reset_write(struct file *file, const char __user *addr,
+ size_t len, loff_t *pos)
+@@ -487,8 +537,8 @@ static ssize_t reset_write(struct file *file, const char __user *addr,
+ mutex_lock(&node_lock);
+ restart:
+ list_for_each_entry(node, &all_head, all) {
+- if (node->info)
+- gcov_info_reset(node->info);
++ if (node->num_loaded > 0)
++ reset_node(node);
+ else if (list_empty(&node->children)) {
+ remove_node(node);
+ /* Several nodes may have gone - restart loop. */
+@@ -564,37 +614,115 @@ err_remove:
+ }
+
+ /*
+- * The profiling data set associated with this node is being unloaded. Store a
+- * copy of the profiling data and turn this node into a "ghost".
++ * Associate a profiling data set with an existing node. Needs to be called
++ * with node_lock held.
+ */
+-static int ghost_node(struct gcov_node *node)
++static void add_info(struct gcov_node *node, struct gcov_info *info)
+ {
+- node->ghost = gcov_info_dup(node->info);
+- if (!node->ghost) {
+- pr_warning("could not save data for '%s' (out of memory)\n",
+- node->info->filename);
+- return -ENOMEM;
++ struct gcov_info **loaded_info;
++ int num = node->num_loaded;
++
++ /*
++ * Prepare new array. This is done first to simplify cleanup in
++ * case the new data set is incompatible, the node only contains
++ * unloaded data sets and there's not enough memory for the array.
++ */
++ loaded_info = kcalloc(num + 1, sizeof(struct gcov_info *), GFP_KERNEL);
++ if (!loaded_info) {
++ pr_warning("could not add '%s' (out of memory)\n",
++ info->filename);
++ return;
++ }
++ memcpy(loaded_info, node->loaded_info,
++ num * sizeof(struct gcov_info *));
++ loaded_info[num] = info;
++ /* Check if the new data set is compatible. */
++ if (num == 0) {
++ /*
++ * A module was unloaded, modified and reloaded. The new
++ * data set replaces the copy of the last one.
++ */
++ if (!gcov_info_is_compatible(node->unloaded_info, info)) {
++ pr_warning("discarding saved data for %s "
++ "(incompatible version)\n", info->filename);
++ gcov_info_free(node->unloaded_info);
++ node->unloaded_info = NULL;
++ }
++ } else {
++ /*
++ * Two different versions of the same object file are loaded.
++ * The initial one takes precedence.
++ */
++ if (!gcov_info_is_compatible(node->loaded_info[0], info)) {
++ pr_warning("could not add '%s' (incompatible "
++ "version)\n", info->filename);
++ kfree(loaded_info);
++ return;
++ }
+ }
+- node->info = NULL;
++ /* Overwrite previous array. */
++ kfree(node->loaded_info);
++ node->loaded_info = loaded_info;
++ node->num_loaded = num + 1;
++}
+
+- return 0;
++/*
++ * Return the index of a profiling data set associated with a node.
++ */
++static int get_info_index(struct gcov_node *node, struct gcov_info *info)
++{
++ int i;
++
++ for (i = 0; i < node->num_loaded; i++) {
++ if (node->loaded_info[i] == info)
++ return i;
++ }
++ return -ENOENT;
+ }
+
+ /*
+- * Profiling data for this node has been loaded again. Add profiling data
+- * from previous instantiation and turn this node into a regular node.
++ * Save the data of a profiling data set which is being unloaded.
+ */
+-static void revive_node(struct gcov_node *node, struct gcov_info *info)
++static void save_info(struct gcov_node *node, struct gcov_info *info)
+ {
+- if (gcov_info_is_compatible(node->ghost, info))
+- gcov_info_add(info, node->ghost);
++ if (node->unloaded_info)
++ gcov_info_add(node->unloaded_info, info);
+ else {
+- pr_warning("discarding saved data for '%s' (version changed)\n",
++ node->unloaded_info = gcov_info_dup(info);
++ if (!node->unloaded_info) {
++ pr_warning("could not save data for '%s' "
++ "(out of memory)\n", info->filename);
++ }
++ }
++}
++
++/*
++ * Disassociate a profiling data set from a node. Needs to be called with
++ * node_lock held.
++ */
++static void remove_info(struct gcov_node *node, struct gcov_info *info)
++{
++ int i;
++
++ i = get_info_index(node, info);
++ if (i < 0) {
++ pr_warning("could not remove '%s' (not found)\n",
+ info->filename);
++ return;
+ }
+- gcov_info_free(node->ghost);
+- node->ghost = NULL;
+- node->info = info;
++ if (gcov_persist)
++ save_info(node, info);
++ /* Shrink array. */
++ node->loaded_info[i] = node->loaded_info[node->num_loaded - 1];
++ node->num_loaded--;
++ if (node->num_loaded > 0)
++ return;
++ /* Last loaded data set was removed. */
++ kfree(node->loaded_info);
++ node->loaded_info = NULL;
++ node->num_loaded = 0;
++ if (!node->unloaded_info)
++ remove_node(node);
+ }
+
+ /*
+@@ -609,30 +737,18 @@ void gcov_event(enum gcov_action action, struct gcov_info *info)
+ node = get_node_by_name(info->filename);
+ switch (action) {
+ case GCOV_ADD:
+- /* Add new node or revive ghost. */
+- if (!node) {
++ if (node)
++ add_info(node, info);
++ else
+ add_node(info);
+- break;
+- }
+- if (gcov_persist)
+- revive_node(node, info);
+- else {
+- pr_warning("could not add '%s' (already exists)\n",
+- info->filename);
+- }
+ break;
+ case GCOV_REMOVE:
+- /* Remove node or turn into ghost. */
+- if (!node) {
++ if (node)
++ remove_info(node, info);
++ else {
+ pr_warning("could not remove '%s' (not found)\n",
+ info->filename);
+- break;
+ }
+- if (gcov_persist) {
+- if (!ghost_node(node))
+- break;
+- }
+- remove_node(node);
+ break;
+ }
+ mutex_unlock(&node_lock);
+diff --git a/kernel/groups.c b/kernel/groups.c
+index 2b45b2e..f0c2528 100644
+--- a/kernel/groups.c
++++ b/kernel/groups.c
+@@ -143,10 +143,9 @@ int groups_search(const struct group_info *group_info, gid_t grp)
+ right = group_info->ngroups;
+ while (left < right) {
+ unsigned int mid = (left+right)/2;
+- int cmp = grp - GROUP_AT(group_info, mid);
+- if (cmp > 0)
++ if (grp > GROUP_AT(group_info, mid))
+ left = mid + 1;
+- else if (cmp < 0)
++ else if (grp < GROUP_AT(group_info, mid))
+ right = mid;
+ else
+ return 1;
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 9990074..152214d 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -542,7 +542,6 @@ struct rq {
+ struct load_weight load;
+ unsigned long nr_load_updates;
+ u64 nr_switches;
+- u64 nr_migrations_in;
+
+ struct cfs_rq cfs;
+ struct rt_rq rt;
+@@ -943,14 +942,25 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
++ * Check whether the task is waking, we use this to synchronize ->cpus_allowed
++ * against ttwu().
++ */
++static inline int task_is_waking(struct task_struct *p)
++{
++ return unlikely(p->state == TASK_WAKING);
++}
++
++/*
+ * __task_rq_lock - lock the runqueue a given task resides on.
+ * Must be called interrupts disabled.
+ */
+ static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+ {
++ struct rq *rq;
++
+ for (;;) {
+- struct rq *rq = task_rq(p);
++ rq = task_rq(p);
+ spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p)))
+ return rq;
+@@ -1822,6 +1832,20 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
+ static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
+
++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
++{
++ set_task_rq(p, cpu);
++#ifdef CONFIG_SMP
++ /*
++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
++ * successfuly executed on another CPU. We must ensure that updates of
++ * per-task data have been completed by this moment.
++ */
++ smp_wmb();
++ task_thread_info(p)->cpu = cpu;
++#endif
++}
++
+ #include "sched_stats.h"
+ #include "sched_idletask.c"
+ #include "sched_fair.c"
+@@ -1871,13 +1895,14 @@ static void update_avg(u64 *avg, u64 sample)
+ *avg += diff >> 3;
+ }
+
+-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ if (wakeup)
+ p->se.start_runtime = p->se.sum_exec_runtime;
+
+ sched_info_queued(p);
+- p->sched_class->enqueue_task(rq, p, wakeup);
++ p->sched_class->enqueue_task(rq, p, wakeup, head);
+ p->se.on_rq = 1;
+ }
+
+@@ -1953,7 +1978,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+ if (task_contributes_to_load(p))
+ rq->nr_uninterruptible--;
+
+- enqueue_task(rq, p, wakeup);
++ enqueue_task(rq, p, wakeup, false);
+ inc_nr_running(rq);
+ }
+
+@@ -1978,20 +2003,6 @@ inline int task_curr(const struct task_struct *p)
+ return cpu_curr(task_cpu(p)) == p;
+ }
+
+-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+-{
+- set_task_rq(p, cpu);
+-#ifdef CONFIG_SMP
+- /*
+- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+- * successfuly executed on another CPU. We must ensure that updates of
+- * per-task data have been completed by this moment.
+- */
+- smp_wmb();
+- task_thread_info(p)->cpu = cpu;
+-#endif
+-}
+-
+ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+@@ -2018,21 +2029,15 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
+- struct rq *rq = cpu_rq(cpu);
+- unsigned long flags;
+-
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ WARN_ON(1);
+ return;
+ }
+
+- spin_lock_irqsave(&rq->lock, flags);
+- set_task_cpu(p, cpu);
+ p->cpus_allowed = cpumask_of_cpu(cpu);
+ p->rt.nr_cpus_allowed = 1;
+ p->flags |= PF_THREAD_BOUND;
+- spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
+
+@@ -2070,35 +2075,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+ int old_cpu = task_cpu(p);
+- struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
+- struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+- *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+- u64 clock_offset;
+
+- clock_offset = old_rq->clock - new_rq->clock;
++#ifdef CONFIG_SCHED_DEBUG
++ /*
++ * We should never call set_task_cpu() on a blocked task,
++ * ttwu() will sort out the placement.
++ */
++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
++ !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
++#endif
+
+ trace_sched_migrate_task(p, new_cpu);
+
+-#ifdef CONFIG_SCHEDSTATS
+- if (p->se.wait_start)
+- p->se.wait_start -= clock_offset;
+- if (p->se.sleep_start)
+- p->se.sleep_start -= clock_offset;
+- if (p->se.block_start)
+- p->se.block_start -= clock_offset;
+-#endif
+ if (old_cpu != new_cpu) {
+ p->se.nr_migrations++;
+- new_rq->nr_migrations_in++;
+-#ifdef CONFIG_SCHEDSTATS
+- if (task_hot(p, old_rq->clock, NULL))
+- schedstat_inc(p, se.nr_forced2_migrations);
+-#endif
+ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+ 1, 1, NULL, 0);
+ }
+- p->se.vruntime -= old_cfsrq->min_vruntime -
+- new_cfsrq->min_vruntime;
+
+ __set_task_cpu(p, new_cpu);
+ }
+@@ -2331,6 +2324,69 @@ void task_oncpu_function_call(struct task_struct *p,
+ preempt_enable();
+ }
+
++#ifdef CONFIG_SMP
++/*
++ * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
++ */
++static int select_fallback_rq(int cpu, struct task_struct *p)
++{
++ int dest_cpu;
++ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
++
++ /* Look for allowed, online CPU in same node. */
++ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
++ return dest_cpu;
++
++ /* Any allowed, online CPU? */
++ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
++ if (dest_cpu < nr_cpu_ids)
++ return dest_cpu;
++
++ /* No more Mr. Nice Guy. */
++ if (unlikely(dest_cpu >= nr_cpu_ids)) {
++ dest_cpu = cpuset_cpus_allowed_fallback(p);
++ /*
++ * Don't tell them about moving exiting tasks or
++ * kernel threads (both mm NULL), since they never
++ * leave kernel.
++ */
++ if (p->mm && printk_ratelimit()) {
++ printk(KERN_INFO "process %d (%s) no "
++ "longer affine to cpu%d\n",
++ task_pid_nr(p), p->comm, cpu);
++ }
++ }
++
++ return dest_cpu;
++}
++
++/*
++ * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
++ */
++static inline
++int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
++{
++ int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
++
++ /*
++ * In order not to call set_task_cpu() on a blocking task we need
++ * to rely on ttwu() to place the task on a valid ->cpus_allowed
++ * cpu.
++ *
++ * Since this is common to all placement strategies, this lives here.
++ *
++ * [ this allows ->select_task() to simply return task_cpu(p) and
++ * not worry about this generic constraint ]
++ */
++ if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
++ !cpu_online(cpu)))
++ cpu = select_fallback_rq(task_cpu(p), p);
++
++ return cpu;
++}
++#endif
++
+ /***
+ * try_to_wake_up - wake up a thread
+ * @p: the to-be-woken-up thread
+@@ -2379,22 +2435,34 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
+ *
+ * First fix up the nr_uninterruptible count:
+ */
+- if (task_contributes_to_load(p))
+- rq->nr_uninterruptible--;
++ if (task_contributes_to_load(p)) {
++ if (likely(cpu_online(orig_cpu)))
++ rq->nr_uninterruptible--;
++ else
++ this_rq()->nr_uninterruptible--;
++ }
+ p->state = TASK_WAKING;
+- task_rq_unlock(rq, &flags);
+
+- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
++ if (p->sched_class->task_waking)
++ p->sched_class->task_waking(rq, p);
++
++ cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
+ if (cpu != orig_cpu)
+ set_task_cpu(p, cpu);
++ __task_rq_unlock(rq);
+
+- rq = task_rq_lock(p, &flags);
+-
+- if (rq != orig_rq)
+- update_rq_clock(rq);
++ rq = cpu_rq(cpu);
++ spin_lock(&rq->lock);
++ update_rq_clock(rq);
+
++ /*
++ * We migrated the task without holding either rq->lock, however
++ * since the task is not on the task list itself, nobody else
++ * will try and migrate the task, hence the rq should match the
++ * cpu we just moved it to.
++ */
++ WARN_ON(task_cpu(p) != cpu);
+ WARN_ON(p->state != TASK_WAKING);
+- cpu = task_cpu(p);
+
+ #ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+@@ -2447,8 +2515,8 @@ out_running:
+
+ p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_wake_up)
+- p->sched_class->task_wake_up(rq, p);
++ if (p->sched_class->task_woken)
++ p->sched_class->task_woken(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+@@ -2528,7 +2596,6 @@ static void __sched_fork(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+- p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+@@ -2549,14 +2616,6 @@ static void __sched_fork(struct task_struct *p)
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ INIT_HLIST_HEAD(&p->preempt_notifiers);
+ #endif
+-
+- /*
+- * We mark the process as running here, but have not actually
+- * inserted it onto the runqueue yet. This guarantees that
+- * nobody will actually run it, and a signal or other external
+- * event cannot wake it up and insert it on the runqueue either.
+- */
+- p->state = TASK_RUNNING;
+ }
+
+ /*
+@@ -2567,6 +2626,12 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ int cpu = get_cpu();
+
+ __sched_fork(p);
++ /*
++ * We mark the process as running here. This guarantees that
++ * nobody will actually run it, and a signal or other external
++ * event cannot wake it up and insert it on the runqueue either.
++ */
++ p->state = TASK_RUNNING;
+
+ /*
+ * Revert to default priority/policy on fork if requested.
+@@ -2598,9 +2663,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ if (!rt_prio(p->prio))
+ p->sched_class = &fair_sched_class;
+
+-#ifdef CONFIG_SMP
+- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+-#endif
++ if (p->sched_class->task_fork)
++ p->sched_class->task_fork(p);
++
+ set_task_cpu(p, cpu);
+
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+@@ -2630,28 +2695,38 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+ {
+ unsigned long flags;
+ struct rq *rq;
++ int cpu = get_cpu();
+
++#ifdef CONFIG_SMP
+ rq = task_rq_lock(p, &flags);
+- BUG_ON(p->state != TASK_RUNNING);
+- update_rq_clock(rq);
++ p->state = TASK_WAKING;
+
+- if (!p->sched_class->task_new || !current->se.on_rq) {
+- activate_task(rq, p, 0);
+- } else {
+- /*
+- * Let the scheduling class do new task startup
+- * management (if any):
+- */
+- p->sched_class->task_new(rq, p);
+- inc_nr_running(rq);
+- }
++ /*
++ * Fork balancing, do it here and not earlier because:
++ * - cpus_allowed can change in the fork path
++ * - any previously selected cpu might disappear through hotplug
++ *
++ * We set TASK_WAKING so that select_task_rq() can drop rq->lock
++ * without people poking at ->cpus_allowed.
++ */
++ cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
++ set_task_cpu(p, cpu);
++
++ p->state = TASK_RUNNING;
++ task_rq_unlock(rq, &flags);
++#endif
++
++ rq = task_rq_lock(p, &flags);
++ update_rq_clock(rq);
++ activate_task(rq, p, 0);
+ trace_sched_wakeup_new(rq, p, 1);
+ check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_wake_up)
+- p->sched_class->task_wake_up(rq, p);
++ if (p->sched_class->task_woken)
++ p->sched_class->task_woken(rq, p);
+ #endif
+ task_rq_unlock(rq, &flags);
++ put_cpu();
+ }
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -3038,15 +3113,6 @@ static void calc_load_account_active(struct rq *this_rq)
+ }
+
+ /*
+- * Externally visible per-cpu scheduler statistics:
+- * cpu_nr_migrations(cpu) - number of migrations into that cpu
+- */
+-u64 cpu_nr_migrations(int cpu)
+-{
+- return cpu_rq(cpu)->nr_migrations_in;
+-}
+-
+-/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC).
+ */
+@@ -3128,24 +3194,28 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ }
+
+ /*
+- * If dest_cpu is allowed for this process, migrate the task to it.
+- * This is accomplished by forcing the cpu_allowed mask to only
+- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
+- * the cpu_allowed mask is restored.
++ * sched_exec - execve() is a valuable balancing opportunity, because at
++ * this point the task has the smallest effective memory and cache footprint.
+ */
+-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
++void sched_exec(void)
+ {
++ struct task_struct *p = current;
+ struct migration_req req;
+ unsigned long flags;
+ struct rq *rq;
++ int dest_cpu;
+
+ rq = task_rq_lock(p, &flags);
+- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
+- || unlikely(!cpu_active(dest_cpu)))
+- goto out;
++ dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
++ if (dest_cpu == smp_processor_id())
++ goto unlock;
+
+- /* force the process onto the specified CPU */
+- if (migrate_task(p, dest_cpu, &req)) {
++ /*
++ * select_task_rq() can race against ->cpus_allowed
++ */
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
++ likely(cpu_active(dest_cpu)) &&
++ migrate_task(p, dest_cpu, &req)) {
+ /* Need to wait for migration thread (might exit: take ref). */
+ struct task_struct *mt = rq->migration_thread;
+
+@@ -3157,24 +3227,11 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+
+ return;
+ }
+-out:
++unlock:
+ task_rq_unlock(rq, &flags);
+ }
+
+ /*
+- * sched_exec - execve() is a valuable balancing opportunity, because at
+- * this point the task has the smallest effective memory and cache footprint.
+- */
+-void sched_exec(void)
+-{
+- int new_cpu, this_cpu = get_cpu();
+- new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
+- put_cpu();
+- if (new_cpu != this_cpu)
+- sched_migrate_task(current, new_cpu);
+-}
+-
+-/*
+ * pull_task - move a task from a remote runqueue to the local runqueue.
+ * Both runqueues must be locked.
+ */
+@@ -3621,7 +3678,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+
+ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = cpumask_weight(sched_domain_span(sd));
++ unsigned long weight = sd->span_weight;
+ unsigned long smt_gain = sd->smt_gain;
+
+ smt_gain /= weight;
+@@ -3654,7 +3711,7 @@ unsigned long scale_rt_power(int cpu)
+
+ static void update_cpu_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = cpumask_weight(sched_domain_span(sd));
++ unsigned long weight = sd->span_weight;
+ unsigned long power = SCHED_LOAD_SCALE;
+ struct sched_group *sdg = sd->groups;
+
+@@ -5974,14 +6031,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
+ */
+ bool try_wait_for_completion(struct completion *x)
+ {
++ unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irq(&x->wait.lock);
++ spin_lock_irqsave(&x->wait.lock, flags);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+- spin_unlock_irq(&x->wait.lock);
++ spin_unlock_irqrestore(&x->wait.lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -5996,12 +6054,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
+ */
+ bool completion_done(struct completion *x)
+ {
++ unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irq(&x->wait.lock);
++ spin_lock_irqsave(&x->wait.lock, flags);
+ if (!x->done)
+ ret = 0;
+- spin_unlock_irq(&x->wait.lock);
++ spin_unlock_irqrestore(&x->wait.lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
+@@ -6095,7 +6154,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq) {
+- enqueue_task(rq, p, 0);
++ enqueue_task(rq, p, 0, oldprio < prio);
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
+ }
+@@ -6139,7 +6198,7 @@ void set_user_nice(struct task_struct *p, long nice)
+ delta = p->prio - old_prio;
+
+ if (on_rq) {
+- enqueue_task(rq, p, 0);
++ enqueue_task(rq, p, 0, false);
+ /*
+ * If the task increased its priority or is running and
+ * lowered its priority, then reschedule its CPU:
+@@ -6530,7 +6589,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ return -EINVAL;
+
+ retval = -ESRCH;
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+ p = find_process_by_pid(pid);
+ if (p) {
+ retval = security_task_getscheduler(p);
+@@ -6538,7 +6597,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ retval = p->policy
+ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
+ }
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ return retval;
+ }
+
+@@ -6556,7 +6615,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ if (!param || pid < 0)
+ return -EINVAL;
+
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+ p = find_process_by_pid(pid);
+ retval = -ESRCH;
+ if (!p)
+@@ -6567,7 +6626,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ goto out_unlock;
+
+ lp.sched_priority = p->rt_priority;
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+
+ /*
+ * This one might sleep, we cannot do it with a spinlock held ...
+@@ -6577,7 +6636,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ return retval;
+
+ out_unlock:
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ return retval;
+ }
+
+@@ -6588,22 +6647,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+ int retval;
+
+ get_online_cpus();
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+
+ p = find_process_by_pid(pid);
+ if (!p) {
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ put_online_cpus();
+ return -ESRCH;
+ }
+
+- /*
+- * It is not safe to call set_cpus_allowed with the
+- * tasklist_lock held. We will bump the task_struct's
+- * usage count and then drop tasklist_lock.
+- */
++ /* Prevent p going away */
+ get_task_struct(p);
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ retval = -ENOMEM;
+@@ -6684,10 +6739,12 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ {
+ struct task_struct *p;
++ unsigned long flags;
++ struct rq *rq;
+ int retval;
+
+ get_online_cpus();
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+
+ retval = -ESRCH;
+ p = find_process_by_pid(pid);
+@@ -6698,10 +6755,12 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ if (retval)
+ goto out_unlock;
+
++ rq = task_rq_lock(p, &flags);
+ cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
++ task_rq_unlock(rq, &flags);
+
+ out_unlock:
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ put_online_cpus();
+
+ return retval;
+@@ -6940,6 +6999,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ {
+ struct task_struct *p;
+ unsigned int time_slice;
++ unsigned long flags;
++ struct rq *rq;
+ int retval;
+ struct timespec t;
+
+@@ -6947,7 +7008,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ return -EINVAL;
+
+ retval = -ESRCH;
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+ p = find_process_by_pid(pid);
+ if (!p)
+ goto out_unlock;
+@@ -6956,15 +7017,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ if (retval)
+ goto out_unlock;
+
+- time_slice = p->sched_class->get_rr_interval(p);
++ rq = task_rq_lock(p, &flags);
++ time_slice = p->sched_class->get_rr_interval(rq, p);
++ task_rq_unlock(rq, &flags);
+
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ jiffies_to_timespec(time_slice, &t);
+ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+ return retval;
+
+ out_unlock:
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ return retval;
+ }
+
+@@ -7055,6 +7118,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
+ spin_lock_irqsave(&rq->lock, flags);
+
+ __sched_fork(idle);
++ idle->state = TASK_RUNNING;
+ idle->se.exec_start = sched_clock();
+
+ cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+@@ -7149,7 +7213,19 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ struct rq *rq;
+ int ret = 0;
+
++ /*
++ * Serialize against TASK_WAKING so that ttwu() and wunt() can
++ * drop the rq->lock and still rely on ->cpus_allowed.
++ */
++again:
++ while (task_is_waking(p))
++ cpu_relax();
+ rq = task_rq_lock(p, &flags);
++ if (task_is_waking(p)) {
++ task_rq_unlock(rq, &flags);
++ goto again;
++ }
++
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ ret = -EINVAL;
+ goto out;
+@@ -7178,7 +7254,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+
+ get_task_struct(mt);
+ task_rq_unlock(rq, &flags);
+- wake_up_process(rq->migration_thread);
++ wake_up_process(mt);
+ put_task_struct(mt);
+ wait_for_completion(&req.done);
+ tlb_migrate_finish(p->mm);
+@@ -7205,7 +7281,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ {
+ struct rq *rq_dest, *rq_src;
+- int ret = 0, on_rq;
++ int ret = 0;
+
+ if (unlikely(!cpu_active(dest_cpu)))
+ return ret;
+@@ -7217,19 +7293,17 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ /* Already moved. */
+ if (task_cpu(p) != src_cpu)
+ goto done;
+- /* Waking up, don't get in the way of try_to_wake_up(). */
+- if (p->state == TASK_WAKING)
+- goto fail;
+ /* Affinity changed (again). */
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ goto fail;
+
+- on_rq = p->se.on_rq;
+- if (on_rq)
++ /*
++ * If we're not on a rq, the next wake-up will ensure we're
++ * placed properly.
++ */
++ if (p->se.on_rq) {
+ deactivate_task(rq_src, p, 0);
+-
+- set_task_cpu(p, dest_cpu);
+- if (on_rq) {
++ set_task_cpu(p, dest_cpu);
+ activate_task(rq_dest, p, 0);
+ check_preempt_curr(rq_dest, p, 0);
+ }
+@@ -7308,57 +7382,29 @@ static int migration_thread(void *data)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-
+-static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
+-{
+- int ret;
+-
+- local_irq_disable();
+- ret = __migrate_task(p, src_cpu, dest_cpu);
+- local_irq_enable();
+- return ret;
+-}
+-
+ /*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ */
+-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
++void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+- int dest_cpu;
+- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
+-
+-again:
+- /* Look for allowed, online CPU in same node. */
+- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+- goto move;
+-
+- /* Any allowed, online CPU? */
+- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+- if (dest_cpu < nr_cpu_ids)
+- goto move;
+-
+- /* No more Mr. Nice Guy. */
+- if (dest_cpu >= nr_cpu_ids) {
+- cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
++ struct rq *rq = cpu_rq(dead_cpu);
++ int needs_cpu, uninitialized_var(dest_cpu);
++ unsigned long flags;
+
+- /*
+- * Don't tell them about moving exiting tasks or
+- * kernel threads (both mm NULL), since they never
+- * leave kernel.
+- */
+- if (p->mm && printk_ratelimit()) {
+- printk(KERN_INFO "process %d (%s) no "
+- "longer affine to cpu%d\n",
+- task_pid_nr(p), p->comm, dead_cpu);
+- }
+- }
++ local_irq_save(flags);
+
+-move:
+- /* It can have affinity changed while we were choosing. */
+- if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+- goto again;
++ spin_lock(&rq->lock);
++ needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
++ if (needs_cpu)
++ dest_cpu = select_fallback_rq(dead_cpu, p);
++ spin_unlock(&rq->lock);
++ /*
++ * It can only fail if we race with set_cpus_allowed(),
++ * in the racer should migrate the task anyway.
++ */
++ if (needs_cpu)
++ __migrate_task(p, dead_cpu, dest_cpu);
++ local_irq_restore(flags);
+ }
+
+ /*
+@@ -7752,14 +7798,23 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ cpu_rq(cpu)->migration_thread = NULL;
+ break;
+
+- case CPU_DEAD:
+- case CPU_DEAD_FROZEN:
+- cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
+- migrate_live_tasks(cpu);
++ case CPU_POST_DEAD:
++ /*
++ * Bring the migration thread down in CPU_POST_DEAD event,
++ * since the timers should have got migrated by now and thus
++ * we should not see a deadlock between trying to kill the
++ * migration thread and the sched_rt_period_timer.
++ */
+ rq = cpu_rq(cpu);
+ kthread_stop(rq->migration_thread);
+ put_task_struct(rq->migration_thread);
+ rq->migration_thread = NULL;
++ break;
++
++ case CPU_DEAD:
++ case CPU_DEAD_FROZEN:
++ migrate_live_tasks(cpu);
++ rq = cpu_rq(cpu);
+ /* Idle task back to normal (off runqueue, low prio) */
+ spin_lock_irq(&rq->lock);
+ update_rq_clock(rq);
+@@ -7768,7 +7823,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ rq->idle->sched_class = &idle_sched_class;
+ migrate_dead_tasks(cpu);
+ spin_unlock_irq(&rq->lock);
+- cpuset_unlock();
+ migrate_nr_uninterruptible(rq);
+ BUG_ON(rq->nr_running != 0);
+ calc_global_load_remove(rq);
+@@ -8112,6 +8166,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_domain *tmp;
+
++ for (tmp = sd; tmp; tmp = tmp->parent)
++ tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
++
+ /* Remove the sched domains which do not contribute to scheduling. */
+ for (tmp = sd; tmp; ) {
+ struct sched_domain *parent = tmp->parent;
+@@ -10099,13 +10156,13 @@ void sched_move_task(struct task_struct *tsk)
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ if (tsk->sched_class->moved_group)
+- tsk->sched_class->moved_group(tsk);
++ tsk->sched_class->moved_group(tsk, on_rq);
+ #endif
+
+ if (unlikely(running))
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
+- enqueue_task(rq, tsk, 0);
++ enqueue_task(rq, tsk, 0, false);
+
+ task_rq_unlock(rq, &flags);
+ }
+@@ -10877,12 +10934,30 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+ }
+
+ /*
++ * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
++ * in cputime_t units. As a result, cpuacct_update_stats calls
++ * percpu_counter_add with values large enough to always overflow the
++ * per cpu batch limit causing bad SMP scalability.
++ *
++ * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
++ * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
++ * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
++ */
++#ifdef CONFIG_SMP
++#define CPUACCT_BATCH \
++ min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
++#else
++#define CPUACCT_BATCH 0
++#endif
++
++/*
+ * Charge the system/user time to the task's accounting group.
+ */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+ {
+ struct cpuacct *ca;
++ int batch = CPUACCT_BATCH;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+@@ -10891,7 +10966,7 @@ static void cpuacct_update_stats(struct task_struct *tsk,
+ ca = task_ca(tsk);
+
+ do {
+- percpu_counter_add(&ca->cpustat[idx], val);
++ __percpu_counter_add(&ca->cpustat[idx], val, batch);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 6988cf0..6f836a8 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -423,7 +423,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+ P(se.nr_failed_migrations_running);
+ P(se.nr_failed_migrations_hot);
+ P(se.nr_forced_migrations);
+- P(se.nr_forced2_migrations);
+ P(se.nr_wakeups);
+ P(se.nr_wakeups_sync);
+ P(se.nr_wakeups_migrate);
+@@ -499,7 +498,6 @@ void proc_sched_set_task(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+- p->se.nr_forced2_migrations = 0;
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index d80812d..01e311e 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -488,6 +488,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq, exec_clock, delta_exec);
+ delta_exec_weighted = calc_delta_fair(delta_exec, curr);
++
+ curr->vruntime += delta_exec_weighted;
+ update_min_vruntime(cfs_rq);
+ }
+@@ -743,16 +744,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ se->vruntime = vruntime;
+ }
+
++#define ENQUEUE_WAKEUP 1
++#define ENQUEUE_MIGRATE 2
++
+ static void
+-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
++enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
+ /*
++ * Update the normalized vruntime before updating min_vruntime
++ * through callig update_curr().
++ */
++ if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
++ se->vruntime += cfs_rq->min_vruntime;
++
++ /*
+ * Update run-time statistics of the 'current'.
+ */
+ update_curr(cfs_rq);
+ account_entity_enqueue(cfs_rq, se);
+
+- if (wakeup) {
++ if (flags & ENQUEUE_WAKEUP) {
+ place_entity(cfs_rq, se, 0);
+ enqueue_sleeper(cfs_rq, se);
+ }
+@@ -806,6 +817,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
+ __dequeue_entity(cfs_rq, se);
+ account_entity_dequeue(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
++
++ /*
++ * Normalize the entity after updating the min_vruntime because the
++ * update can refer to the ->curr item and we need to reflect this
++ * movement in our normalized position.
++ */
++ if (!sleep)
++ se->vruntime -= cfs_rq->min_vruntime;
+ }
+
+ /*
+@@ -1012,17 +1031,24 @@ static inline void hrtick_update(struct rq *rq)
+ * increased. Here we update the fair scheduling stats and
+ * then put the task into the rbtree:
+ */
+-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
++ int flags = 0;
++
++ if (wakeup)
++ flags |= ENQUEUE_WAKEUP;
++ if (p->state == TASK_WAKING)
++ flags |= ENQUEUE_MIGRATE;
+
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+ cfs_rq = cfs_rq_of(se);
+- enqueue_entity(cfs_rq, se, wakeup);
+- wakeup = 1;
++ enqueue_entity(cfs_rq, se, flags);
++ flags = ENQUEUE_WAKEUP;
+ }
+
+ hrtick_update(rq);
+@@ -1098,6 +1124,14 @@ static void yield_task_fair(struct rq *rq)
+
+ #ifdef CONFIG_SMP
+
++static void task_waking_fair(struct rq *rq, struct task_struct *p)
++{
++ struct sched_entity *se = &p->se;
++ struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++ se->vruntime -= cfs_rq->min_vruntime;
++}
++
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * effective_load() calculates the load change as seen from the root_task_group
+@@ -1216,6 +1250,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
++ rcu_read_lock();
+ if (sync) {
+ tg = task_group(current);
+ weight = current->se.load.weight;
+@@ -1241,6 +1276,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ balanced = !this_load ||
+ 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
+ imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
++ rcu_read_unlock();
+
+ /*
+ * If the currently running task will sleep within
+@@ -1348,6 +1384,56 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+ }
+
+ /*
++ * Try and locate an idle CPU in the sched_domain.
++ */
++static int select_idle_sibling(struct task_struct *p, int target)
++{
++ int cpu = smp_processor_id();
++ int prev_cpu = task_cpu(p);
++ struct sched_domain *sd;
++ int i;
++
++ /*
++ * If the task is going to be woken-up on this cpu and if it is
++ * already idle, then it is the right target.
++ */
++ if (target == cpu && idle_cpu(cpu))
++ return cpu;
++
++ /*
++ * If the task is going to be woken-up on the cpu where it previously
++ * ran and if it is currently idle, then it the right target.
++ */
++ if (target == prev_cpu && idle_cpu(prev_cpu))
++ return prev_cpu;
++
++ /*
++ * Otherwise, iterate the domains and find an elegible idle cpu.
++ */
++ for_each_domain(target, sd) {
++ if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
++ break;
++
++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
++ if (idle_cpu(i)) {
++ target = i;
++ break;
++ }
++ }
++
++ /*
++ * Lets stop looking for an idle sibling when we reached
++ * the domain that spans the current cpu and prev_cpu.
++ */
++ if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
++ cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
++ break;
++ }
++
++ return target;
++}
++
++/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+@@ -1358,7 +1444,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+ *
+ * preempt must be disabled.
+ */
+-static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
++static int
++select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+ {
+ struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ int cpu = smp_processor_id();
+@@ -1375,7 +1462,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ new_cpu = prev_cpu;
+ }
+
+- rcu_read_lock();
+ for_each_domain(cpu, tmp) {
+ if (!(tmp->flags & SD_LOAD_BALANCE))
+ continue;
+@@ -1404,38 +1490,14 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ want_sd = 0;
+ }
+
+- if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
+- int candidate = -1, i;
+-
+- if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+- candidate = cpu;
+-
+- /*
+- * Check for an idle shared cache.
+- */
+- if (tmp->flags & SD_PREFER_SIBLING) {
+- if (candidate == cpu) {
+- if (!cpu_rq(prev_cpu)->cfs.nr_running)
+- candidate = prev_cpu;
+- }
+-
+- if (candidate == -1 || candidate == cpu) {
+- for_each_cpu(i, sched_domain_span(tmp)) {
+- if (!cpumask_test_cpu(i, &p->cpus_allowed))
+- continue;
+- if (!cpu_rq(i)->cfs.nr_running) {
+- candidate = i;
+- break;
+- }
+- }
+- }
+- }
+-
+- if (candidate >= 0) {
+- affine_sd = tmp;
+- want_affine = 0;
+- cpu = candidate;
+- }
++ /*
++ * If both cpu and prev_cpu are part of this domain,
++ * cpu is a valid SD_WAKE_AFFINE target.
++ */
++ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
++ cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
++ affine_sd = tmp;
++ want_affine = 0;
+ }
+
+ if (!want_sd && !want_affine)
+@@ -1448,23 +1510,28 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ sd = tmp;
+ }
+
++#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (sched_feat(LB_SHARES_UPDATE)) {
+ /*
+ * Pick the largest domain to update shares over
+ */
+ tmp = sd;
+- if (affine_sd && (!tmp ||
+- cpumask_weight(sched_domain_span(affine_sd)) >
+- cpumask_weight(sched_domain_span(sd))))
++ if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
+ tmp = affine_sd;
+
+- if (tmp)
++ if (tmp) {
++ spin_unlock(&rq->lock);
+ update_shares(tmp);
++ spin_lock(&rq->lock);
++ }
+ }
++#endif
+
+- if (affine_sd && wake_affine(affine_sd, p, sync)) {
+- new_cpu = cpu;
+- goto out;
++ if (affine_sd) {
++ if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
++ return select_idle_sibling(p, cpu);
++ else
++ return select_idle_sibling(p, prev_cpu);
+ }
+
+ while (sd) {
+@@ -1495,10 +1562,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = new_cpu;
+- weight = cpumask_weight(sched_domain_span(sd));
++ weight = sd->span_weight;
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+- if (weight <= cpumask_weight(sched_domain_span(tmp)))
++ if (weight <= tmp->span_weight)
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+@@ -1506,8 +1573,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ /* while loop will break here if sd == NULL */
+ }
+
+-out:
+- rcu_read_unlock();
+ return new_cpu;
+ }
+ #endif /* CONFIG_SMP */
+@@ -1911,28 +1976,32 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ }
+
+ /*
+- * Share the fairness runtime between parent and child, thus the
+- * total amount of pressure for CPU stays equal - new tasks
+- * get a chance to run but frequent forkers are not allowed to
+- * monopolize the CPU. Note: the parent runqueue is locked,
+- * the child is not running yet.
++ * called on fork with the child task as argument from the parent's context
++ * - child not yet on the tasklist
++ * - preemption disabled
+ */
+-static void task_new_fair(struct rq *rq, struct task_struct *p)
++static void task_fork_fair(struct task_struct *p)
+ {
+- struct cfs_rq *cfs_rq = task_cfs_rq(p);
++ struct cfs_rq *cfs_rq = task_cfs_rq(current);
+ struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+ int this_cpu = smp_processor_id();
++ struct rq *rq = this_rq();
++ unsigned long flags;
++
++ spin_lock_irqsave(&rq->lock, flags);
++
++ update_rq_clock(rq);
+
+- sched_info_queued(p);
++ if (unlikely(task_cpu(p) != this_cpu))
++ __set_task_cpu(p, this_cpu);
+
+ update_curr(cfs_rq);
++
+ if (curr)
+ se->vruntime = curr->vruntime;
+ place_entity(cfs_rq, se, 1);
+
+- /* 'curr' will be NULL if the child belongs to a different group */
+- if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
+- curr && entity_before(curr, se)) {
++ if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
+ /*
+ * Upon rescheduling, sched_class::put_prev_task() will place
+ * 'current' within the tree based on its new key value.
+@@ -1941,7 +2010,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
+ resched_task(rq->curr);
+ }
+
+- enqueue_task_fair(rq, p, 0);
++ se->vruntime -= cfs_rq->min_vruntime;
++
++ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+
+ /*
+@@ -1994,30 +2065,27 @@ static void set_curr_task_fair(struct rq *rq)
+ }
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-static void moved_group_fair(struct task_struct *p)
++static void moved_group_fair(struct task_struct *p, int on_rq)
+ {
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+ update_curr(cfs_rq);
+- place_entity(cfs_rq, &p->se, 1);
++ if (!on_rq)
++ place_entity(cfs_rq, &p->se, 1);
+ }
+ #endif
+
+-unsigned int get_rr_interval_fair(struct task_struct *task)
++unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
+ {
+ struct sched_entity *se = &task->se;
+- unsigned long flags;
+- struct rq *rq;
+ unsigned int rr_interval = 0;
+
+ /*
+ * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+ * idle runqueue:
+ */
+- rq = task_rq_lock(task, &flags);
+ if (rq->cfs.load.weight)
+ rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+- task_rq_unlock(rq, &flags);
+
+ return rr_interval;
+ }
+@@ -2043,11 +2111,13 @@ static const struct sched_class fair_sched_class = {
+ .move_one_task = move_one_task_fair,
+ .rq_online = rq_online_fair,
+ .rq_offline = rq_offline_fair,
++
++ .task_waking = task_waking_fair,
+ #endif
+
+ .set_curr_task = set_curr_task_fair,
+ .task_tick = task_tick_fair,
+- .task_new = task_new_fair,
++ .task_fork = task_fork_fair,
+
+ .prio_changed = prio_changed_fair,
+ .switched_to = switched_to_fair,
+diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
+index b133a28..93ad2e7 100644
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -6,7 +6,8 @@
+ */
+
+ #ifdef CONFIG_SMP
+-static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
++static int
++select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+ {
+ return task_cpu(p); /* IDLE tasks as never migrated */
+ }
+@@ -97,7 +98,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+ check_preempt_curr(rq, p, 0);
+ }
+
+-unsigned int get_rr_interval_idle(struct task_struct *task)
++unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
+ {
+ return 0;
+ }
+diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
+index a4d790c..af24fab 100644
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -194,7 +194,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+ return rt_se->my_q;
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+
+ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+@@ -204,7 +204,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+
+ if (rt_rq->rt_nr_running) {
+ if (rt_se && !on_rt_rq(rt_se))
+- enqueue_rt_entity(rt_se);
++ enqueue_rt_entity(rt_se, false);
+ if (rt_rq->highest_prio.curr < curr->prio)
+ resched_task(curr);
+ }
+@@ -803,7 +803,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+ dec_rt_group(rt_se, rt_rq);
+ }
+
+-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
++static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ {
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+ struct rt_prio_array *array = &rt_rq->active;
+@@ -819,7 +819,10 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
+ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+ return;
+
+- list_add_tail(&rt_se->run_list, queue);
++ if (head)
++ list_add(&rt_se->run_list, queue);
++ else
++ list_add_tail(&rt_se->run_list, queue);
+ __set_bit(rt_se_prio(rt_se), array->bitmap);
+
+ inc_rt_tasks(rt_se, rt_rq);
+@@ -856,11 +859,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+ }
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ {
+ dequeue_rt_stack(rt_se);
+ for_each_sched_rt_entity(rt_se)
+- __enqueue_rt_entity(rt_se);
++ __enqueue_rt_entity(rt_se, head);
+ }
+
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+@@ -871,21 +874,22 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+ if (rt_rq && rt_rq->rt_nr_running)
+- __enqueue_rt_entity(rt_se);
++ __enqueue_rt_entity(rt_se, false);
+ }
+ }
+
+ /*
+ * Adding/removing a task to/from a priority array:
+ */
+-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ struct sched_rt_entity *rt_se = &p->rt;
+
+ if (wakeup)
+ rt_se->timeout = 0;
+
+- enqueue_rt_entity(rt_se);
++ enqueue_rt_entity(rt_se, head);
+
+ if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+ enqueue_pushable_task(rq, p);
+@@ -938,10 +942,9 @@ static void yield_task_rt(struct rq *rq)
+ #ifdef CONFIG_SMP
+ static int find_lowest_rq(struct task_struct *task);
+
+-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
++static int
++select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+ {
+- struct rq *rq = task_rq(p);
+-
+ if (sd_flag != SD_BALANCE_WAKE)
+ return smp_processor_id();
+
+@@ -1485,7 +1488,7 @@ static void post_schedule_rt(struct rq *rq)
+ * If we are not running and we are not going to reschedule soon, we should
+ * try to push tasks away now
+ */
+-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
++static void task_woken_rt(struct rq *rq, struct task_struct *p)
+ {
+ if (!task_running(rq, p) &&
+ !test_tsk_need_resched(rq->curr) &&
+@@ -1734,7 +1737,7 @@ static void set_curr_task_rt(struct rq *rq)
+ dequeue_pushable_task(rq, p);
+ }
+
+-unsigned int get_rr_interval_rt(struct task_struct *task)
++unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
+ {
+ /*
+ * Time slice is 0 for SCHED_FIFO tasks
+@@ -1766,7 +1769,7 @@ static const struct sched_class rt_sched_class = {
+ .rq_offline = rq_offline_rt,
+ .pre_schedule = pre_schedule_rt,
+ .post_schedule = post_schedule_rt,
+- .task_wake_up = task_wake_up_rt,
++ .task_woken = task_woken_rt,
+ .switched_from = switched_from_rt,
+ #endif
+
+diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
+index 0cccb6c..22cf21e 100644
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -369,11 +369,18 @@ static int function_stat_show(struct seq_file *m, void *v)
+ {
+ struct ftrace_profile *rec = v;
+ char str[KSYM_SYMBOL_LEN];
++ int ret = 0;
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- static DEFINE_MUTEX(mutex);
+ static struct trace_seq s;
+ unsigned long long avg;
+ #endif
++ mutex_lock(&ftrace_profile_lock);
++
++ /* we raced with function_profile_reset() */
++ if (unlikely(rec->counter == 0)) {
++ ret = -EBUSY;
++ goto out;
++ }
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ seq_printf(m, " %-30.30s %10lu", str, rec->counter);
+@@ -383,17 +390,17 @@ static int function_stat_show(struct seq_file *m, void *v)
+ avg = rec->time;
+ do_div(avg, rec->counter);
+
+- mutex_lock(&mutex);
+ trace_seq_init(&s);
+ trace_print_graph_duration(rec->time, &s);
+ trace_seq_puts(&s, " ");
+ trace_print_graph_duration(avg, &s);
+ trace_print_seq(m, &s);
+- mutex_unlock(&mutex);
+ #endif
+ seq_putc(m, '\n');
++out:
++ mutex_unlock(&ftrace_profile_lock);
+
+- return 0;
++ return ret;
+ }
+
+ static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
+@@ -1473,6 +1480,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
+ if (*pos > 0)
+ return t_hash_start(m, pos);
+ iter->flags |= FTRACE_ITER_PRINTALL;
++ /* reset in case of seek/pread */
++ iter->flags &= ~FTRACE_ITER_HASH;
+ return iter;
+ }
+
+@@ -2393,7 +2402,7 @@ static const struct file_operations ftrace_filter_fops = {
+ .open = ftrace_filter_open,
+ .read = seq_read,
+ .write = ftrace_filter_write,
+- .llseek = ftrace_regex_lseek,
++ .llseek = no_llseek,
+ .release = ftrace_filter_release,
+ };
+
+diff --git a/mm/bounce.c b/mm/bounce.c
+index a2b76a5..1d5fa08 100644
+--- a/mm/bounce.c
++++ b/mm/bounce.c
+@@ -115,8 +115,8 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
+ */
+ vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
+
+- flush_dcache_page(tovec->bv_page);
+ bounce_copy_vec(tovec, vfrom);
++ flush_dcache_page(tovec->bv_page);
+ }
+ }
+
+diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
+index 2047465..6d27a5b 100644
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -551,19 +551,19 @@ static inline int pageblock_free(struct page *page)
+ /* Return the start of the next active pageblock after a given page */
+ static struct page *next_active_pageblock(struct page *page)
+ {
+- int pageblocks_stride;
+-
+ /* Ensure the starting page is pageblock-aligned */
+ BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+
+- /* Move forward by at least 1 * pageblock_nr_pages */
+- pageblocks_stride = 1;
+-
+ /* If the entire pageblock is free, move to the end of free page */
+- if (pageblock_free(page))
+- pageblocks_stride += page_order(page) - pageblock_order;
++ if (pageblock_free(page)) {
++ int order;
++ /* be careful. we don't have locks, page_order can be changed.*/
++ order = page_order(page);
++ if ((order < MAX_ORDER) && (order >= pageblock_order))
++ return page + (1 << order);
++ }
+
+- return page + (pageblocks_stride * pageblock_nr_pages);
++ return page + pageblock_nr_pages;
+ }
+
+ /* Checks if this range of memory is likely to be hot-removable. */
+diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
+index 315ead3..cfef331 100644
+--- a/net/irda/irlan/irlan_common.c
++++ b/net/irda/irlan/irlan_common.c
+@@ -1101,7 +1101,7 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len)
+ memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */
+ le16_to_cpus(&val_len); n+=2;
+
+- if (val_len > 1016) {
++ if (val_len >= 1016) {
+ IRDA_DEBUG(2, "%s(), parameter length to long\n", __func__ );
+ return -RSP_INVALID_COMMAND_FORMAT;
+ }
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index 2370ab4..4c32700 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -717,17 +717,18 @@ gss_pipe_release(struct inode *inode)
+ struct rpc_inode *rpci = RPC_I(inode);
+ struct gss_upcall_msg *gss_msg;
+
++restart:
+ spin_lock(&inode->i_lock);
+- while (!list_empty(&rpci->in_downcall)) {
++ list_for_each_entry(gss_msg, &rpci->in_downcall, list) {
+
+- gss_msg = list_entry(rpci->in_downcall.next,
+- struct gss_upcall_msg, list);
++ if (!list_empty(&gss_msg->msg.list))
++ continue;
+ gss_msg->msg.errno = -EPIPE;
+ atomic_inc(&gss_msg->count);
+ __gss_unhash_msg(gss_msg);
+ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+- spin_lock(&inode->i_lock);
++ goto restart;
+ }
+ spin_unlock(&inode->i_lock);
+
+diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
+index 27a2378..ea1e6de 100644
+--- a/net/sunrpc/rpc_pipe.c
++++ b/net/sunrpc/rpc_pipe.c
+@@ -47,7 +47,7 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
+ return;
+ do {
+ msg = list_entry(head->next, struct rpc_pipe_msg, list);
+- list_del(&msg->list);
++ list_del_init(&msg->list);
+ msg->errno = err;
+ destroy_msg(msg);
+ } while (!list_empty(head));
+@@ -207,7 +207,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
+ if (msg != NULL) {
+ spin_lock(&inode->i_lock);
+ msg->errno = -EAGAIN;
+- list_del(&msg->list);
++ list_del_init(&msg->list);
+ spin_unlock(&inode->i_lock);
+ rpci->ops->destroy_msg(msg);
+ }
+@@ -267,7 +267,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
+ if (res < 0 || msg->len == msg->copied) {
+ filp->private_data = NULL;
+ spin_lock(&inode->i_lock);
+- list_del(&msg->list);
++ list_del_init(&msg->list);
+ spin_unlock(&inode->i_lock);
+ rpci->ops->destroy_msg(msg);
+ }
+diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
+index 6a60c5a..62cfc0c 100644
+--- a/net/wireless/wext-compat.c
++++ b/net/wireless/wext-compat.c
+@@ -1358,6 +1358,9 @@ int cfg80211_wext_giwessid(struct net_device *dev,
+ {
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
++ data->flags = 0;
++ data->length = 0;
++
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_ADHOC:
+ return cfg80211_ibss_wext_giwessid(dev, info, data, ssid);
+diff --git a/net/wireless/wext.c b/net/wireless/wext.c
+index 60fe577..fddcf9c 100644
+--- a/net/wireless/wext.c
++++ b/net/wireless/wext.c
+@@ -854,6 +854,22 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
+ }
+ }
+
++ if (IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX)) {
++ /*
++ * If this is a GET, but not NOMAX, it means that the extra
++ * data is not bounded by userspace, but by max_tokens. Thus
++ * set the length to max_tokens. This matches the extra data
++ * allocation.
++ * The driver should fill it with the number of tokens it
++ * provided, and it may check iwp->length rather than having
++ * knowledge of max_tokens. If the driver doesn't change the
++ * iwp->length, this ioctl just copies back max_token tokens
++ * filled with zeroes. Hopefully the driver isn't claiming
++ * them to be valid data.
++ */
++ iwp->length = descr->max_tokens;
++ }
++
+ err = handler(dev, info, (union iwreq_data *) iwp, extra);
+
+ iwp->length += essid_compat;
+diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
+index d0d721c..1f133fe 100644
+--- a/sound/core/seq/oss/seq_oss_init.c
++++ b/sound/core/seq/oss/seq_oss_init.c
+@@ -280,13 +280,10 @@ snd_seq_oss_open(struct file *file, int level)
+ return 0;
+
+ _error:
+- snd_seq_oss_writeq_delete(dp->writeq);
+- snd_seq_oss_readq_delete(dp->readq);
+ snd_seq_oss_synth_cleanup(dp);
+ snd_seq_oss_midi_cleanup(dp);
+- delete_port(dp);
+ delete_seq_queue(dp->queue);
+- kfree(dp);
++ delete_port(dp);
+
+ return rc;
+ }
+@@ -349,8 +346,10 @@ create_port(struct seq_oss_devinfo *dp)
+ static int
+ delete_port(struct seq_oss_devinfo *dp)
+ {
+- if (dp->port < 0)
++ if (dp->port < 0) {
++ kfree(dp);
+ return 0;
++ }
+
+ debug_printk(("delete_port %i\n", dp->port));
+ return snd_seq_event_port_detach(dp->cseq, dp->port);
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 7b4e74d..06c118c 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -6589,7 +6589,7 @@ static struct hda_input_mux alc883_lenovo_nb0763_capture_source = {
+ .num_items = 4,
+ .items = {
+ { "Mic", 0x0 },
+- { "iMic", 0x1 },
++ { "Int Mic", 0x1 },
+ { "Line", 0x2 },
+ { "CD", 0x4 },
+ },
+@@ -8038,8 +8038,8 @@ static struct snd_kcontrol_new alc883_lenovo_nb0763_mixer[] = {
+ HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+ HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+ HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+- HDA_CODEC_VOLUME("iMic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+- HDA_CODEC_MUTE("iMic Playback Switch", 0x0b, 0x1, HDA_INPUT),
++ HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
++ HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+ { } /* end */
+ };
+
+@@ -12389,6 +12389,9 @@ static int alc268_new_analog_output(struct alc_spec *spec, hda_nid_t nid,
+ dac = 0x02;
+ break;
+ case 0x15:
++ case 0x1a: /* ALC259/269 only */
++ case 0x1b: /* ALC259/269 only */
++ case 0x21: /* ALC269vb has this pin, too */
+ dac = 0x03;
+ break;
+ default:
+diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
+index a31a8cd..3c6d141 100644
+--- a/tools/perf/util/callchain.h
++++ b/tools/perf/util/callchain.h
+@@ -49,6 +49,7 @@ static inline void callchain_init(struct callchain_node *node)
+ INIT_LIST_HEAD(&node->children);
+ INIT_LIST_HEAD(&node->val);
+
++ node->children_hit = 0;
+ node->parent = NULL;
+ node->hit = 0;
+ }
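
A minimal stand-alone sketch of the clamp described in the cpuacct comment earlier in this patch (CPUACCT_BATCH scales percpu_counter_batch by cputime_one_jiffy and caps the result at INT_MAX). This is only an illustration: percpu_counter_batch and cputime_one_jiffy are kernel symbols, and the values used below are assumptions, not taken from any particular configuration.

/*
 * Illustrative sketch of the CPUACCT_BATCH computation from the
 * kernel/sched.c hunk above; not part of the committed diff.
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Assumed stand-ins for the kernel symbols of the same names. */
	long percpu_counter_batch = 32;		/* assumed default per-CPU batch */
	long cputime_one_jiffy = 10000000;	/* assumed: 10 ms in cputime_t units */

	/* Equivalent of min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX). */
	long batch = percpu_counter_batch * cputime_one_jiffy;
	if (batch > INT_MAX)
		batch = INT_MAX;

	printf("effective __percpu_counter_add() batch: %ld\n", batch);
	return 0;
}
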
Added: dists/sid/linux-2.6/debian/patches/debian/revert-sched-2.6.32.22-changes.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/revert-sched-2.6.32.22-changes.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,1802 @@
+Subject: [PATCH] Revert 2.6.32.22 sched changes
+
+Revert all changes between 2.6.32.21 and 2.6.32.22 in the files:
+
+kernel/sched*.c
+kernel/cpu.c
+include/linux/cpuset.h
+include/linux/sched.h
+include/linux/topology.h
+
+This is a temporary measure for OpenVZ and VServer until they are
+rebased on top of 2.6.32.22.
+
+diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
+index a73454a..a5740fc 100644
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -21,7 +21,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+-extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
++extern void cpuset_cpus_allowed_locked(struct task_struct *p,
++ struct cpumask *mask);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+ void cpuset_init_current_mems_allowed(void);
+@@ -68,6 +69,9 @@ struct seq_file;
+ extern void cpuset_task_status_allowed(struct seq_file *m,
+ struct task_struct *task);
+
++extern void cpuset_lock(void);
++extern void cpuset_unlock(void);
++
+ extern int cpuset_mem_spread_node(void);
+
+ static inline int cpuset_do_page_mem_spread(void)
+@@ -101,11 +105,10 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
+ {
+ cpumask_copy(mask, cpu_possible_mask);
+ }
+-
+-static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
++static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
++ struct cpumask *mask)
+ {
+- cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+- return cpumask_any(cpu_active_mask);
++ cpumask_copy(mask, cpu_possible_mask);
+ }
+
+ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+@@ -154,6 +157,9 @@ static inline void cpuset_task_status_allowed(struct seq_file *m,
+ {
+ }
+
++static inline void cpuset_lock(void) {}
++static inline void cpuset_unlock(void) {}
++
+ static inline int cpuset_mem_spread_node(void)
+ {
+ return 0;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 1184379..4b3dbc7 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);
+
+
+ extern void calc_global_load(void);
++extern u64 cpu_nr_migrations(int cpu);
+
+ extern unsigned long get_parent_ip(unsigned long addr);
+
+@@ -1071,12 +1072,10 @@ struct sched_domain;
+ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+ #define WF_FORK 0x02 /* child wakeup after fork */
+
+-#ifndef __GENKSYMS__
+ struct sched_class {
+ const struct sched_class *next;
+
+- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
+- bool head);
++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*yield_task) (struct rq *rq);
+
+@@ -1086,8 +1085,7 @@ struct sched_class {
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+
+ #ifdef CONFIG_SMP
+- int (*select_task_rq)(struct rq *rq, struct task_struct *p,
+- int sd_flag, int flags);
++ int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+
+ unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, unsigned long max_load_move,
+@@ -1099,8 +1097,7 @@ struct sched_class {
+ enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+- void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+- void (*task_woken) (struct rq *this_rq, struct task_struct *task);
++ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+
+ void (*set_cpus_allowed)(struct task_struct *p,
+ const struct cpumask *newmask);
+@@ -1111,7 +1108,7 @@ struct sched_class {
+
+ void (*set_curr_task) (struct rq *rq);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+- void (*task_fork) (struct task_struct *p);
++ void (*task_new) (struct rq *rq, struct task_struct *p);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+@@ -1120,53 +1117,12 @@ struct sched_class {
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
+
+- unsigned int (*get_rr_interval) (struct rq *rq,
+- struct task_struct *task);
++ unsigned int (*get_rr_interval) (struct task_struct *task);
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *p, int on_rq);
+-#endif
+-};
+-#else /* __GENKSYMS__ */
+-/*
+- * struct sched_class is private to the scheduler, but since it is
+- * defined here it affects the symbol version of many exported symbols.
+- * This is a fake definition purely to keep symbol versions stable.
+- */
+-struct sched_class {
+- const struct sched_class *next;
+- void (*enqueue_task) (struct rq *, struct task_struct *, int);
+- void (*dequeue_task) (struct rq *, struct task_struct *, int);
+- void (*yield_task) (struct rq *);
+- void (*check_preempt_curr) (struct rq *, struct task_struct *, int);
+- struct task_struct * (*pick_next_task) (struct rq *);
+- void (*put_prev_task) (struct rq *, struct task_struct *);
+-#ifdef CONFIG_SMP
+- int (*select_task_rq)(struct task_struct *, int, int);
+- unsigned long (*load_balance) (struct rq *, int, struct rq *,
+- unsigned long, struct sched_domain *,
+- enum cpu_idle_type, int *, int *);
+- int (*move_one_task) (struct rq *, int, struct rq *,
+- struct sched_domain *, enum cpu_idle_type);
+- void (*pre_schedule) (struct rq *, struct task_struct *);
+- void (*post_schedule) (struct rq *);
+- void (*task_wake_up) (struct rq *, struct task_struct *);
+- void (*set_cpus_allowed)(struct task_struct *, const struct cpumask *);
+- void (*rq_online)(struct rq *);
+- void (*rq_offline)(struct rq *);
+-#endif
+- void (*set_curr_task) (struct rq *);
+- void (*task_tick) (struct rq *, struct task_struct *, int);
+- void (*task_new) (struct rq *, struct task_struct *);
+- void (*switched_from) (struct rq *, struct task_struct *, int);
+- void (*switched_to) (struct rq *, struct task_struct *, int);
+- void (*prio_changed) (struct rq *, struct task_struct *, int, int);
+- unsigned int (*get_rr_interval) (struct task_struct *);
+-#ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *);
++ void (*moved_group) (struct task_struct *p);
+ #endif
+ };
+-#endif /* __GENKSYMS__ */
+
+ struct load_weight {
+ unsigned long weight, inv_weight;
+@@ -1225,6 +1181,7 @@ struct sched_entity {
+ u64 nr_failed_migrations_running;
+ u64 nr_failed_migrations_hot;
+ u64 nr_forced_migrations;
++ u64 nr_forced2_migrations;
+
+ u64 nr_wakeups;
+ u64 nr_wakeups_sync;
+@@ -1933,7 +1890,6 @@ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
+ extern void idle_task_exit(void);
+ #else
+ static inline void idle_task_exit(void) {}
+diff --git a/include/linux/topology.h b/include/linux/topology.h
+index 5b81156..57e6357 100644
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+- | 1*SD_SHARE_PKG_RESOURCES \
++ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_PREFER_SIBLING \
+ , \
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 7e8b6ac..291ac58 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -151,7 +151,7 @@ static inline void check_for_tasks(int cpu)
+
+ write_lock_irq(&tasklist_lock);
+ for_each_process(p) {
+- if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
++ if (task_cpu(p) == cpu &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
+@@ -163,7 +163,6 @@ static inline void check_for_tasks(int cpu)
+ }
+
+ struct take_cpu_down_param {
+- struct task_struct *caller;
+ unsigned long mod;
+ void *hcpu;
+ };
+@@ -172,7 +171,6 @@ struct take_cpu_down_param {
+ static int __ref take_cpu_down(void *_param)
+ {
+ struct take_cpu_down_param *param = _param;
+- unsigned int cpu = (unsigned long)param->hcpu;
+ int err;
+
+ /* Ensure this CPU doesn't handle any more interrupts. */
+@@ -183,8 +181,6 @@ static int __ref take_cpu_down(void *_param)
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
+- if (task_cpu(param->caller) == cpu)
+- move_task_off_dead_cpu(cpu, param->caller);
+ /* Force idle task to run as soon as we yield: it should
+ immediately notice cpu is offline and die quickly. */
+ sched_idle_next();
+@@ -195,10 +191,10 @@ static int __ref take_cpu_down(void *_param)
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+ int err, nr_calls = 0;
++ cpumask_var_t old_allowed;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
+- .caller = current,
+ .mod = mod,
+ .hcpu = hcpu,
+ };
+@@ -209,8 +205,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
++ if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
++ return -ENOMEM;
++
+ cpu_hotplug_begin();
+- set_cpu_active(cpu, false);
+ err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+ hcpu, -1, &nr_calls);
+ if (err == NOTIFY_BAD) {
+@@ -225,6 +223,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ goto out_release;
+ }
+
++ /* Ensure that we are not runnable on dying cpu */
++ cpumask_copy(old_allowed, &current->cpus_allowed);
++ set_cpus_allowed_ptr(current, cpu_active_mask);
++
+ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ set_cpu_active(cpu, true);
+@@ -233,7 +235,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ hcpu) == NOTIFY_BAD)
+ BUG();
+
+- goto out_release;
++ goto out_allowed;
+ }
+ BUG_ON(cpu_online(cpu));
+
+@@ -251,6 +253,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+
+ check_for_tasks(cpu);
+
++out_allowed:
++ set_cpus_allowed_ptr(current, old_allowed);
+ out_release:
+ cpu_hotplug_done();
+ if (!err) {
+@@ -258,6 +262,7 @@ out_release:
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
++ free_cpumask_var(old_allowed);
+ return err;
+ }
+
+@@ -275,6 +280,18 @@ int __ref cpu_down(unsigned int cpu)
+ goto out;
+ }
+
++ set_cpu_active(cpu, false);
++
++ /*
++ * Make sure the all cpus did the reschedule and are not
++ * using stale version of the cpu_active_mask.
++ * This is not strictly necessary becuase stop_machine()
++ * that we run down the line already provides the required
++ * synchronization. But it's really a side effect and we do not
++ * want to depend on the innards of the stop_machine here.
++ */
++ synchronize_sched();
++
+ err = _cpu_down(cpu, 0);
+
+ out:
+@@ -365,12 +382,19 @@ int disable_nonboot_cpus(void)
+ return error;
+ cpu_maps_update_begin();
+ first_cpu = cpumask_first(cpu_online_mask);
+- /*
+- * We take down all of the non-boot CPUs in one shot to avoid races
++ /* We take down all of the non-boot CPUs in one shot to avoid races
+ * with the userspace trying to use the CPU hotplug at the same time
+ */
+ cpumask_clear(frozen_cpus);
+
++ for_each_online_cpu(cpu) {
++ if (cpu == first_cpu)
++ continue;
++ set_cpu_active(cpu, false);
++ }
++
++ synchronize_sched();
++
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 98d4048..9990074 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -542,6 +542,7 @@ struct rq {
+ struct load_weight load;
+ unsigned long nr_load_updates;
+ u64 nr_switches;
++ u64 nr_migrations_in;
+
+ struct cfs_rq cfs;
+ struct rt_rq rt;
+@@ -942,25 +943,14 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
+- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
+- * against ttwu().
+- */
+-static inline int task_is_waking(struct task_struct *p)
+-{
+- return unlikely(p->state == TASK_WAKING);
+-}
+-
+-/*
+ * __task_rq_lock - lock the runqueue a given task resides on.
+ * Must be called interrupts disabled.
+ */
+ static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+ {
+- struct rq *rq;
+-
+ for (;;) {
+- rq = task_rq(p);
++ struct rq *rq = task_rq(p);
+ spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p)))
+ return rq;
+@@ -1832,20 +1822,6 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
+ static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
+
+-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+-{
+- set_task_rq(p, cpu);
+-#ifdef CONFIG_SMP
+- /*
+- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+- * successfuly executed on another CPU. We must ensure that updates of
+- * per-task data have been completed by this moment.
+- */
+- smp_wmb();
+- task_thread_info(p)->cpu = cpu;
+-#endif
+-}
+-
+ #include "sched_stats.h"
+ #include "sched_idletask.c"
+ #include "sched_fair.c"
+@@ -1895,14 +1871,13 @@ static void update_avg(u64 *avg, u64 sample)
+ *avg += diff >> 3;
+ }
+
+-static void
+-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
++static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
+ {
+ if (wakeup)
+ p->se.start_runtime = p->se.sum_exec_runtime;
+
+ sched_info_queued(p);
+- p->sched_class->enqueue_task(rq, p, wakeup, head);
++ p->sched_class->enqueue_task(rq, p, wakeup);
+ p->se.on_rq = 1;
+ }
+
+@@ -1978,7 +1953,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+ if (task_contributes_to_load(p))
+ rq->nr_uninterruptible--;
+
+- enqueue_task(rq, p, wakeup, false);
++ enqueue_task(rq, p, wakeup);
+ inc_nr_running(rq);
+ }
+
+@@ -2003,6 +1978,20 @@ inline int task_curr(const struct task_struct *p)
+ return cpu_curr(task_cpu(p)) == p;
+ }
+
++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
++{
++ set_task_rq(p, cpu);
++#ifdef CONFIG_SMP
++ /*
++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
++ * successfuly executed on another CPU. We must ensure that updates of
++ * per-task data have been completed by this moment.
++ */
++ smp_wmb();
++ task_thread_info(p)->cpu = cpu;
++#endif
++}
++
+ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+@@ -2029,15 +2018,21 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
++ struct rq *rq = cpu_rq(cpu);
++ unsigned long flags;
++
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ WARN_ON(1);
+ return;
+ }
+
++ spin_lock_irqsave(&rq->lock, flags);
++ set_task_cpu(p, cpu);
+ p->cpus_allowed = cpumask_of_cpu(cpu);
+ p->rt.nr_cpus_allowed = 1;
+ p->flags |= PF_THREAD_BOUND;
++ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
+
+@@ -2075,23 +2070,35 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+ int old_cpu = task_cpu(p);
++ struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
++ struct cfs_rq *old_cfsrq = task_cfs_rq(p),
++ *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
++ u64 clock_offset;
+
+-#ifdef CONFIG_SCHED_DEBUG
+- /*
+- * We should never call set_task_cpu() on a blocked task,
+- * ttwu() will sort out the placement.
+- */
+- WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
+- !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+-#endif
++ clock_offset = old_rq->clock - new_rq->clock;
+
+ trace_sched_migrate_task(p, new_cpu);
+
++#ifdef CONFIG_SCHEDSTATS
++ if (p->se.wait_start)
++ p->se.wait_start -= clock_offset;
++ if (p->se.sleep_start)
++ p->se.sleep_start -= clock_offset;
++ if (p->se.block_start)
++ p->se.block_start -= clock_offset;
++#endif
+ if (old_cpu != new_cpu) {
+ p->se.nr_migrations++;
++ new_rq->nr_migrations_in++;
++#ifdef CONFIG_SCHEDSTATS
++ if (task_hot(p, old_rq->clock, NULL))
++ schedstat_inc(p, se.nr_forced2_migrations);
++#endif
+ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+ 1, 1, NULL, 0);
+ }
++ p->se.vruntime -= old_cfsrq->min_vruntime -
++ new_cfsrq->min_vruntime;
+
+ __set_task_cpu(p, new_cpu);
+ }
+@@ -2324,69 +2331,6 @@ void task_oncpu_function_call(struct task_struct *p,
+ preempt_enable();
+ }
+
+-#ifdef CONFIG_SMP
+-/*
+- * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
+- */
+-static int select_fallback_rq(int cpu, struct task_struct *p)
+-{
+- int dest_cpu;
+- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+-
+- /* Look for allowed, online CPU in same node. */
+- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+- return dest_cpu;
+-
+- /* Any allowed, online CPU? */
+- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+- if (dest_cpu < nr_cpu_ids)
+- return dest_cpu;
+-
+- /* No more Mr. Nice Guy. */
+- if (unlikely(dest_cpu >= nr_cpu_ids)) {
+- dest_cpu = cpuset_cpus_allowed_fallback(p);
+- /*
+- * Don't tell them about moving exiting tasks or
+- * kernel threads (both mm NULL), since they never
+- * leave kernel.
+- */
+- if (p->mm && printk_ratelimit()) {
+- printk(KERN_INFO "process %d (%s) no "
+- "longer affine to cpu%d\n",
+- task_pid_nr(p), p->comm, cpu);
+- }
+- }
+-
+- return dest_cpu;
+-}
+-
+-/*
+- * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+- */
+-static inline
+-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+-{
+- int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+-
+- /*
+- * In order not to call set_task_cpu() on a blocking task we need
+- * to rely on ttwu() to place the task on a valid ->cpus_allowed
+- * cpu.
+- *
+- * Since this is common to all placement strategies, this lives here.
+- *
+- * [ this allows ->select_task() to simply return task_cpu(p) and
+- * not worry about this generic constraint ]
+- */
+- if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+- !cpu_online(cpu)))
+- cpu = select_fallback_rq(task_cpu(p), p);
+-
+- return cpu;
+-}
+-#endif
+-
+ /***
+ * try_to_wake_up - wake up a thread
+ * @p: the to-be-woken-up thread
+@@ -2435,34 +2379,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
+ *
+ * First fix up the nr_uninterruptible count:
+ */
+- if (task_contributes_to_load(p)) {
+- if (likely(cpu_online(orig_cpu)))
+- rq->nr_uninterruptible--;
+- else
+- this_rq()->nr_uninterruptible--;
+- }
++ if (task_contributes_to_load(p))
++ rq->nr_uninterruptible--;
+ p->state = TASK_WAKING;
++ task_rq_unlock(rq, &flags);
+
+- if (p->sched_class->task_waking)
+- p->sched_class->task_waking(rq, p);
+-
+- cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
++ cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+ if (cpu != orig_cpu)
+ set_task_cpu(p, cpu);
+- __task_rq_unlock(rq);
+
+- rq = cpu_rq(cpu);
+- spin_lock(&rq->lock);
+- update_rq_clock(rq);
++ rq = task_rq_lock(p, &flags);
++
++ if (rq != orig_rq)
++ update_rq_clock(rq);
+
+- /*
+- * We migrated the task without holding either rq->lock, however
+- * since the task is not on the task list itself, nobody else
+- * will try and migrate the task, hence the rq should match the
+- * cpu we just moved it to.
+- */
+- WARN_ON(task_cpu(p) != cpu);
+ WARN_ON(p->state != TASK_WAKING);
++ cpu = task_cpu(p);
+
+ #ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+@@ -2515,8 +2447,8 @@ out_running:
+
+ p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_woken)
+- p->sched_class->task_woken(rq, p);
++ if (p->sched_class->task_wake_up)
++ p->sched_class->task_wake_up(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+@@ -2596,6 +2528,7 @@ static void __sched_fork(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
++ p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+@@ -2616,6 +2549,14 @@ static void __sched_fork(struct task_struct *p)
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ INIT_HLIST_HEAD(&p->preempt_notifiers);
+ #endif
++
++ /*
++ * We mark the process as running here, but have not actually
++ * inserted it onto the runqueue yet. This guarantees that
++ * nobody will actually run it, and a signal or other external
++ * event cannot wake it up and insert it on the runqueue either.
++ */
++ p->state = TASK_RUNNING;
+ }
+
+ /*
+@@ -2626,12 +2567,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ int cpu = get_cpu();
+
+ __sched_fork(p);
+- /*
+- * We mark the process as running here. This guarantees that
+- * nobody will actually run it, and a signal or other external
+- * event cannot wake it up and insert it on the runqueue either.
+- */
+- p->state = TASK_RUNNING;
+
+ /*
+ * Revert to default priority/policy on fork if requested.
+@@ -2663,9 +2598,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ if (!rt_prio(p->prio))
+ p->sched_class = &fair_sched_class;
+
+- if (p->sched_class->task_fork)
+- p->sched_class->task_fork(p);
+-
++#ifdef CONFIG_SMP
++ cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
++#endif
+ set_task_cpu(p, cpu);
+
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+@@ -2695,38 +2630,28 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+ {
+ unsigned long flags;
+ struct rq *rq;
+- int cpu = get_cpu();
+-
+-#ifdef CONFIG_SMP
+- rq = task_rq_lock(p, &flags);
+- p->state = TASK_WAKING;
+-
+- /*
+- * Fork balancing, do it here and not earlier because:
+- * - cpus_allowed can change in the fork path
+- * - any previously selected cpu might disappear through hotplug
+- *
+- * We set TASK_WAKING so that select_task_rq() can drop rq->lock
+- * without people poking at ->cpus_allowed.
+- */
+- cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
+- set_task_cpu(p, cpu);
+-
+- p->state = TASK_RUNNING;
+- task_rq_unlock(rq, &flags);
+-#endif
+
+ rq = task_rq_lock(p, &flags);
++ BUG_ON(p->state != TASK_RUNNING);
+ update_rq_clock(rq);
+- activate_task(rq, p, 0);
++
++ if (!p->sched_class->task_new || !current->se.on_rq) {
++ activate_task(rq, p, 0);
++ } else {
++ /*
++ * Let the scheduling class do new task startup
++ * management (if any):
++ */
++ p->sched_class->task_new(rq, p);
++ inc_nr_running(rq);
++ }
+ trace_sched_wakeup_new(rq, p, 1);
+ check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_woken)
+- p->sched_class->task_woken(rq, p);
++ if (p->sched_class->task_wake_up)
++ p->sched_class->task_wake_up(rq, p);
+ #endif
+ task_rq_unlock(rq, &flags);
+- put_cpu();
+ }
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -3113,6 +3038,15 @@ static void calc_load_account_active(struct rq *this_rq)
+ }
+
+ /*
++ * Externally visible per-cpu scheduler statistics:
++ * cpu_nr_migrations(cpu) - number of migrations into that cpu
++ */
++u64 cpu_nr_migrations(int cpu)
++{
++ return cpu_rq(cpu)->nr_migrations_in;
++}
++
++/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC).
+ */
+@@ -3194,28 +3128,24 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ }
+
+ /*
+- * sched_exec - execve() is a valuable balancing opportunity, because at
+- * this point the task has the smallest effective memory and cache footprint.
++ * If dest_cpu is allowed for this process, migrate the task to it.
++ * This is accomplished by forcing the cpu_allowed mask to only
++ * allow dest_cpu, which will force the cpu onto dest_cpu. Then
++ * the cpu_allowed mask is restored.
+ */
+-void sched_exec(void)
++static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+ {
+- struct task_struct *p = current;
+ struct migration_req req;
+ unsigned long flags;
+ struct rq *rq;
+- int dest_cpu;
+
+ rq = task_rq_lock(p, &flags);
+- dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+- if (dest_cpu == smp_processor_id())
+- goto unlock;
++ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
++ || unlikely(!cpu_active(dest_cpu)))
++ goto out;
+
+- /*
+- * select_task_rq() can race against ->cpus_allowed
+- */
+- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
+- likely(cpu_active(dest_cpu)) &&
+- migrate_task(p, dest_cpu, &req)) {
++ /* force the process onto the specified CPU */
++ if (migrate_task(p, dest_cpu, &req)) {
+ /* Need to wait for migration thread (might exit: take ref). */
+ struct task_struct *mt = rq->migration_thread;
+
+@@ -3227,11 +3157,24 @@ void sched_exec(void)
+
+ return;
+ }
+-unlock:
++out:
+ task_rq_unlock(rq, &flags);
+ }
+
+ /*
++ * sched_exec - execve() is a valuable balancing opportunity, because at
++ * this point the task has the smallest effective memory and cache footprint.
++ */
++void sched_exec(void)
++{
++ int new_cpu, this_cpu = get_cpu();
++ new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
++ put_cpu();
++ if (new_cpu != this_cpu)
++ sched_migrate_task(current, new_cpu);
++}
++
++/*
+ * pull_task - move a task from a remote runqueue to the local runqueue.
+ * Both runqueues must be locked.
+ */
+@@ -6031,15 +5974,14 @@ EXPORT_SYMBOL(wait_for_completion_killable);
+ */
+ bool try_wait_for_completion(struct completion *x)
+ {
+- unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ spin_unlock_irq(&x->wait.lock);
+ return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -6054,13 +5996,12 @@ EXPORT_SYMBOL(try_wait_for_completion);
+ */
+ bool completion_done(struct completion *x)
+ {
+- unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ spin_unlock_irq(&x->wait.lock);
+ return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
+@@ -6154,7 +6095,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq) {
+- enqueue_task(rq, p, 0, oldprio < prio);
++ enqueue_task(rq, p, 0);
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
+ }
+@@ -6198,7 +6139,7 @@ void set_user_nice(struct task_struct *p, long nice)
+ delta = p->prio - old_prio;
+
+ if (on_rq) {
+- enqueue_task(rq, p, 0, false);
++ enqueue_task(rq, p, 0);
+ /*
+ * If the task increased its priority or is running and
+ * lowered its priority, then reschedule its CPU:
+@@ -6589,7 +6530,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ return -EINVAL;
+
+ retval = -ESRCH;
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (p) {
+ retval = security_task_getscheduler(p);
+@@ -6597,7 +6538,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ retval = p->policy
+ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
+ }
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ return retval;
+ }
+
+@@ -6615,7 +6556,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ if (!param || pid < 0)
+ return -EINVAL;
+
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ retval = -ESRCH;
+ if (!p)
+@@ -6626,7 +6567,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ goto out_unlock;
+
+ lp.sched_priority = p->rt_priority;
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+
+ /*
+ * This one might sleep, we cannot do it with a spinlock held ...
+@@ -6636,7 +6577,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ return retval;
+
+ out_unlock:
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ return retval;
+ }
+
+@@ -6647,18 +6588,22 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+ int retval;
+
+ get_online_cpus();
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+
+ p = find_process_by_pid(pid);
+ if (!p) {
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ put_online_cpus();
+ return -ESRCH;
+ }
+
+- /* Prevent p going away */
++ /*
++ * It is not safe to call set_cpus_allowed with the
++ * tasklist_lock held. We will bump the task_struct's
++ * usage count and then drop tasklist_lock.
++ */
+ get_task_struct(p);
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ retval = -ENOMEM;
+@@ -6739,12 +6684,10 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ {
+ struct task_struct *p;
+- unsigned long flags;
+- struct rq *rq;
+ int retval;
+
+ get_online_cpus();
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+
+ retval = -ESRCH;
+ p = find_process_by_pid(pid);
+@@ -6755,12 +6698,10 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ if (retval)
+ goto out_unlock;
+
+- rq = task_rq_lock(p, &flags);
+ cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+- task_rq_unlock(rq, &flags);
+
+ out_unlock:
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ put_online_cpus();
+
+ return retval;
+@@ -6999,8 +6940,6 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ {
+ struct task_struct *p;
+ unsigned int time_slice;
+- unsigned long flags;
+- struct rq *rq;
+ int retval;
+ struct timespec t;
+
+@@ -7008,7 +6947,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ return -EINVAL;
+
+ retval = -ESRCH;
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (!p)
+ goto out_unlock;
+@@ -7017,17 +6956,15 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ if (retval)
+ goto out_unlock;
+
+- rq = task_rq_lock(p, &flags);
+- time_slice = p->sched_class->get_rr_interval(rq, p);
+- task_rq_unlock(rq, &flags);
++ time_slice = p->sched_class->get_rr_interval(p);
+
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ jiffies_to_timespec(time_slice, &t);
+ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+ return retval;
+
+ out_unlock:
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ return retval;
+ }
+
+@@ -7118,7 +7055,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
+ spin_lock_irqsave(&rq->lock, flags);
+
+ __sched_fork(idle);
+- idle->state = TASK_RUNNING;
+ idle->se.exec_start = sched_clock();
+
+ cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+@@ -7213,19 +7149,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ struct rq *rq;
+ int ret = 0;
+
+- /*
+- * Serialize against TASK_WAKING so that ttwu() and wunt() can
+- * drop the rq->lock and still rely on ->cpus_allowed.
+- */
+-again:
+- while (task_is_waking(p))
+- cpu_relax();
+ rq = task_rq_lock(p, &flags);
+- if (task_is_waking(p)) {
+- task_rq_unlock(rq, &flags);
+- goto again;
+- }
+-
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ ret = -EINVAL;
+ goto out;
+@@ -7254,7 +7178,7 @@ again:
+
+ get_task_struct(mt);
+ task_rq_unlock(rq, &flags);
+- wake_up_process(mt);
++ wake_up_process(rq->migration_thread);
+ put_task_struct(mt);
+ wait_for_completion(&req.done);
+ tlb_migrate_finish(p->mm);
+@@ -7281,7 +7205,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ {
+ struct rq *rq_dest, *rq_src;
+- int ret = 0;
++ int ret = 0, on_rq;
+
+ if (unlikely(!cpu_active(dest_cpu)))
+ return ret;
+@@ -7293,17 +7217,19 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ /* Already moved. */
+ if (task_cpu(p) != src_cpu)
+ goto done;
++ /* Waking up, don't get in the way of try_to_wake_up(). */
++ if (p->state == TASK_WAKING)
++ goto fail;
+ /* Affinity changed (again). */
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ goto fail;
+
+- /*
+- * If we're not on a rq, the next wake-up will ensure we're
+- * placed properly.
+- */
+- if (p->se.on_rq) {
++ on_rq = p->se.on_rq;
++ if (on_rq)
+ deactivate_task(rq_src, p, 0);
+- set_task_cpu(p, dest_cpu);
++
++ set_task_cpu(p, dest_cpu);
++ if (on_rq) {
+ activate_task(rq_dest, p, 0);
+ check_preempt_curr(rq_dest, p, 0);
+ }
+@@ -7382,29 +7308,57 @@ static int migration_thread(void *data)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
++
++static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
++{
++ int ret;
++
++ local_irq_disable();
++ ret = __migrate_task(p, src_cpu, dest_cpu);
++ local_irq_enable();
++ return ret;
++}
++
+ /*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ */
+-void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
++static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+- struct rq *rq = cpu_rq(dead_cpu);
+- int needs_cpu, uninitialized_var(dest_cpu);
+- unsigned long flags;
++ int dest_cpu;
++ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
+
+- local_irq_save(flags);
++again:
++ /* Look for allowed, online CPU in same node. */
++ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
++ goto move;
+
+- spin_lock(&rq->lock);
+- needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
+- if (needs_cpu)
+- dest_cpu = select_fallback_rq(dead_cpu, p);
+- spin_unlock(&rq->lock);
+- /*
+- * It can only fail if we race with set_cpus_allowed(),
+- * in the racer should migrate the task anyway.
+- */
+- if (needs_cpu)
+- __migrate_task(p, dead_cpu, dest_cpu);
+- local_irq_restore(flags);
++ /* Any allowed, online CPU? */
++ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
++ if (dest_cpu < nr_cpu_ids)
++ goto move;
++
++ /* No more Mr. Nice Guy. */
++ if (dest_cpu >= nr_cpu_ids) {
++ cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
++ dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
++
++ /*
++ * Don't tell them about moving exiting tasks or
++ * kernel threads (both mm NULL), since they never
++ * leave kernel.
++ */
++ if (p->mm && printk_ratelimit()) {
++ printk(KERN_INFO "process %d (%s) no "
++ "longer affine to cpu%d\n",
++ task_pid_nr(p), p->comm, dead_cpu);
++ }
++ }
++
++move:
++ /* It can have affinity changed while we were choosing. */
++ if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
++ goto again;
+ }
+
+ /*
+@@ -7798,23 +7752,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ cpu_rq(cpu)->migration_thread = NULL;
+ break;
+
+- case CPU_POST_DEAD:
+- /*
+- * Bring the migration thread down in CPU_POST_DEAD event,
+- * since the timers should have got migrated by now and thus
+- * we should not see a deadlock between trying to kill the
+- * migration thread and the sched_rt_period_timer.
+- */
+- rq = cpu_rq(cpu);
+- kthread_stop(rq->migration_thread);
+- put_task_struct(rq->migration_thread);
+- rq->migration_thread = NULL;
+- break;
+-
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
++ cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
+ migrate_live_tasks(cpu);
+ rq = cpu_rq(cpu);
++ kthread_stop(rq->migration_thread);
++ put_task_struct(rq->migration_thread);
++ rq->migration_thread = NULL;
+ /* Idle task back to normal (off runqueue, low prio) */
+ spin_lock_irq(&rq->lock);
+ update_rq_clock(rq);
+@@ -7823,6 +7768,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ rq->idle->sched_class = &idle_sched_class;
+ migrate_dead_tasks(cpu);
+ spin_unlock_irq(&rq->lock);
++ cpuset_unlock();
+ migrate_nr_uninterruptible(rq);
+ BUG_ON(rq->nr_running != 0);
+ calc_global_load_remove(rq);
+@@ -10153,13 +10099,13 @@ void sched_move_task(struct task_struct *tsk)
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ if (tsk->sched_class->moved_group)
+- tsk->sched_class->moved_group(tsk, on_rq);
++ tsk->sched_class->moved_group(tsk);
+ #endif
+
+ if (unlikely(running))
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
+- enqueue_task(rq, tsk, 0, false);
++ enqueue_task(rq, tsk, 0);
+
+ task_rq_unlock(rq, &flags);
+ }
+@@ -10931,30 +10877,12 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+ }
+
+ /*
+- * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
+- * in cputime_t units. As a result, cpuacct_update_stats calls
+- * percpu_counter_add with values large enough to always overflow the
+- * per cpu batch limit causing bad SMP scalability.
+- *
+- * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
+- * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
+- * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
+- */
+-#ifdef CONFIG_SMP
+-#define CPUACCT_BATCH \
+- min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
+-#else
+-#define CPUACCT_BATCH 0
+-#endif
+-
+-/*
+ * Charge the system/user time to the task's accounting group.
+ */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+ {
+ struct cpuacct *ca;
+- int batch = CPUACCT_BATCH;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+@@ -10963,7 +10891,7 @@ static void cpuacct_update_stats(struct task_struct *tsk,
+ ca = task_ca(tsk);
+
+ do {
+- __percpu_counter_add(&ca->cpustat[idx], val, batch);
++ percpu_counter_add(&ca->cpustat[idx], val);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 6f836a8..6988cf0 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -423,6 +423,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+ P(se.nr_failed_migrations_running);
+ P(se.nr_failed_migrations_hot);
+ P(se.nr_forced_migrations);
++ P(se.nr_forced2_migrations);
+ P(se.nr_wakeups);
+ P(se.nr_wakeups_sync);
+ P(se.nr_wakeups_migrate);
+@@ -498,6 +499,7 @@ void proc_sched_set_task(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
++ p->se.nr_forced2_migrations = 0;
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index 623b876..d80812d 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -488,7 +488,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq, exec_clock, delta_exec);
+ delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+-
+ curr->vruntime += delta_exec_weighted;
+ update_min_vruntime(cfs_rq);
+ }
+@@ -744,26 +743,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ se->vruntime = vruntime;
+ }
+
+-#define ENQUEUE_WAKEUP 1
+-#define ENQUEUE_MIGRATE 2
+-
+ static void
+-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
++enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+ {
+ /*
+- * Update the normalized vruntime before updating min_vruntime
+- * through callig update_curr().
+- */
+- if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+- se->vruntime += cfs_rq->min_vruntime;
+-
+- /*
+ * Update run-time statistics of the 'current'.
+ */
+ update_curr(cfs_rq);
+ account_entity_enqueue(cfs_rq, se);
+
+- if (flags & ENQUEUE_WAKEUP) {
++ if (wakeup) {
+ place_entity(cfs_rq, se, 0);
+ enqueue_sleeper(cfs_rq, se);
+ }
+@@ -817,14 +806,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
+ __dequeue_entity(cfs_rq, se);
+ account_entity_dequeue(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
+-
+- /*
+- * Normalize the entity after updating the min_vruntime because the
+- * update can refer to the ->curr item and we need to reflect this
+- * movement in our normalized position.
+- */
+- if (!sleep)
+- se->vruntime -= cfs_rq->min_vruntime;
+ }
+
+ /*
+@@ -1031,24 +1012,17 @@ static inline void hrtick_update(struct rq *rq)
+ * increased. Here we update the fair scheduling stats and
+ * then put the task into the rbtree:
+ */
+-static void
+-enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
++static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
+ {
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+- int flags = 0;
+-
+- if (wakeup)
+- flags |= ENQUEUE_WAKEUP;
+- if (p->state == TASK_WAKING)
+- flags |= ENQUEUE_MIGRATE;
+
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+ cfs_rq = cfs_rq_of(se);
+- enqueue_entity(cfs_rq, se, flags);
+- flags = ENQUEUE_WAKEUP;
++ enqueue_entity(cfs_rq, se, wakeup);
++ wakeup = 1;
+ }
+
+ hrtick_update(rq);
+@@ -1124,14 +1098,6 @@ static void yield_task_fair(struct rq *rq)
+
+ #ifdef CONFIG_SMP
+
+-static void task_waking_fair(struct rq *rq, struct task_struct *p)
+-{
+- struct sched_entity *se = &p->se;
+- struct cfs_rq *cfs_rq = cfs_rq_of(se);
+-
+- se->vruntime -= cfs_rq->min_vruntime;
+-}
+-
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * effective_load() calculates the load change as seen from the root_task_group
+@@ -1250,7 +1216,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
+- rcu_read_lock();
+ if (sync) {
+ tg = task_group(current);
+ weight = current->se.load.weight;
+@@ -1276,7 +1241,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ balanced = !this_load ||
+ 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
+ imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
+- rcu_read_unlock();
+
+ /*
+ * If the currently running task will sleep within
+@@ -1384,56 +1348,6 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+ }
+
+ /*
+- * Try and locate an idle CPU in the sched_domain.
+- */
+-static int select_idle_sibling(struct task_struct *p, int target)
+-{
+- int cpu = smp_processor_id();
+- int prev_cpu = task_cpu(p);
+- struct sched_domain *sd;
+- int i;
+-
+- /*
+- * If the task is going to be woken-up on this cpu and if it is
+- * already idle, then it is the right target.
+- */
+- if (target == cpu && idle_cpu(cpu))
+- return cpu;
+-
+- /*
+- * If the task is going to be woken-up on the cpu where it previously
+- * ran and if it is currently idle, then it the right target.
+- */
+- if (target == prev_cpu && idle_cpu(prev_cpu))
+- return prev_cpu;
+-
+- /*
+- * Otherwise, iterate the domains and find an elegible idle cpu.
+- */
+- for_each_domain(target, sd) {
+- if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
+- break;
+-
+- for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+- if (idle_cpu(i)) {
+- target = i;
+- break;
+- }
+- }
+-
+- /*
+- * Lets stop looking for an idle sibling when we reached
+- * the domain that spans the current cpu and prev_cpu.
+- */
+- if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
+- cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
+- break;
+- }
+-
+- return target;
+-}
+-
+-/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+@@ -1444,8 +1358,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
+ *
+ * preempt must be disabled.
+ */
+-static int
+-select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
++static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+ {
+ struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ int cpu = smp_processor_id();
+@@ -1462,6 +1375,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ new_cpu = prev_cpu;
+ }
+
++ rcu_read_lock();
+ for_each_domain(cpu, tmp) {
+ if (!(tmp->flags & SD_LOAD_BALANCE))
+ continue;
+@@ -1490,14 +1404,38 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ want_sd = 0;
+ }
+
+- /*
+- * If both cpu and prev_cpu are part of this domain,
+- * cpu is a valid SD_WAKE_AFFINE target.
+- */
+- if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+- cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+- affine_sd = tmp;
+- want_affine = 0;
++ if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
++ int candidate = -1, i;
++
++ if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
++ candidate = cpu;
++
++ /*
++ * Check for an idle shared cache.
++ */
++ if (tmp->flags & SD_PREFER_SIBLING) {
++ if (candidate == cpu) {
++ if (!cpu_rq(prev_cpu)->cfs.nr_running)
++ candidate = prev_cpu;
++ }
++
++ if (candidate == -1 || candidate == cpu) {
++ for_each_cpu(i, sched_domain_span(tmp)) {
++ if (!cpumask_test_cpu(i, &p->cpus_allowed))
++ continue;
++ if (!cpu_rq(i)->cfs.nr_running) {
++ candidate = i;
++ break;
++ }
++ }
++ }
++ }
++
++ if (candidate >= 0) {
++ affine_sd = tmp;
++ want_affine = 0;
++ cpu = candidate;
++ }
+ }
+
+ if (!want_sd && !want_affine)
+@@ -1510,7 +1448,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ sd = tmp;
+ }
+
+-#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (sched_feat(LB_SHARES_UPDATE)) {
+ /*
+ * Pick the largest domain to update shares over
+@@ -1521,19 +1458,13 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ cpumask_weight(sched_domain_span(sd))))
+ tmp = affine_sd;
+
+- if (tmp) {
+- spin_unlock(&rq->lock);
++ if (tmp)
+ update_shares(tmp);
+- spin_lock(&rq->lock);
+- }
+ }
+-#endif
+
+- if (affine_sd) {
+- if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+- return select_idle_sibling(p, cpu);
+- else
+- return select_idle_sibling(p, prev_cpu);
++ if (affine_sd && wake_affine(affine_sd, p, sync)) {
++ new_cpu = cpu;
++ goto out;
+ }
+
+ while (sd) {
+@@ -1575,6 +1506,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ /* while loop will break here if sd == NULL */
+ }
+
++out:
++ rcu_read_unlock();
+ return new_cpu;
+ }
+ #endif /* CONFIG_SMP */
+@@ -1978,32 +1911,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ }
+
+ /*
+- * called on fork with the child task as argument from the parent's context
+- * - child not yet on the tasklist
+- * - preemption disabled
++ * Share the fairness runtime between parent and child, thus the
++ * total amount of pressure for CPU stays equal - new tasks
++ * get a chance to run but frequent forkers are not allowed to
++ * monopolize the CPU. Note: the parent runqueue is locked,
++ * the child is not running yet.
+ */
+-static void task_fork_fair(struct task_struct *p)
++static void task_new_fair(struct rq *rq, struct task_struct *p)
+ {
+- struct cfs_rq *cfs_rq = task_cfs_rq(current);
++ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+ int this_cpu = smp_processor_id();
+- struct rq *rq = this_rq();
+- unsigned long flags;
+-
+- spin_lock_irqsave(&rq->lock, flags);
+-
+- update_rq_clock(rq);
+
+- if (unlikely(task_cpu(p) != this_cpu))
+- __set_task_cpu(p, this_cpu);
++ sched_info_queued(p);
+
+ update_curr(cfs_rq);
+-
+ if (curr)
+ se->vruntime = curr->vruntime;
+ place_entity(cfs_rq, se, 1);
+
+- if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
++ /* 'curr' will be NULL if the child belongs to a different group */
++ if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
++ curr && entity_before(curr, se)) {
+ /*
+ * Upon rescheduling, sched_class::put_prev_task() will place
+ * 'current' within the tree based on its new key value.
+@@ -2012,9 +1941,7 @@ static void task_fork_fair(struct task_struct *p)
+ resched_task(rq->curr);
+ }
+
+- se->vruntime -= cfs_rq->min_vruntime;
+-
+- spin_unlock_irqrestore(&rq->lock, flags);
++ enqueue_task_fair(rq, p, 0);
+ }
+
+ /*
+@@ -2067,27 +1994,30 @@ static void set_curr_task_fair(struct rq *rq)
+ }
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-static void moved_group_fair(struct task_struct *p, int on_rq)
++static void moved_group_fair(struct task_struct *p)
+ {
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+ update_curr(cfs_rq);
+- if (!on_rq)
+- place_entity(cfs_rq, &p->se, 1);
++ place_entity(cfs_rq, &p->se, 1);
+ }
+ #endif
+
+-unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
++unsigned int get_rr_interval_fair(struct task_struct *task)
+ {
+ struct sched_entity *se = &task->se;
++ unsigned long flags;
++ struct rq *rq;
+ unsigned int rr_interval = 0;
+
+ /*
+ * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+ * idle runqueue:
+ */
++ rq = task_rq_lock(task, &flags);
+ if (rq->cfs.load.weight)
+ rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
++ task_rq_unlock(rq, &flags);
+
+ return rr_interval;
+ }
+@@ -2113,13 +2043,11 @@ static const struct sched_class fair_sched_class = {
+ .move_one_task = move_one_task_fair,
+ .rq_online = rq_online_fair,
+ .rq_offline = rq_offline_fair,
+-
+- .task_waking = task_waking_fair,
+ #endif
+
+ .set_curr_task = set_curr_task_fair,
+ .task_tick = task_tick_fair,
+- .task_fork = task_fork_fair,
++ .task_new = task_new_fair,
+
+ .prio_changed = prio_changed_fair,
+ .switched_to = switched_to_fair,
+diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
+index 93ad2e7..b133a28 100644
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -6,8 +6,7 @@
+ */
+
+ #ifdef CONFIG_SMP
+-static int
+-select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
++static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
+ {
+ return task_cpu(p); /* IDLE tasks as never migrated */
+ }
+@@ -98,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+ check_preempt_curr(rq, p, 0);
+ }
+
+-unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
++unsigned int get_rr_interval_idle(struct task_struct *task)
+ {
+ return 0;
+ }
+diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
+index af24fab..a4d790c 100644
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -194,7 +194,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+ return rt_se->my_q;
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+
+ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+@@ -204,7 +204,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+
+ if (rt_rq->rt_nr_running) {
+ if (rt_se && !on_rt_rq(rt_se))
+- enqueue_rt_entity(rt_se, false);
++ enqueue_rt_entity(rt_se);
+ if (rt_rq->highest_prio.curr < curr->prio)
+ resched_task(curr);
+ }
+@@ -803,7 +803,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+ dec_rt_group(rt_se, rt_rq);
+ }
+
+-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
++static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
+ {
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+ struct rt_prio_array *array = &rt_rq->active;
+@@ -819,10 +819,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+ return;
+
+- if (head)
+- list_add(&rt_se->run_list, queue);
+- else
+- list_add_tail(&rt_se->run_list, queue);
++ list_add_tail(&rt_se->run_list, queue);
+ __set_bit(rt_se_prio(rt_se), array->bitmap);
+
+ inc_rt_tasks(rt_se, rt_rq);
+@@ -859,11 +856,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+ }
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
+ {
+ dequeue_rt_stack(rt_se);
+ for_each_sched_rt_entity(rt_se)
+- __enqueue_rt_entity(rt_se, head);
++ __enqueue_rt_entity(rt_se);
+ }
+
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+@@ -874,22 +871,21 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+ if (rt_rq && rt_rq->rt_nr_running)
+- __enqueue_rt_entity(rt_se, false);
++ __enqueue_rt_entity(rt_se);
+ }
+ }
+
+ /*
+ * Adding/removing a task to/from a priority array:
+ */
+-static void
+-enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
++static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+ {
+ struct sched_rt_entity *rt_se = &p->rt;
+
+ if (wakeup)
+ rt_se->timeout = 0;
+
+- enqueue_rt_entity(rt_se, head);
++ enqueue_rt_entity(rt_se);
+
+ if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+ enqueue_pushable_task(rq, p);
+@@ -942,9 +938,10 @@ static void yield_task_rt(struct rq *rq)
+ #ifdef CONFIG_SMP
+ static int find_lowest_rq(struct task_struct *task);
+
+-static int
+-select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
++static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+ {
++ struct rq *rq = task_rq(p);
++
+ if (sd_flag != SD_BALANCE_WAKE)
+ return smp_processor_id();
+
+@@ -1488,7 +1485,7 @@ static void post_schedule_rt(struct rq *rq)
+ * If we are not running and we are not going to reschedule soon, we should
+ * try to push tasks away now
+ */
+-static void task_woken_rt(struct rq *rq, struct task_struct *p)
++static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+ {
+ if (!task_running(rq, p) &&
+ !test_tsk_need_resched(rq->curr) &&
+@@ -1737,7 +1734,7 @@ static void set_curr_task_rt(struct rq *rq)
+ dequeue_pushable_task(rq, p);
+ }
+
+-unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
++unsigned int get_rr_interval_rt(struct task_struct *task)
+ {
+ /*
+ * Time slice is 0 for SCHED_FIFO tasks
+@@ -1769,7 +1766,7 @@ static const struct sched_class rt_sched_class = {
+ .rq_offline = rq_offline_rt,
+ .pre_schedule = pre_schedule_rt,
+ .post_schedule = post_schedule_rt,
+- .task_woken = task_woken_rt,
++ .task_wake_up = task_wake_up_rt,
+ .switched_from = switched_from_rt,
+ #endif
+
Added: dists/sid/linux-2.6/debian/patches/debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,88 @@
+From 960bb81153ec66609799b8f1c072b9266629e765 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Mon, 20 Sep 2010 23:07:08 +0100
+Subject: [PATCH] Revert "sched: Pre-compute cpumask_weight(sched_domain_span(sd))"
+
+This reverts commit 6efd9bbce0d4b02d295f28054caa74e6edf811b7
+which is an ABI breaker.
+---
+ include/linux/sched.h | 1 -
+ kernel/sched.c | 7 ++-----
+ kernel/sched_fair.c | 8 +++++---
+ 3 files changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 2246de3..682d6d4 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1003,7 +1003,6 @@ struct sched_domain {
+ char *name;
+ #endif
+
+- unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 152214d..98d4048 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -3678,7 +3678,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+
+ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = sd->span_weight;
++ unsigned long weight = cpumask_weight(sched_domain_span(sd));
+ unsigned long smt_gain = sd->smt_gain;
+
+ smt_gain /= weight;
+@@ -3711,7 +3711,7 @@ unsigned long scale_rt_power(int cpu)
+
+ static void update_cpu_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = sd->span_weight;
++ unsigned long weight = cpumask_weight(sched_domain_span(sd));
+ unsigned long power = SCHED_LOAD_SCALE;
+ struct sched_group *sdg = sd->groups;
+
+@@ -8166,9 +8166,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_domain *tmp;
+
+- for (tmp = sd; tmp; tmp = tmp->parent)
+- tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
+-
+ /* Remove the sched domains which do not contribute to scheduling. */
+ for (tmp = sd; tmp; ) {
+ struct sched_domain *parent = tmp->parent;
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index 01e311e..623b876 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1516,7 +1516,9 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ * Pick the largest domain to update shares over
+ */
+ tmp = sd;
+- if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
++ if (affine_sd && (!tmp ||
++ cpumask_weight(sched_domain_span(affine_sd)) >
++ cpumask_weight(sched_domain_span(sd))))
+ tmp = affine_sd;
+
+ if (tmp) {
+@@ -1562,10 +1564,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = new_cpu;
+- weight = sd->span_weight;
++ weight = cpumask_weight(sched_domain_span(sd));
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+- if (weight <= tmp->span_weight)
++ if (weight <= cpumask_weight(sched_domain_span(tmp)))
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+--
+1.7.1
+
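Background note: the reverted commit cached cpumask_weight(sched_domain_span(sd)) in a new span_weight field of struct sched_domain, and inserting a field mid-structure shifts the offsets of every later member, which is the kind of ABI break being avoided here. A minimal userspace illustration of that effect follows; the structure and member names below are simplified stand-ins, not the kernel's:

/*
 * Illustration only -- not taken from the patch.  Inserting a member in
 * the middle of a structure changes the offsets of the members after it,
 * so code built against the old layout misreads instances that use the
 * new layout.
 */
#include <stddef.h>
#include <stdio.h>

struct domain_old {
	char *name;
	unsigned long span[1];
};

struct domain_new {
	char *name;
	unsigned int span_weight;	/* newly inserted member */
	unsigned long span[1];
};

int main(void)
{
	printf("offsetof(span), old layout: %zu\n",
	       offsetof(struct domain_old, span));
	printf("offsetof(span), new layout: %zu\n",
	       offsetof(struct domain_new, span));
	return 0;
}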
Added: dists/sid/linux-2.6/debian/patches/debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,75 @@
+From 24b6233b4e4bdb7b45dc28e399595a79874e3ec0 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Mon, 20 Sep 2010 23:20:13 +0100
+Subject: [PATCH] sched: Avoid ABI change due to sched_class changes
+
+struct sched_class is private to the scheduler, but since it is
+defined in <linux/sched.h> it affects the symbol version of many
+exported symbols. Hide the changes from genksyms since it should not
+consider sched_class as part of the exported ABI.
+---
+ include/linux/sched.h | 41 +++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 41 insertions(+), 0 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 682d6d4..1184379 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1071,6 +1071,7 @@ struct sched_domain;
+ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+ #define WF_FORK 0x02 /* child wakeup after fork */
+
++#ifndef __GENKSYMS__
+ struct sched_class {
+ const struct sched_class *next;
+
+@@ -1126,6 +1127,46 @@ struct sched_class {
+ void (*moved_group) (struct task_struct *p, int on_rq);
+ #endif
+ };
++#else /* __GENKSYMS__ */
++/*
++ * struct sched_class is private to the scheduler, but since it is
++ * defined here it affects the symbol version of many exported symbols.
++ * This is a fake definition purely to keep symbol versions stable.
++ */
++struct sched_class {
++ const struct sched_class *next;
++ void (*enqueue_task) (struct rq *, struct task_struct *, int);
++ void (*dequeue_task) (struct rq *, struct task_struct *, int);
++ void (*yield_task) (struct rq *);
++ void (*check_preempt_curr) (struct rq *, struct task_struct *, int);
++ struct task_struct * (*pick_next_task) (struct rq *);
++ void (*put_prev_task) (struct rq *, struct task_struct *);
++#ifdef CONFIG_SMP
++ int (*select_task_rq)(struct task_struct *, int, int);
++ unsigned long (*load_balance) (struct rq *, int, struct rq *,
++ unsigned long, struct sched_domain *,
++ enum cpu_idle_type, int *, int *);
++ int (*move_one_task) (struct rq *, int, struct rq *,
++ struct sched_domain *, enum cpu_idle_type);
++ void (*pre_schedule) (struct rq *, struct task_struct *);
++ void (*post_schedule) (struct rq *);
++ void (*task_wake_up) (struct rq *, struct task_struct *);
++ void (*set_cpus_allowed)(struct task_struct *, const struct cpumask *);
++ void (*rq_online)(struct rq *);
++ void (*rq_offline)(struct rq *);
++#endif
++ void (*set_curr_task) (struct rq *);
++ void (*task_tick) (struct rq *, struct task_struct *, int);
++ void (*task_new) (struct rq *, struct task_struct *);
++ void (*switched_from) (struct rq *, struct task_struct *, int);
++ void (*switched_to) (struct rq *, struct task_struct *, int);
++ void (*prio_changed) (struct rq *, struct task_struct *, int, int);
++ unsigned int (*get_rr_interval) (struct task_struct *);
++#ifdef CONFIG_FAIR_GROUP_SCHED
++ void (*moved_group) (struct task_struct *);
++#endif
++};
++#endif /* __GENKSYMS__ */
+
+ struct load_weight {
+ unsigned long weight, inv_weight;
+--
+1.7.1
+
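Background note on the guard above: the kernel build feeds genksyms source that has been preprocessed with __GENKSYMS__ defined, so genksyms only ever sees the frozen copy of the structure while the compiler sees the real one, and the exported symbol CRCs stay unchanged. A stripped-down sketch of the same pattern, with made-up structure and member names:

/*
 * Sketch of the __GENKSYMS__ guard pattern (hypothetical names, not from
 * the kernel).  Assumption: the symbol-version pass parses this header
 * with __GENKSYMS__ defined, so it sees only the frozen definition and
 * keeps reporting the same CRCs even after the real definition changes.
 */
#ifndef __GENKSYMS__
/* Real definition, seen by the compiler: free to change between builds. */
struct private_ops {
	int (*run)(void *ctx, int flags);	/* flags added in a later build */
	void (*stop)(void *ctx);
};
#else
/* Frozen definition, seen only during symbol-version generation. */
struct private_ops {
	int (*run)(void *ctx);
	void (*stop)(void *ctx);
};
#endif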
Modified: dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -1,4 +1,4 @@
-bwh: Adjust context in fs/ext4/ext4.h, fs/btrfs/super.h
+bwh: Adjust context in fs/ext4/ext4.h, fs/btrfs/super.h, kernel/sched.c
diff -NurpP --minimal linux-2.6.32.1/arch/alpha/Kconfig linux-2.6.32.1-vs2.3.0.36.27/arch/alpha/Kconfig
--- linux-2.6.32.1/arch/alpha/Kconfig 2009-12-03 20:01:49.000000000 +0100
@@ -15795,8 +15795,8 @@
rq_weight += weight;
@@ -1811,6 +1841,175 @@ static void cfs_rq_set_shares(struct cfs
-
static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
+
+#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_FAIR_GROUP_SCHED)
Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -15,6 +15,8 @@
$ git diff debian-base..debian-pvops
+[bwh: Fix context in drivers/xen/events.c]
+
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9ec8558..3e30e60 100644
--- a/Documentation/kernel-parameters.txt
@@ -15123,9 +15125,9 @@
{
return cpu_evtchn_mask_p[cpu].bits;
@@ -110,6 +126,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
- #define VALID_EVTCHN(chn) ((chn) != 0)
static struct irq_chip xen_dynamic_chip;
+ static struct irq_chip xen_percpu_chip;
+static struct irq_chip xen_pirq_chip;
/* Constructor for packed IRQ information. */
Modified: dists/sid/linux-2.6/debian/patches/series/24
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/24 Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/patches/series/24 Mon Sep 20 23:25:04 2010 (r16338)
@@ -3,3 +3,34 @@
+ bugfix/all/SCSI-scsi_dh_emc-request-flag-cleanup.patch
+ features/arm/mach-types-update-2010-09-09.patch
+ bugfix/all/sound-use-semicolons-to-end-statements.patch
+- bugfix/x86/compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
+- bugfix/all/wireless-extensions-fix-kernel-heap-content-leak.patch
+- bugfix/all/compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch
+- bugfix/x86/compat-test-rax-for-the-syscall-number-not-eax.patch
+- bugfix/all/tun-Dont-add-sysfs-attributes-to-devices-without-sysfs-dirs.patch
+- features/all/xen/pvhvm/0017-xen-pvhvm-make-it-clearer-that-XEN_UNPLUG_-define.patch
+- features/all/xen/pvhvm/0016-xen-pvhvm-rename-xen_emul_unplug-ignore-to-unnnec.patch
+- features/all/xen/pvhvm/0015-xen-pvhvm-allow-user-to-request-no-emulated-device.patch
+- features/all/xen/pvhvm/0013-Introduce-CONFIG_XEN_PVHVM-compile-option.patch
+- features/all/xen/pvhvm/0012-blkfront-do-not-create-a-PV-cdrom-device-if-xen_hvm.patch
+- features/all/xen/pvhvm/0009-x86-Call-HVMOP_pagetable_dying-on-exit_mmap.patch
+- features/all/xen/pvhvm/0008-x86-Unplug-emulated-disks-and-nics.patch
+- features/all/xen/pvhvm/0007-x86-Use-xen_vcpuop_clockevent-xen_clocksource-and.patch
+- features/all/xen/pvhvm/0005-xen-Add-suspend-resume-support-for-PV-on-HVM-guests.patch
+- features/all/xen/pvhvm/0004-xen-Xen-PCI-platform-device-driver.patch
+- features/all/xen/pvhvm/0003-x86-xen-event-channels-delivery-on-HVM.patch
+- debian/sched-fix-conflict-between-2.6.32.7-and-vserver.patch
++ bugfix/all/stable/2.6.32.22.patch
++ features/all/xen/pvhvm/0003-x86-xen-event-channels-delivery-on-HVM.patch
++ features/all/xen/pvhvm/0004-xen-Xen-PCI-platform-device-driver.patch
++ features/all/xen/pvhvm/0005-xen-Add-suspend-resume-support-for-PV-on-HVM-guests.patch
++ features/all/xen/pvhvm/0007-x86-Use-xen_vcpuop_clockevent-xen_clocksource-and.patch
++ features/all/xen/pvhvm/0008-x86-Unplug-emulated-disks-and-nics.patch
++ features/all/xen/pvhvm/0009-x86-Call-HVMOP_pagetable_dying-on-exit_mmap.patch
++ features/all/xen/pvhvm/0012-blkfront-do-not-create-a-PV-cdrom-device-if-xen_hvm.patch
++ features/all/xen/pvhvm/0013-Introduce-CONFIG_XEN_PVHVM-compile-option.patch
++ features/all/xen/pvhvm/0015-xen-pvhvm-allow-user-to-request-no-emulated-device.patch
++ features/all/xen/pvhvm/0016-xen-pvhvm-rename-xen_emul_unplug-ignore-to-unnnec.patch
++ features/all/xen/pvhvm/0017-xen-pvhvm-make-it-clearer-that-XEN_UNPLUG_-define.patch
++ debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch
++ debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch
Copied and modified: dists/sid/linux-2.6/debian/patches/series/24-extra (from r16335, dists/sid/linux-2.6/debian/patches/series/23-extra)
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/23-extra Sun Sep 19 02:12:56 2010 (r16335, copy source)
+++ dists/sid/linux-2.6/debian/patches/series/24-extra Mon Sep 20 23:25:04 2010 (r16338)
@@ -1,8 +1,10 @@
++ debian/revert-sched-2.6.32.22-changes.patch featureset=openvz
+ features/all/openvz/openvz.patch featureset=openvz
+ features/all/openvz/revert-cgroup-lite-add-cgroup-id-for-blk-cgroups.patch featureset=openvz
+ features/all/openvz/partially-revert-CPT-Replace-legacy-net-statistics.patch featureset=openvz
+ features/all/openvz/cfq-iosched-do-not-force-idling-for-sync-workload.patch featureset=openvz
++ debian/revert-sched-2.6.32.22-changes.patch featureset=vserver
+ features/all/vserver/vs2.3.0.36.27.patch featureset=vserver
+ features/all/vserver/s390-buildfix.patch featureset=vserver
+ features/all/vserver/ia64-buildfix.patch featureset=vserver