[kernel] r16338 - in dists/sid/linux-2.6/debian: . patches/bugfix/all/stable patches/debian patches/features/all/vserver patches/features/all/xen patches/series
Ben Hutchings
benh at alioth.debian.org
Mon Sep 20 23:25:09 UTC 2010
Author: benh
Date: Mon Sep 20 23:25:04 2010
New Revision: 16338
Log:
Add stable 2.6.32.22
Revert one ABI-breaking change in sched.
Hide sched_class changes from genksyms; this structure is really private.
Revert all remaining sched changes for OpenVZ and VServer.
Update context for Xen-pvops.
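(Side note on the genksyms item above: a minimal, illustrative sketch of the
usual hiding trick follows. It is not the actual sched_class patch, and the
struct/member names are hypothetical. genksyms defines __GENKSYMS__ while it
computes symbol-version CRCs, so members guarded this way leave the recorded
ABI checksum unchanged even though the in-kernel layout differs.

struct example_private {
	int existing_member;
#ifndef __GENKSYMS__
	/*
	 * New member added by the update.  genksyms never parses it, so
	 * the CRCs of exported functions that reference this struct are
	 * computed over the old layout and the ABI check still passes.
	 */
	int new_member;
#endif
};
)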
Added:
dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.22.patch
dists/sid/linux-2.6/debian/patches/debian/revert-sched-2.6.32.22-changes.patch
dists/sid/linux-2.6/debian/patches/debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch
dists/sid/linux-2.6/debian/patches/debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch
dists/sid/linux-2.6/debian/patches/series/24-extra
- copied, changed from r16335, dists/sid/linux-2.6/debian/patches/series/23-extra
Deleted:
dists/sid/linux-2.6/debian/patches/series/23-extra
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch
dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
dists/sid/linux-2.6/debian/patches/series/24
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/changelog Mon Sep 20 23:25:04 2010 (r16338)
@@ -5,6 +5,9 @@
* scsi_dh_emc: Fix mode select request setup (Closes: #591540)
* snd-hda-codec-via: Fix syntax error when CONFIG_SND_HDA_POWER_SAVE is
disabled (Closes: #597043)
+ * Add stable 2.6.32.22:
+ - [openvz,vserver] Revert sched changes since they conflict with
+ these featuresets
[ Martin Michlmayr ]
* ARM: update mach types.
Added: dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.22.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.22.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,4236 @@
+diff --git a/Makefile b/Makefile
+index 3e7196f..1786938 100644
+diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S
+index 2c1db77..a6c66f5 100644
+--- a/arch/arm/kernel/entry-common.S
++++ b/arch/arm/kernel/entry-common.S
+@@ -382,11 +382,13 @@ ENDPROC(sys_clone_wrapper)
+
+ sys_sigreturn_wrapper:
+ add r0, sp, #S_OFF
++ mov why, #0 @ prevent syscall restart handling
+ b sys_sigreturn
+ ENDPROC(sys_sigreturn_wrapper)
+
+ sys_rt_sigreturn_wrapper:
+ add r0, sp, #S_OFF
++ mov why, #0 @ prevent syscall restart handling
+ b sys_rt_sigreturn
+ ENDPROC(sys_rt_sigreturn_wrapper)
+
+diff --git a/arch/ia64/include/asm/compat.h b/arch/ia64/include/asm/compat.h
+index dfcf75b..c8662cd 100644
+--- a/arch/ia64/include/asm/compat.h
++++ b/arch/ia64/include/asm/compat.h
+@@ -198,7 +198,7 @@ ptr_to_compat(void __user *uptr)
+ }
+
+ static __inline__ void __user *
+-compat_alloc_user_space (long len)
++arch_compat_alloc_user_space (long len)
+ {
+ struct pt_regs *regs = task_pt_regs(current);
+ return (void __user *) (((regs->r12 & 0xffffffff) & -16) - len);
+diff --git a/arch/ia64/kernel/msi_ia64.c b/arch/ia64/kernel/msi_ia64.c
+index 6c89228..4a746ea 100644
+--- a/arch/ia64/kernel/msi_ia64.c
++++ b/arch/ia64/kernel/msi_ia64.c
+@@ -25,7 +25,7 @@ static int ia64_set_msi_irq_affinity(unsigned int irq,
+ if (irq_prepare_move(irq, cpu))
+ return -1;
+
+- read_msi_msg(irq, &msg);
++ get_cached_msi_msg(irq, &msg);
+
+ addr = msg.address_lo;
+ addr &= MSI_ADDR_DEST_ID_MASK;
+diff --git a/arch/ia64/sn/kernel/msi_sn.c b/arch/ia64/sn/kernel/msi_sn.c
+index fbbfb97..9ab2617 100644
+--- a/arch/ia64/sn/kernel/msi_sn.c
++++ b/arch/ia64/sn/kernel/msi_sn.c
+@@ -174,7 +174,7 @@ static int sn_set_msi_irq_affinity(unsigned int irq,
+ * Release XIO resources for the old MSI PCI address
+ */
+
+- read_msi_msg(irq, &msg);
++ get_cached_msi_msg(irq, &msg);
+ sn_pdev = (struct pcidev_info *)sn_irq_info->irq_pciioinfo;
+ pdev = sn_pdev->pdi_linux_pcidev;
+ provider = SN_PCIDEV_BUSPROVIDER(pdev);
+diff --git a/arch/mips/include/asm/compat.h b/arch/mips/include/asm/compat.h
+index f58aed3..27505bd 100644
+--- a/arch/mips/include/asm/compat.h
++++ b/arch/mips/include/asm/compat.h
+@@ -144,7 +144,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = (struct pt_regs *)
+ ((unsigned long) current_thread_info() + THREAD_SIZE - 32) - 1;
+diff --git a/arch/parisc/include/asm/compat.h b/arch/parisc/include/asm/compat.h
+index 7f32611..7c77fa9 100644
+--- a/arch/parisc/include/asm/compat.h
++++ b/arch/parisc/include/asm/compat.h
+@@ -146,7 +146,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static __inline__ void __user *compat_alloc_user_space(long len)
++static __inline__ void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = &current->thread.regs;
+ return (void __user *)regs->gr[30];
+diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h
+index 4774c2f..8d0fff3 100644
+--- a/arch/powerpc/include/asm/compat.h
++++ b/arch/powerpc/include/asm/compat.h
+@@ -133,7 +133,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = current->thread.regs;
+ unsigned long usp = regs->gpr[1];
+diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h
+index 01a0802..0c940d3 100644
+--- a/arch/s390/include/asm/compat.h
++++ b/arch/s390/include/asm/compat.h
+@@ -180,7 +180,7 @@ static inline int is_compat_task(void)
+
+ #endif
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ unsigned long stack;
+
+diff --git a/arch/sparc/include/asm/compat.h b/arch/sparc/include/asm/compat.h
+index 0e70625..612bb38 100644
+--- a/arch/sparc/include/asm/compat.h
++++ b/arch/sparc/include/asm/compat.h
+@@ -166,7 +166,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = current_thread_info()->kregs;
+ unsigned long usp = regs->u_regs[UREG_I6];
+diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
+index 5294d84..4edd8eb 100644
+--- a/arch/x86/ia32/ia32entry.S
++++ b/arch/x86/ia32/ia32entry.S
+@@ -50,7 +50,12 @@
+ /*
+ * Reload arg registers from stack in case ptrace changed them.
+ * We don't reload %eax because syscall_trace_enter() returned
+- * the value it wants us to use in the table lookup.
++ * the %rax value we should see. Instead, we just truncate that
++ * value to 32 bits again as we did on entry from user mode.
++ * If it's a new value set by user_regset during entry tracing,
++ * this matches the normal truncation of the user-mode value.
++ * If it's -1 to make us punt the syscall, then (u32)-1 is still
++ * an appropriately invalid value.
+ */
+ .macro LOAD_ARGS32 offset, _r9=0
+ .if \_r9
+@@ -60,6 +65,7 @@
+ movl \offset+48(%rsp),%edx
+ movl \offset+56(%rsp),%esi
+ movl \offset+64(%rsp),%edi
++ movl %eax,%eax /* zero extension */
+ .endm
+
+ .macro CFI_STARTPROC32 simple
+@@ -153,7 +159,7 @@ ENTRY(ia32_sysenter_target)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ CFI_REMEMBER_STATE
+ jnz sysenter_tracesys
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja ia32_badsys
+ sysenter_do_call:
+ IA32_ARG_FIXUP
+@@ -195,7 +201,7 @@ sysexit_from_sys_call:
+ movl $AUDIT_ARCH_I386,%edi /* 1st arg: audit arch */
+ call audit_syscall_entry
+ movl RAX-ARGOFFSET(%rsp),%eax /* reload syscall number */
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja ia32_badsys
+ movl %ebx,%edi /* reload 1st syscall arg */
+ movl RCX-ARGOFFSET(%rsp),%esi /* reload 2nd syscall arg */
+@@ -248,7 +254,7 @@ sysenter_tracesys:
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja int_ret_from_sys_call /* sysenter_tracesys has set RAX(%rsp) */
+ jmp sysenter_do_call
+ CFI_ENDPROC
+@@ -314,7 +320,7 @@ ENTRY(ia32_cstar_target)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ CFI_REMEMBER_STATE
+ jnz cstar_tracesys
+- cmpl $IA32_NR_syscalls-1,%eax
++ cmpq $IA32_NR_syscalls-1,%rax
+ ja ia32_badsys
+ cstar_do_call:
+ IA32_ARG_FIXUP 1
+@@ -367,7 +373,7 @@ cstar_tracesys:
+ LOAD_ARGS32 ARGOFFSET, 1 /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+ xchgl %ebp,%r9d
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja int_ret_from_sys_call /* cstar_tracesys has set RAX(%rsp) */
+ jmp cstar_do_call
+ END(ia32_cstar_target)
+@@ -425,7 +431,7 @@ ENTRY(ia32_syscall)
+ orl $TS_COMPAT,TI_status(%r10)
+ testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10)
+ jnz ia32_tracesys
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja ia32_badsys
+ ia32_do_call:
+ IA32_ARG_FIXUP
+@@ -444,7 +450,7 @@ ia32_tracesys:
+ call syscall_trace_enter
+ LOAD_ARGS32 ARGOFFSET /* reload args from stack in case ptrace changed it */
+ RESTORE_REST
+- cmpl $(IA32_NR_syscalls-1),%eax
++ cmpq $(IA32_NR_syscalls-1),%rax
+ ja int_ret_from_sys_call /* ia32_tracesys has set RAX(%rsp) */
+ jmp ia32_do_call
+ END(ia32_syscall)
+diff --git a/arch/x86/include/asm/compat.h b/arch/x86/include/asm/compat.h
+index 9a9c7bd..c8c9a74 100644
+--- a/arch/x86/include/asm/compat.h
++++ b/arch/x86/include/asm/compat.h
+@@ -204,7 +204,7 @@ static inline compat_uptr_t ptr_to_compat(void __user *uptr)
+ return (u32)(unsigned long)uptr;
+ }
+
+-static inline void __user *compat_alloc_user_space(long len)
++static inline void __user *arch_compat_alloc_user_space(long len)
+ {
+ struct pt_regs *regs = task_pt_regs(current);
+ return (void __user *)regs->sp - len;
+diff --git a/arch/x86/include/asm/tsc.h b/arch/x86/include/asm/tsc.h
+index c042729..1ca132f 100644
+--- a/arch/x86/include/asm/tsc.h
++++ b/arch/x86/include/asm/tsc.h
+@@ -59,5 +59,7 @@ extern void check_tsc_sync_source(int cpu);
+ extern void check_tsc_sync_target(void);
+
+ extern int notsc_setup(char *);
++extern void save_sched_clock_state(void);
++extern void restore_sched_clock_state(void);
+
+ #endif /* _ASM_X86_TSC_H */
+diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
+index 1acd1c4..0da6495 100644
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -3338,7 +3338,7 @@ static int set_msi_irq_affinity(unsigned int irq, const struct cpumask *mask)
+
+ cfg = desc->chip_data;
+
+- read_msi_msg_desc(desc, &msg);
++ get_cached_msi_msg_desc(desc, &msg);
+
+ msg.data &= ~MSI_DATA_VECTOR_MASK;
+ msg.data |= MSI_DATA_VECTOR(cfg->vector);
+diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c
+index 597683a..aaefa71 100644
+--- a/arch/x86/kernel/tsc.c
++++ b/arch/x86/kernel/tsc.c
+@@ -626,6 +626,44 @@ static void set_cyc2ns_scale(unsigned long cpu_khz, int cpu)
+ local_irq_restore(flags);
+ }
+
++static unsigned long long cyc2ns_suspend;
++
++void save_sched_clock_state(void)
++{
++ if (!sched_clock_stable)
++ return;
++
++ cyc2ns_suspend = sched_clock();
++}
++
++/*
++ * Even on processors with invariant TSC, TSC gets reset in some the
++ * ACPI system sleep states. And in some systems BIOS seem to reinit TSC to
++ * arbitrary value (still sync'd across cpu's) during resume from such sleep
++ * states. To cope up with this, recompute the cyc2ns_offset for each cpu so
++ * that sched_clock() continues from the point where it was left off during
++ * suspend.
++ */
++void restore_sched_clock_state(void)
++{
++ unsigned long long offset;
++ unsigned long flags;
++ int cpu;
++
++ if (!sched_clock_stable)
++ return;
++
++ local_irq_save(flags);
++
++ __get_cpu_var(cyc2ns_offset) = 0;
++ offset = cyc2ns_suspend - sched_clock();
++
++ for_each_possible_cpu(cpu)
++ per_cpu(cyc2ns_offset, cpu) = offset;
++
++ local_irq_restore(flags);
++}
++
+ #ifdef CONFIG_CPU_FREQ
+
+ /* Frequency scaling support. Adjust the TSC based timer when the cpu frequency
+diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c
+index c41ad50..3130a4b 100644
+--- a/arch/x86/oprofile/nmi_int.c
++++ b/arch/x86/oprofile/nmi_int.c
+@@ -518,8 +518,13 @@ static int __init init_sysfs(void)
+ int error;
+
+ error = sysdev_class_register(&oprofile_sysclass);
+- if (!error)
+- error = sysdev_register(&device_oprofile);
++ if (error)
++ return error;
++
++ error = sysdev_register(&device_oprofile);
++ if (error)
++ sysdev_class_unregister(&oprofile_sysclass);
++
+ return error;
+ }
+
+@@ -530,8 +535,10 @@ static void exit_sysfs(void)
+ }
+
+ #else
+-#define init_sysfs() do { } while (0)
+-#define exit_sysfs() do { } while (0)
++
++static inline int init_sysfs(void) { return 0; }
++static inline void exit_sysfs(void) { }
++
+ #endif /* CONFIG_PM */
+
+ static int __init p4_init(char **cpu_type)
+@@ -645,6 +652,8 @@ int __init op_nmi_init(struct oprofile_operations *ops)
+ char *cpu_type = NULL;
+ int ret = 0;
+
++ using_nmi = 0;
++
+ if (!cpu_has_apic)
+ return -ENODEV;
+
+@@ -727,7 +736,10 @@ int __init op_nmi_init(struct oprofile_operations *ops)
+
+ mux_init(ops);
+
+- init_sysfs();
++ ret = init_sysfs();
++ if (ret)
++ return ret;
++
+ using_nmi = 1;
+ printk(KERN_INFO "oprofile: using NMI interrupt.\n");
+ return 0;
+diff --git a/arch/x86/power/cpu.c b/arch/x86/power/cpu.c
+index eeeb522..fa0f651 100644
+--- a/arch/x86/power/cpu.c
++++ b/arch/x86/power/cpu.c
+@@ -112,6 +112,7 @@ static void __save_processor_state(struct saved_context *ctxt)
+ void save_processor_state(void)
+ {
+ __save_processor_state(&saved_context);
++ save_sched_clock_state();
+ }
+ #ifdef CONFIG_X86_32
+ EXPORT_SYMBOL(save_processor_state);
+@@ -253,6 +254,7 @@ static void __restore_processor_state(struct saved_context *ctxt)
+ void restore_processor_state(void)
+ {
+ __restore_processor_state(&saved_context);
++ restore_sched_clock_state();
+ }
+ #ifdef CONFIG_X86_32
+ EXPORT_SYMBOL(restore_processor_state);
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index 6a96da6..0963cd6 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -5504,6 +5504,7 @@ static int ata_host_request_pm(struct ata_host *host, pm_message_t mesg,
+ */
+ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
+ {
++ unsigned int ehi_flags = ATA_EHI_QUIET;
+ int rc;
+
+ /*
+@@ -5512,7 +5513,18 @@ int ata_host_suspend(struct ata_host *host, pm_message_t mesg)
+ */
+ ata_lpm_enable(host);
+
+- rc = ata_host_request_pm(host, mesg, 0, ATA_EHI_QUIET, 1);
++ /*
++ * On some hardware, device fails to respond after spun down
++ * for suspend. As the device won't be used before being
++ * resumed, we don't need to touch the device. Ask EH to skip
++ * the usual stuff and proceed directly to suspend.
++ *
++ * http://thread.gmane.org/gmane.linux.ide/46764
++ */
++ if (mesg.event == PM_EVENT_SUSPEND)
++ ehi_flags |= ATA_EHI_NO_AUTOPSY | ATA_EHI_NO_RECOVERY;
++
++ rc = ata_host_request_pm(host, mesg, 0, ehi_flags, 1);
+ if (rc == 0)
+ host->dev->power.power_state = mesg;
+ return rc;
+diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
+index e30b9e7..fa9bed0 100644
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -3149,6 +3149,10 @@ static int ata_eh_skip_recovery(struct ata_link *link)
+ if (link->flags & ATA_LFLAG_DISABLED)
+ return 1;
+
++ /* skip if explicitly requested */
++ if (ehc->i.flags & ATA_EHI_NO_RECOVERY)
++ return 1;
++
+ /* thaw frozen port and recover failed devices */
+ if ((ap->pflags & ATA_PFLAG_FROZEN) || ata_link_nr_enabled(link))
+ return 0;
+diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
+index 6f5093b..cf41126 100644
+--- a/drivers/ata/sata_mv.c
++++ b/drivers/ata/sata_mv.c
+@@ -1879,19 +1879,25 @@ static void mv_bmdma_start(struct ata_queued_cmd *qc)
+ * LOCKING:
+ * Inherited from caller.
+ */
+-static void mv_bmdma_stop(struct ata_queued_cmd *qc)
++static void mv_bmdma_stop_ap(struct ata_port *ap)
+ {
+- struct ata_port *ap = qc->ap;
+ void __iomem *port_mmio = mv_ap_base(ap);
+ u32 cmd;
+
+ /* clear start/stop bit */
+ cmd = readl(port_mmio + BMDMA_CMD);
+- cmd &= ~ATA_DMA_START;
+- writelfl(cmd, port_mmio + BMDMA_CMD);
++ if (cmd & ATA_DMA_START) {
++ cmd &= ~ATA_DMA_START;
++ writelfl(cmd, port_mmio + BMDMA_CMD);
++
++ /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
++ ata_sff_dma_pause(ap);
++ }
++}
+
+- /* one-PIO-cycle guaranteed wait, per spec, for HDMA1:0 transition */
+- ata_sff_dma_pause(ap);
++static void mv_bmdma_stop(struct ata_queued_cmd *qc)
++{
++ mv_bmdma_stop_ap(qc->ap);
+ }
+
+ /**
+@@ -1915,8 +1921,21 @@ static u8 mv_bmdma_status(struct ata_port *ap)
+ reg = readl(port_mmio + BMDMA_STATUS);
+ if (reg & ATA_DMA_ACTIVE)
+ status = ATA_DMA_ACTIVE;
+- else
++ else if (reg & ATA_DMA_ERR)
+ status = (reg & ATA_DMA_ERR) | ATA_DMA_INTR;
++ else {
++ /*
++ * Just because DMA_ACTIVE is 0 (DMA completed),
++ * this does _not_ mean the device is "done".
++ * So we should not yet be signalling ATA_DMA_INTR
++ * in some cases. Eg. DSM/TRIM, and perhaps others.
++ */
++ mv_bmdma_stop_ap(ap);
++ if (ioread8(ap->ioaddr.altstatus_addr) & ATA_BUSY)
++ status = 0;
++ else
++ status = ATA_DMA_INTR;
++ }
+ return status;
+ }
+
+@@ -1976,6 +1995,9 @@ static void mv_qc_prep(struct ata_queued_cmd *qc)
+
+ switch (tf->protocol) {
+ case ATA_PROT_DMA:
++ if (tf->command == ATA_CMD_DSM)
++ return;
++ /* fall-thru */
+ case ATA_PROT_NCQ:
+ break; /* continue below */
+ case ATA_PROT_PIO:
+@@ -2075,6 +2097,8 @@ static void mv_qc_prep_iie(struct ata_queued_cmd *qc)
+ if ((tf->protocol != ATA_PROT_DMA) &&
+ (tf->protocol != ATA_PROT_NCQ))
+ return;
++ if (tf->command == ATA_CMD_DSM)
++ return; /* use bmdma for this */
+
+ /* Fill in Gen IIE command request block */
+ if (!(tf->flags & ATA_TFLAG_WRITE))
+@@ -2270,6 +2294,12 @@ static unsigned int mv_qc_issue(struct ata_queued_cmd *qc)
+
+ switch (qc->tf.protocol) {
+ case ATA_PROT_DMA:
++ if (qc->tf.command == ATA_CMD_DSM) {
++ if (!ap->ops->bmdma_setup) /* no bmdma on GEN_I */
++ return AC_ERR_OTHER;
++ break; /* use bmdma for this */
++ }
++ /* fall thru */
+ case ATA_PROT_NCQ:
+ mv_start_edma(ap, port_mmio, pp, qc->tf.protocol);
+ pp->req_idx = (pp->req_idx + 1) & MV_MAX_Q_DEPTH_MASK;
+diff --git a/drivers/gpu/drm/drm_crtc_helper.c b/drivers/gpu/drm/drm_crtc_helper.c
+index 08173fc..1b8745d 100644
+diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c
+index 2680db7..c3aca5c 100644
+diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c
+index 176a6df..3ada62b 100644
+diff --git a/drivers/gpu/drm/i915/intel_display.c b/drivers/gpu/drm/i915/intel_display.c
+index 4f5c733..79cc437 100644
+diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c
+index 1ca6574..e9add5b 100644
+--- a/drivers/hid/usbhid/hid-core.c
++++ b/drivers/hid/usbhid/hid-core.c
+@@ -1000,16 +1000,6 @@ static int usbhid_start(struct hid_device *hid)
+ }
+ }
+
+- init_waitqueue_head(&usbhid->wait);
+- INIT_WORK(&usbhid->reset_work, hid_reset);
+- INIT_WORK(&usbhid->restart_work, __usbhid_restart_queues);
+- setup_timer(&usbhid->io_retry, hid_retry_timeout, (unsigned long) hid);
+-
+- spin_lock_init(&usbhid->lock);
+-
+- usbhid->intf = intf;
+- usbhid->ifnum = interface->desc.bInterfaceNumber;
+-
+ usbhid->urbctrl = usb_alloc_urb(0, GFP_KERNEL);
+ if (!usbhid->urbctrl) {
+ ret = -ENOMEM;
+@@ -1180,6 +1170,14 @@ static int usbhid_probe(struct usb_interface *intf, const struct usb_device_id *
+
+ hid->driver_data = usbhid;
+ usbhid->hid = hid;
++ usbhid->intf = intf;
++ usbhid->ifnum = interface->desc.bInterfaceNumber;
++
++ init_waitqueue_head(&usbhid->wait);
++ INIT_WORK(&usbhid->reset_work, hid_reset);
++ INIT_WORK(&usbhid->restart_work, __usbhid_restart_queues);
++ setup_timer(&usbhid->io_retry, hid_retry_timeout, (unsigned long) hid);
++ spin_lock_init(&usbhid->lock);
+
+ ret = hid_add_device(hid);
+ if (ret) {
+diff --git a/drivers/hwmon/f75375s.c b/drivers/hwmon/f75375s.c
+index e2107e5..afebc34 100644
+--- a/drivers/hwmon/f75375s.c
++++ b/drivers/hwmon/f75375s.c
+@@ -79,7 +79,7 @@ I2C_CLIENT_INSMOD_2(f75373, f75375);
+ #define F75375_REG_PWM2_DROP_DUTY 0x6C
+
+ #define FAN_CTRL_LINEAR(nr) (4 + nr)
+-#define FAN_CTRL_MODE(nr) (5 + ((nr) * 2))
++#define FAN_CTRL_MODE(nr) (4 + ((nr) * 2))
+
+ /*
+ * Data structures and manipulation thereof
+@@ -298,7 +298,7 @@ static int set_pwm_enable_direct(struct i2c_client *client, int nr, int val)
+ return -EINVAL;
+
+ fanmode = f75375_read8(client, F75375_REG_FAN_TIMER);
+- fanmode = ~(3 << FAN_CTRL_MODE(nr));
++ fanmode &= ~(3 << FAN_CTRL_MODE(nr));
+
+ switch (val) {
+ case 0: /* Full speed */
+@@ -350,7 +350,7 @@ static ssize_t set_pwm_mode(struct device *dev, struct device_attribute *attr,
+
+ mutex_lock(&data->update_lock);
+ conf = f75375_read8(client, F75375_REG_CONFIG1);
+- conf = ~(1 << FAN_CTRL_LINEAR(nr));
++ conf &= ~(1 << FAN_CTRL_LINEAR(nr));
+
+ if (val == 0)
+ conf |= (1 << FAN_CTRL_LINEAR(nr)) ;
+diff --git a/drivers/hwmon/k8temp.c b/drivers/hwmon/k8temp.c
+index f808d18..4f84d1a 100644
+--- a/drivers/hwmon/k8temp.c
++++ b/drivers/hwmon/k8temp.c
+@@ -143,6 +143,37 @@ static struct pci_device_id k8temp_ids[] = {
+
+ MODULE_DEVICE_TABLE(pci, k8temp_ids);
+
++static int __devinit is_rev_g_desktop(u8 model)
++{
++ u32 brandidx;
++
++ if (model < 0x69)
++ return 0;
++
++ if (model == 0xc1 || model == 0x6c || model == 0x7c)
++ return 0;
++
++ /*
++ * Differentiate between AM2 and ASB1.
++ * See "Constructing the processor Name String" in "Revision
++ * Guide for AMD NPT Family 0Fh Processors" (33610).
++ */
++ brandidx = cpuid_ebx(0x80000001);
++ brandidx = (brandidx >> 9) & 0x1f;
++
++ /* Single core */
++ if ((model == 0x6f || model == 0x7f) &&
++ (brandidx == 0x7 || brandidx == 0x9 || brandidx == 0xc))
++ return 0;
++
++ /* Dual core */
++ if (model == 0x6b &&
++ (brandidx == 0xb || brandidx == 0xc))
++ return 0;
++
++ return 1;
++}
++
+ static int __devinit k8temp_probe(struct pci_dev *pdev,
+ const struct pci_device_id *id)
+ {
+@@ -179,9 +210,7 @@ static int __devinit k8temp_probe(struct pci_dev *pdev,
+ "wrong - check erratum #141\n");
+ }
+
+- if ((model >= 0x69) &&
+- !(model == 0xc1 || model == 0x6c || model == 0x7c ||
+- model == 0x6b || model == 0x6f || model == 0x7f)) {
++ if (is_rev_g_desktop(model)) {
+ /*
+ * RevG desktop CPUs (i.e. no socket S1G1 or
+ * ASB1 parts) need additional offset,
+diff --git a/drivers/input/serio/i8042.c b/drivers/input/serio/i8042.c
+index 1df02d2..16f5ab2 100644
+--- a/drivers/input/serio/i8042.c
++++ b/drivers/input/serio/i8042.c
+@@ -1412,8 +1412,8 @@ static int __init i8042_init(void)
+
+ static void __exit i8042_exit(void)
+ {
+- platform_driver_unregister(&i8042_driver);
+ platform_device_unregister(i8042_platform_device);
++ platform_driver_unregister(&i8042_driver);
+ i8042_platform_exit();
+
+ panic_blink = NULL;
+diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c
+index 91991b4..f43edfd 100644
+--- a/drivers/mmc/host/tmio_mmc.c
++++ b/drivers/mmc/host/tmio_mmc.c
+@@ -161,6 +161,7 @@ tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd)
+ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
+ {
+ struct mmc_data *data = host->data;
++ void *sg_virt;
+ unsigned short *buf;
+ unsigned int count;
+ unsigned long flags;
+@@ -170,8 +171,8 @@ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
+ return;
+ }
+
+- buf = (unsigned short *)(tmio_mmc_kmap_atomic(host, &flags) +
+- host->sg_off);
++ sg_virt = tmio_mmc_kmap_atomic(host->sg_ptr, &flags);
++ buf = (unsigned short *)(sg_virt + host->sg_off);
+
+ count = host->sg_ptr->length - host->sg_off;
+ if (count > data->blksz)
+@@ -188,7 +189,7 @@ static inline void tmio_mmc_pio_irq(struct tmio_mmc_host *host)
+
+ host->sg_off += count;
+
+- tmio_mmc_kunmap_atomic(host, &flags);
++ tmio_mmc_kunmap_atomic(sg_virt, &flags);
+
+ if (host->sg_off == host->sg_ptr->length)
+ tmio_mmc_next_sg(host);
+diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h
+index 9fa9985..ee8fa89 100644
+--- a/drivers/mmc/host/tmio_mmc.h
++++ b/drivers/mmc/host/tmio_mmc.h
+@@ -102,10 +102,7 @@
+
+ #define ack_mmc_irqs(host, i) \
+ do { \
+- u32 mask;\
+- mask = sd_ctrl_read32((host), CTL_STATUS); \
+- mask &= ~((i) & TMIO_MASK_IRQ); \
+- sd_ctrl_write32((host), CTL_STATUS, mask); \
++ sd_ctrl_write32((host), CTL_STATUS, ~(i)); \
+ } while (0)
+
+
+@@ -200,19 +197,17 @@ static inline int tmio_mmc_next_sg(struct tmio_mmc_host *host)
+ return --host->sg_len;
+ }
+
+-static inline char *tmio_mmc_kmap_atomic(struct tmio_mmc_host *host,
++static inline char *tmio_mmc_kmap_atomic(struct scatterlist *sg,
+ unsigned long *flags)
+ {
+- struct scatterlist *sg = host->sg_ptr;
+-
+ local_irq_save(*flags);
+ return kmap_atomic(sg_page(sg), KM_BIO_SRC_IRQ) + sg->offset;
+ }
+
+-static inline void tmio_mmc_kunmap_atomic(struct tmio_mmc_host *host,
++static inline void tmio_mmc_kunmap_atomic(void *virt,
+ unsigned long *flags)
+ {
+- kunmap_atomic(sg_page(host->sg_ptr), KM_BIO_SRC_IRQ);
++ kunmap_atomic(virt, KM_BIO_SRC_IRQ);
+ local_irq_restore(*flags);
+ }
+
+diff --git a/drivers/net/tun.c b/drivers/net/tun.c
+index 4fdfa2a..0f77aca 100644
+--- a/drivers/net/tun.c
++++ b/drivers/net/tun.c
+@@ -1006,7 +1006,8 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
+ if (err < 0)
+ goto err_free_sk;
+
+- if (device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
++ if (!net_eq(dev_net(tun->dev), &init_net) ||
++ device_create_file(&tun->dev->dev, &dev_attr_tun_flags) ||
+ device_create_file(&tun->dev->dev, &dev_attr_owner) ||
+ device_create_file(&tun->dev->dev, &dev_attr_group))
+ printk(KERN_ERR "Failed to create tun sysfs files\n");
+diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c
+index ce166ae..2c4914a 100644
+--- a/drivers/net/wireless/ath/ath5k/base.c
++++ b/drivers/net/wireless/ath/ath5k/base.c
+@@ -1288,6 +1288,10 @@ ath5k_txbuf_setup(struct ath5k_softc *sc, struct ath5k_buf *bf,
+ PCI_DMA_TODEVICE);
+
+ rate = ieee80211_get_tx_rate(sc->hw, info);
++ if (!rate) {
++ ret = -EINVAL;
++ goto err_unmap;
++ }
+
+ if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+ flags |= AR5K_TXDESC_NOACK;
+diff --git a/drivers/net/wireless/ath/ath9k/eeprom.h b/drivers/net/wireless/ath/ath9k/eeprom.h
+index 4fe33f7..a5daa0d 100644
+--- a/drivers/net/wireless/ath/ath9k/eeprom.h
++++ b/drivers/net/wireless/ath/ath9k/eeprom.h
+@@ -60,7 +60,7 @@
+
+ #define SD_NO_CTL 0xE0
+ #define NO_CTL 0xff
+-#define CTL_MODE_M 7
++#define CTL_MODE_M 0xf
+ #define CTL_11A 0
+ #define CTL_11B 1
+ #define CTL_11G 2
+diff --git a/drivers/net/wireless/ath/regd.h b/drivers/net/wireless/ath/regd.h
+index c1dd857..21cf521 100644
+--- a/drivers/net/wireless/ath/regd.h
++++ b/drivers/net/wireless/ath/regd.h
+@@ -31,7 +31,6 @@ enum ctl_group {
+ #define NO_CTL 0xff
+ #define SD_NO_CTL 0xE0
+ #define NO_CTL 0xff
+-#define CTL_MODE_M 7
+ #define CTL_11A 0
+ #define CTL_11B 1
+ #define CTL_11G 2
+diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c
+index 9d147de..0edd7b4 100644
+--- a/drivers/net/wireless/p54/txrx.c
++++ b/drivers/net/wireless/p54/txrx.c
+@@ -445,7 +445,7 @@ static void p54_rx_frame_sent(struct p54_common *priv, struct sk_buff *skb)
+ }
+
+ if (!(info->flags & IEEE80211_TX_CTL_NO_ACK) &&
+- (!payload->status))
++ !(payload->status & P54_TX_FAILED))
+ info->flags |= IEEE80211_TX_STAT_ACK;
+ if (payload->status & P54_TX_PSM_CANCELLED)
+ info->flags |= IEEE80211_TX_STAT_TX_FILTERED;
+diff --git a/drivers/oprofile/buffer_sync.c b/drivers/oprofile/buffer_sync.c
+index c9e2ae9..5c4df24 100644
+--- a/drivers/oprofile/buffer_sync.c
++++ b/drivers/oprofile/buffer_sync.c
+@@ -140,16 +140,6 @@ static struct notifier_block module_load_nb = {
+ .notifier_call = module_load_notify,
+ };
+
+-
+-static void end_sync(void)
+-{
+- end_cpu_work();
+- /* make sure we don't leak task structs */
+- process_task_mortuary();
+- process_task_mortuary();
+-}
+-
+-
+ int sync_start(void)
+ {
+ int err;
+@@ -157,7 +147,7 @@ int sync_start(void)
+ if (!zalloc_cpumask_var(&marked_cpus, GFP_KERNEL))
+ return -ENOMEM;
+
+- start_cpu_work();
++ mutex_lock(&buffer_mutex);
+
+ err = task_handoff_register(&task_free_nb);
+ if (err)
+@@ -172,7 +162,10 @@ int sync_start(void)
+ if (err)
+ goto out4;
+
++ start_cpu_work();
++
+ out:
++ mutex_unlock(&buffer_mutex);
+ return err;
+ out4:
+ profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+@@ -181,7 +174,6 @@ out3:
+ out2:
+ task_handoff_unregister(&task_free_nb);
+ out1:
+- end_sync();
+ free_cpumask_var(marked_cpus);
+ goto out;
+ }
+@@ -189,11 +181,20 @@ out1:
+
+ void sync_stop(void)
+ {
++ /* flush buffers */
++ mutex_lock(&buffer_mutex);
++ end_cpu_work();
+ unregister_module_notifier(&module_load_nb);
+ profile_event_unregister(PROFILE_MUNMAP, &munmap_nb);
+ profile_event_unregister(PROFILE_TASK_EXIT, &task_exit_nb);
+ task_handoff_unregister(&task_free_nb);
+- end_sync();
++ mutex_unlock(&buffer_mutex);
++ flush_scheduled_work();
++
++ /* make sure we don't leak task structs */
++ process_task_mortuary();
++ process_task_mortuary();
++
+ free_cpumask_var(marked_cpus);
+ }
+
+diff --git a/drivers/oprofile/cpu_buffer.c b/drivers/oprofile/cpu_buffer.c
+index 1f1f5a8..5e2ac4a 100644
+--- a/drivers/oprofile/cpu_buffer.c
++++ b/drivers/oprofile/cpu_buffer.c
+@@ -121,8 +121,6 @@ void end_cpu_work(void)
+
+ cancel_delayed_work(&b->work);
+ }
+-
+- flush_scheduled_work();
+ }
+
+ /*
+diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c
+index f9cf317..0fb1d05 100644
+--- a/drivers/pci/msi.c
++++ b/drivers/pci/msi.c
+@@ -195,6 +195,9 @@ void unmask_msi_irq(unsigned int irq)
+ void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
++
++ BUG_ON(entry->dev->current_state != PCI_D0);
++
+ if (entry->msi_attrib.is_msix) {
+ void __iomem *base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+@@ -228,10 +231,32 @@ void read_msi_msg(unsigned int irq, struct msi_msg *msg)
+ read_msi_msg_desc(desc, msg);
+ }
+
++void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
++{
++ struct msi_desc *entry = get_irq_desc_msi(desc);
++
++ /* Assert that the cache is valid, assuming that
++ * valid messages are not all-zeroes. */
++ BUG_ON(!(entry->msg.address_hi | entry->msg.address_lo |
++ entry->msg.data));
++
++ *msg = entry->msg;
++}
++
++void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg)
++{
++ struct irq_desc *desc = irq_to_desc(irq);
++
++ get_cached_msi_msg_desc(desc, msg);
++}
++
+ void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg)
+ {
+ struct msi_desc *entry = get_irq_desc_msi(desc);
+- if (entry->msi_attrib.is_msix) {
++
++ if (entry->dev->current_state != PCI_D0) {
++ /* Don't touch the hardware now */
++ } else if (entry->msi_attrib.is_msix) {
+ void __iomem *base;
+ base = entry->mask_base +
+ entry->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE;
+diff --git a/drivers/power/apm_power.c b/drivers/power/apm_power.c
+index 936bae5..dc628cb 100644
+--- a/drivers/power/apm_power.c
++++ b/drivers/power/apm_power.c
+@@ -233,6 +233,7 @@ static int calculate_capacity(enum apm_source source)
+ empty_design_prop = POWER_SUPPLY_PROP_ENERGY_EMPTY_DESIGN;
+ now_prop = POWER_SUPPLY_PROP_ENERGY_NOW;
+ avg_prop = POWER_SUPPLY_PROP_ENERGY_AVG;
++ break;
+ case SOURCE_VOLTAGE:
+ full_prop = POWER_SUPPLY_PROP_VOLTAGE_MAX;
+ empty_prop = POWER_SUPPLY_PROP_VOLTAGE_MIN;
+diff --git a/drivers/staging/hv/RingBuffer.c b/drivers/staging/hv/RingBuffer.c
+index f69ae33..3a38103 100644
+--- a/drivers/staging/hv/RingBuffer.c
++++ b/drivers/staging/hv/RingBuffer.c
+@@ -192,7 +192,7 @@ Description:
+ static inline u64
+ GetRingBufferIndices(RING_BUFFER_INFO* RingInfo)
+ {
+- return ((u64)RingInfo->RingBuffer->WriteIndex << 32) || RingInfo->RingBuffer->ReadIndex;
++ return (u64)RingInfo->RingBuffer->WriteIndex << 32;
+ }
+
+
+diff --git a/drivers/staging/hv/StorVscApi.h b/drivers/staging/hv/StorVscApi.h
+index 69c1406..3d8ff08 100644
+--- a/drivers/staging/hv/StorVscApi.h
++++ b/drivers/staging/hv/StorVscApi.h
+@@ -28,10 +28,10 @@
+ #include "VmbusApi.h"
+
+ /* Defines */
+-#define STORVSC_RING_BUFFER_SIZE (10*PAGE_SIZE)
++#define STORVSC_RING_BUFFER_SIZE (20*PAGE_SIZE)
+ #define BLKVSC_RING_BUFFER_SIZE (20*PAGE_SIZE)
+
+-#define STORVSC_MAX_IO_REQUESTS 64
++#define STORVSC_MAX_IO_REQUESTS 128
+
+ /*
+ * In Hyper-V, each port/path/target maps to 1 scsi host adapter. In
+diff --git a/drivers/staging/hv/netvsc_drv.c b/drivers/staging/hv/netvsc_drv.c
+index 4c3c8bc..547261d 100644
+--- a/drivers/staging/hv/netvsc_drv.c
++++ b/drivers/staging/hv/netvsc_drv.c
+@@ -392,6 +392,9 @@ static const struct net_device_ops device_ops = {
+ .ndo_start_xmit = netvsc_start_xmit,
+ .ndo_get_stats = netvsc_get_stats,
+ .ndo_set_multicast_list = netvsc_set_multicast_list,
++ .ndo_change_mtu = eth_change_mtu,
++ .ndo_validate_addr = eth_validate_addr,
++ .ndo_set_mac_address = eth_mac_addr,
+ };
+
+ static int netvsc_probe(struct device *device)
+diff --git a/drivers/staging/hv/storvsc_drv.c b/drivers/staging/hv/storvsc_drv.c
+index d49dc21..2a4b147 100644
+--- a/drivers/staging/hv/storvsc_drv.c
++++ b/drivers/staging/hv/storvsc_drv.c
+@@ -532,7 +532,7 @@ static unsigned int copy_to_bounce_buffer(struct scatterlist *orig_sgl,
+
+ ASSERT(orig_sgl[i].offset + orig_sgl[i].length <= PAGE_SIZE);
+
+- if (j == 0)
++ if (bounce_addr == 0)
+ bounce_addr = (unsigned long)kmap_atomic(sg_page((&bounce_sgl[j])), KM_IRQ0);
+
+ while (srclen) {
+@@ -593,7 +593,7 @@ static unsigned int copy_from_bounce_buffer(struct scatterlist *orig_sgl,
+ destlen = orig_sgl[i].length;
+ ASSERT(orig_sgl[i].offset + orig_sgl[i].length <= PAGE_SIZE);
+
+- if (j == 0)
++ if (bounce_addr == 0)
+ bounce_addr = (unsigned long)kmap_atomic(sg_page((&bounce_sgl[j])), KM_IRQ0);
+
+ while (destlen) {
+@@ -652,6 +652,7 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+ unsigned int request_size = 0;
+ int i;
+ struct scatterlist *sgl;
++ unsigned int sg_count = 0;
+
+ DPRINT_ENTER(STORVSC_DRV);
+
+@@ -736,6 +737,7 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+ request->DataBuffer.Length = scsi_bufflen(scmnd);
+ if (scsi_sg_count(scmnd)) {
+ sgl = (struct scatterlist *)scsi_sglist(scmnd);
++ sg_count = scsi_sg_count(scmnd);
+
+ /* check if we need to bounce the sgl */
+ if (do_bounce_buffer(sgl, scsi_sg_count(scmnd)) != -1) {
+@@ -770,11 +772,12 @@ static int storvsc_queuecommand(struct scsi_cmnd *scmnd,
+ scsi_sg_count(scmnd));
+
+ sgl = cmd_request->bounce_sgl;
++ sg_count = cmd_request->bounce_sgl_count;
+ }
+
+ request->DataBuffer.Offset = sgl[0].offset;
+
+- for (i = 0; i < scsi_sg_count(scmnd); i++) {
++ for (i = 0; i < sg_count; i++) {
+ DPRINT_DBG(STORVSC_DRV, "sgl[%d] len %d offset %d \n",
+ i, sgl[i].length, sgl[i].offset);
+ request->DataBuffer.PfnArray[i] =
+diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c
+index 0e64037..e3017c4 100644
+--- a/drivers/usb/class/cdc-acm.c
++++ b/drivers/usb/class/cdc-acm.c
+@@ -971,7 +971,8 @@ static int acm_probe(struct usb_interface *intf,
+ }
+
+ if (!buflen) {
+- if (intf->cur_altsetting->endpoint->extralen &&
++ if (intf->cur_altsetting->endpoint &&
++ intf->cur_altsetting->endpoint->extralen &&
+ intf->cur_altsetting->endpoint->extra) {
+ dev_dbg(&intf->dev,
+ "Seeking extra descriptors on endpoint\n");
+@@ -1464,6 +1465,17 @@ err_out:
+ }
+
+ #endif /* CONFIG_PM */
++
++#define NOKIA_PCSUITE_ACM_INFO(x) \
++ USB_DEVICE_AND_INTERFACE_INFO(0x0421, x, \
++ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \
++ USB_CDC_ACM_PROTO_VENDOR)
++
++#define SAMSUNG_PCSUITE_ACM_INFO(x) \
++ USB_DEVICE_AND_INTERFACE_INFO(0x04e7, x, \
++ USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM, \
++ USB_CDC_ACM_PROTO_VENDOR)
++
+ /*
+ * USB driver structure.
+ */
+@@ -1521,6 +1533,76 @@ static struct usb_device_id acm_ids[] = {
+ { USB_DEVICE(0x1bbb, 0x0003), /* Alcatel OT-I650 */
+ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
+ },
++ { USB_DEVICE(0x1576, 0x03b1), /* Maretron USB100 */
++ .driver_info = NO_UNION_NORMAL, /* reports zero length descriptor */
++ },
++
++ /* Nokia S60 phones expose two ACM channels. The first is
++ * a modem and is picked up by the standard AT-command
++ * information below. The second is 'vendor-specific' but
++ * is treated as a serial device at the S60 end, so we want
++ * to expose it on Linux too. */
++ { NOKIA_PCSUITE_ACM_INFO(0x042D), }, /* Nokia 3250 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04D8), }, /* Nokia 5500 Sport */
++ { NOKIA_PCSUITE_ACM_INFO(0x04C9), }, /* Nokia E50 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0419), }, /* Nokia E60 */
++ { NOKIA_PCSUITE_ACM_INFO(0x044D), }, /* Nokia E61 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0001), }, /* Nokia E61i */
++ { NOKIA_PCSUITE_ACM_INFO(0x0475), }, /* Nokia E62 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0508), }, /* Nokia E65 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0418), }, /* Nokia E70 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0425), }, /* Nokia N71 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0486), }, /* Nokia N73 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04DF), }, /* Nokia N75 */
++ { NOKIA_PCSUITE_ACM_INFO(0x000e), }, /* Nokia N77 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0445), }, /* Nokia N80 */
++ { NOKIA_PCSUITE_ACM_INFO(0x042F), }, /* Nokia N91 & N91 8GB */
++ { NOKIA_PCSUITE_ACM_INFO(0x048E), }, /* Nokia N92 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0420), }, /* Nokia N93 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04E6), }, /* Nokia N93i */
++ { NOKIA_PCSUITE_ACM_INFO(0x04B2), }, /* Nokia 5700 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0134), }, /* Nokia 6110 Navigator (China) */
++ { NOKIA_PCSUITE_ACM_INFO(0x046E), }, /* Nokia 6110 Navigator */
++ { NOKIA_PCSUITE_ACM_INFO(0x002f), }, /* Nokia 6120 classic & */
++ { NOKIA_PCSUITE_ACM_INFO(0x0088), }, /* Nokia 6121 classic */
++ { NOKIA_PCSUITE_ACM_INFO(0x00fc), }, /* Nokia 6124 classic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0042), }, /* Nokia E51 */
++ { NOKIA_PCSUITE_ACM_INFO(0x00b0), }, /* Nokia E66 */
++ { NOKIA_PCSUITE_ACM_INFO(0x00ab), }, /* Nokia E71 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0481), }, /* Nokia N76 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0007), }, /* Nokia N81 & N81 8GB */
++ { NOKIA_PCSUITE_ACM_INFO(0x0071), }, /* Nokia N82 */
++ { NOKIA_PCSUITE_ACM_INFO(0x04F0), }, /* Nokia N95 & N95-3 NAM */
++ { NOKIA_PCSUITE_ACM_INFO(0x0070), }, /* Nokia N95 8GB */
++ { NOKIA_PCSUITE_ACM_INFO(0x00e9), }, /* Nokia 5320 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0099), }, /* Nokia 6210 Navigator, RM-367 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0128), }, /* Nokia 6210 Navigator, RM-419 */
++ { NOKIA_PCSUITE_ACM_INFO(0x008f), }, /* Nokia 6220 Classic */
++ { NOKIA_PCSUITE_ACM_INFO(0x00a0), }, /* Nokia 6650 */
++ { NOKIA_PCSUITE_ACM_INFO(0x007b), }, /* Nokia N78 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0094), }, /* Nokia N85 */
++ { NOKIA_PCSUITE_ACM_INFO(0x003a), }, /* Nokia N96 & N96-3 */
++ { NOKIA_PCSUITE_ACM_INFO(0x00e9), }, /* Nokia 5320 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x0108), }, /* Nokia 5320 XpressMusic 2G */
++ { NOKIA_PCSUITE_ACM_INFO(0x01f5), }, /* Nokia N97, RM-505 */
++ { NOKIA_PCSUITE_ACM_INFO(0x02e3), }, /* Nokia 5230, RM-588 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0178), }, /* Nokia E63 */
++ { NOKIA_PCSUITE_ACM_INFO(0x010e), }, /* Nokia E75 */
++ { NOKIA_PCSUITE_ACM_INFO(0x02d9), }, /* Nokia 6760 Slide */
++ { NOKIA_PCSUITE_ACM_INFO(0x01d0), }, /* Nokia E52 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0223), }, /* Nokia E72 */
++ { NOKIA_PCSUITE_ACM_INFO(0x0275), }, /* Nokia X6 */
++ { NOKIA_PCSUITE_ACM_INFO(0x026c), }, /* Nokia N97 Mini */
++ { NOKIA_PCSUITE_ACM_INFO(0x0154), }, /* Nokia 5800 XpressMusic */
++ { NOKIA_PCSUITE_ACM_INFO(0x04ce), }, /* Nokia E90 */
++ { NOKIA_PCSUITE_ACM_INFO(0x01d4), }, /* Nokia E55 */
++ { SAMSUNG_PCSUITE_ACM_INFO(0x6651), }, /* Samsung GTi8510 (INNOV8) */
++
++ /* NOTE: non-Nokia COMM/ACM/0xff is likely MSFT RNDIS... NOT a modem! */
++
++ /* control interfaces without any protocol set */
++ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
++ USB_CDC_PROTO_NONE) },
+
+ /* control interfaces with various AT-command sets */
+ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
+@@ -1536,7 +1618,6 @@ static struct usb_device_id acm_ids[] = {
+ { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
+ USB_CDC_ACM_PROTO_AT_CDMA) },
+
+- /* NOTE: COMM/ACM/0xff is likely MSFT RNDIS ... NOT a modem!! */
+ { }
+ };
+
+diff --git a/drivers/usb/gadget/rndis.c b/drivers/usb/gadget/rndis.c
+index 48267bc..33ac6ac 100644
+--- a/drivers/usb/gadget/rndis.c
++++ b/drivers/usb/gadget/rndis.c
+@@ -291,9 +291,13 @@ gen_ndis_query_resp (int configNr, u32 OID, u8 *buf, unsigned buf_len,
+ /* mandatory */
+ case OID_GEN_VENDOR_DESCRIPTION:
+ pr_debug("%s: OID_GEN_VENDOR_DESCRIPTION\n", __func__);
+- length = strlen (rndis_per_dev_params [configNr].vendorDescr);
+- memcpy (outbuf,
+- rndis_per_dev_params [configNr].vendorDescr, length);
++ if ( rndis_per_dev_params [configNr].vendorDescr ) {
++ length = strlen (rndis_per_dev_params [configNr].vendorDescr);
++ memcpy (outbuf,
++ rndis_per_dev_params [configNr].vendorDescr, length);
++ } else {
++ outbuf[0] = 0;
++ }
+ retval = 0;
+ break;
+
+diff --git a/drivers/usb/host/ehci-ppc-of.c b/drivers/usb/host/ehci-ppc-of.c
+index 36f96da..ab26c2b 100644
+--- a/drivers/usb/host/ehci-ppc-of.c
++++ b/drivers/usb/host/ehci-ppc-of.c
+@@ -192,17 +192,19 @@ ehci_hcd_ppc_of_probe(struct of_device *op, const struct of_device_id *match)
+ }
+
+ rv = usb_add_hcd(hcd, irq, 0);
+- if (rv == 0)
+- return 0;
++ if (rv)
++ goto err_ehci;
++
++ return 0;
+
++err_ehci:
++ if (ehci->has_amcc_usb23)
++ iounmap(ehci->ohci_hcctrl_reg);
+ iounmap(hcd->regs);
+ err_ioremap:
+ irq_dispose_mapping(irq);
+ err_irq:
+ release_mem_region(hcd->rsrc_start, hcd->rsrc_len);
+-
+- if (ehci->has_amcc_usb23)
+- iounmap(ehci->ohci_hcctrl_reg);
+ err_rmr:
+ usb_put_hcd(hcd);
+
+diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
+index 99bde5f..93c4923 100644
+--- a/drivers/usb/serial/cp210x.c
++++ b/drivers/usb/serial/cp210x.c
+@@ -90,6 +90,7 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x10C4, 0x8149) }, /* West Mountain Radio Computerized Battery Analyzer */
+ { USB_DEVICE(0x10C4, 0x814A) }, /* West Mountain Radio RIGblaster P&P */
+ { USB_DEVICE(0x10C4, 0x814B) }, /* West Mountain Radio RIGtalk */
++ { USB_DEVICE(0x10C4, 0x8156) }, /* B&G H3000 link cable */
+ { USB_DEVICE(0x10C4, 0x815E) }, /* Helicomm IP-Link 1220-DVM */
+ { USB_DEVICE(0x10C4, 0x818B) }, /* AVIT Research USB to TTL */
+ { USB_DEVICE(0x10C4, 0x819F) }, /* MJS USB Toslink Switcher */
+@@ -111,6 +112,7 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x10C4, 0x83A8) }, /* Amber Wireless AMB2560 */
+ { USB_DEVICE(0x10C4, 0x8411) }, /* Kyocera GPS Module */
+ { USB_DEVICE(0x10C4, 0x846E) }, /* BEI USB Sensor Interface (VCP) */
++ { USB_DEVICE(0x10C4, 0x8477) }, /* Balluff RFID */
+ { USB_DEVICE(0x10C4, 0xEA60) }, /* Silicon Labs factory default */
+ { USB_DEVICE(0x10C4, 0xEA61) }, /* Silicon Labs factory default */
+ { USB_DEVICE(0x10C4, 0xEA71) }, /* Infinity GPS-MIC-1 Radio Monophone */
+@@ -124,14 +126,14 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x1555, 0x0004) }, /* Owen AC4 USB-RS485 Converter */
+ { USB_DEVICE(0x166A, 0x0303) }, /* Clipsal 5500PCU C-Bus USB interface */
+ { USB_DEVICE(0x16D6, 0x0001) }, /* Jablotron serial interface */
+- { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
+- { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
+- { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+- { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
+ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */
+ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */
+ { USB_DEVICE(0x16DC, 0x0012) }, /* W-IE-NE-R Plein & Baus GmbH MPOD Multi Channel Power Supply */
+ { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
++ { USB_DEVICE(0x17F4, 0xAAAA) }, /* Wavesense Jazz blood glucose meter */
++ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
++ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
++ { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
+ { } /* Terminating Entry */
+ };
+
+diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
+index 813ec3d..a7044b1 100644
+--- a/drivers/usb/serial/ftdi_sio.c
++++ b/drivers/usb/serial/ftdi_sio.c
+@@ -759,6 +759,14 @@ static struct usb_device_id id_table_combined [] = {
+ { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) },
+ { USB_DEVICE(IONICS_VID, IONICS_PLUGCOMPUTER_PID),
+ .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_24_MASTER_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_PC_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_USB_DMX_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MIDI_TIMECODE_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MINI_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MAXI_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_MEDIA_WING_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_CHAMSYS_WING_PID) },
+ { }, /* Optional parameter entry */
+ { } /* Terminating entry */
+ };
+diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
+index 52c3b68..30d3011 100644
+--- a/drivers/usb/serial/ftdi_sio_ids.h
++++ b/drivers/usb/serial/ftdi_sio_ids.h
+@@ -135,6 +135,18 @@
+ #define FTDI_NDI_AURORA_SCU_PID 0xDA74 /* NDI Aurora SCU */
+
+ /*
++ * ChamSys Limited (www.chamsys.co.uk) USB wing/interface product IDs
++ */
++#define FTDI_CHAMSYS_24_MASTER_WING_PID 0xDAF8
++#define FTDI_CHAMSYS_PC_WING_PID 0xDAF9
++#define FTDI_CHAMSYS_USB_DMX_PID 0xDAFA
++#define FTDI_CHAMSYS_MIDI_TIMECODE_PID 0xDAFB
++#define FTDI_CHAMSYS_MINI_WING_PID 0xDAFC
++#define FTDI_CHAMSYS_MAXI_WING_PID 0xDAFD
++#define FTDI_CHAMSYS_MEDIA_WING_PID 0xDAFE
++#define FTDI_CHAMSYS_WING_PID 0xDAFF
++
++/*
+ * Westrex International devices submitted by Cory Lee
+ */
+ #define FTDI_WESTREX_MODEL_777_PID 0xDC00 /* Model 777 */
+diff --git a/drivers/usb/serial/mos7840.c b/drivers/usb/serial/mos7840.c
+index a861cd2..cf79fb2 100644
+--- a/drivers/usb/serial/mos7840.c
++++ b/drivers/usb/serial/mos7840.c
+@@ -120,15 +120,20 @@
+ * by making a change here, in moschip_port_id_table, and in
+ * moschip_id_table_combined
+ */
+-#define USB_VENDOR_ID_BANDB 0x0856
+-#define BANDB_DEVICE_ID_USO9ML2_2 0xAC22
+-#define BANDB_DEVICE_ID_USO9ML2_4 0xAC24
+-#define BANDB_DEVICE_ID_US9ML2_2 0xAC29
+-#define BANDB_DEVICE_ID_US9ML2_4 0xAC30
+-#define BANDB_DEVICE_ID_USPTL4_2 0xAC31
+-#define BANDB_DEVICE_ID_USPTL4_4 0xAC32
+-#define BANDB_DEVICE_ID_USOPTL4_2 0xAC42
+-#define BANDB_DEVICE_ID_USOPTL4_4 0xAC44
++#define USB_VENDOR_ID_BANDB 0x0856
++#define BANDB_DEVICE_ID_USO9ML2_2 0xAC22
++#define BANDB_DEVICE_ID_USO9ML2_2P 0xBC00
++#define BANDB_DEVICE_ID_USO9ML2_4 0xAC24
++#define BANDB_DEVICE_ID_USO9ML2_4P 0xBC01
++#define BANDB_DEVICE_ID_US9ML2_2 0xAC29
++#define BANDB_DEVICE_ID_US9ML2_4 0xAC30
++#define BANDB_DEVICE_ID_USPTL4_2 0xAC31
++#define BANDB_DEVICE_ID_USPTL4_4 0xAC32
++#define BANDB_DEVICE_ID_USOPTL4_2 0xAC42
++#define BANDB_DEVICE_ID_USOPTL4_2P 0xBC02
++#define BANDB_DEVICE_ID_USOPTL4_4 0xAC44
++#define BANDB_DEVICE_ID_USOPTL4_4P 0xBC03
++#define BANDB_DEVICE_ID_USOPTL2_4 0xAC24
+
+ /* This driver also supports
+ * ATEN UC2324 device using Moschip MCS7840
+@@ -184,13 +189,18 @@ static struct usb_device_id moschip_port_id_table[] = {
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)},
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)},
+ {} /* terminating entry */
+@@ -200,13 +210,18 @@ static __devinitdata struct usb_device_id moschip_id_table_combined[] = {
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7840)},
+ {USB_DEVICE(USB_VENDOR_ID_MOSCHIP, MOSCHIP_DEVICE_ID_7820)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USO9ML2_4P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_US9ML2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_2)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USPTL4_4)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_2P)},
+ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL4_4P)},
++ {USB_DEVICE(USB_VENDOR_ID_BANDB, BANDB_DEVICE_ID_USOPTL2_4)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2324)},
+ {USB_DEVICE(USB_VENDOR_ID_ATENINTL, ATENINTL_DEVICE_ID_UC2322)},
+ {} /* terminating entry */
+@@ -280,12 +295,19 @@ static int mos7840_get_reg_sync(struct usb_serial_port *port, __u16 reg,
+ {
+ struct usb_device *dev = port->serial->dev;
+ int ret = 0;
++ u8 *buf;
++
++ buf = kmalloc(VENDOR_READ_LENGTH, GFP_KERNEL);
++ if (!buf)
++ return -ENOMEM;
+
+ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+- MCS_RD_RTYPE, 0, reg, val, VENDOR_READ_LENGTH,
++ MCS_RD_RTYPE, 0, reg, buf, VENDOR_READ_LENGTH,
+ MOS_WDR_TIMEOUT);
++ *val = buf[0];
+ dbg("mos7840_get_reg_sync offset is %x, return val %x", reg, *val);
+- *val = (*val) & 0x00ff;
++
++ kfree(buf);
+ return ret;
+ }
+
+@@ -338,6 +360,11 @@ static int mos7840_get_uart_reg(struct usb_serial_port *port, __u16 reg,
+ struct usb_device *dev = port->serial->dev;
+ int ret = 0;
+ __u16 Wval;
++ u8 *buf;
++
++ buf = kmalloc(VENDOR_READ_LENGTH, GFP_KERNEL);
++ if (!buf)
++ return -ENOMEM;
+
+ /* dbg("application number is %4x",
+ (((__u16)port->number - (__u16)(port->serial->minor))+1)<<8); */
+@@ -361,9 +388,11 @@ static int mos7840_get_uart_reg(struct usb_serial_port *port, __u16 reg,
+ }
+ }
+ ret = usb_control_msg(dev, usb_rcvctrlpipe(dev, 0), MCS_RDREQ,
+- MCS_RD_RTYPE, Wval, reg, val, VENDOR_READ_LENGTH,
++ MCS_RD_RTYPE, Wval, reg, buf, VENDOR_READ_LENGTH,
+ MOS_WDR_TIMEOUT);
+- *val = (*val) & 0x00ff;
++ *val = buf[0];
++
++ kfree(buf);
+ return ret;
+ }
+
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index 30e0467..a4dc7bf 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -106,6 +106,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
+ #define VALID_EVTCHN(chn) ((chn) != 0)
+
+ static struct irq_chip xen_dynamic_chip;
++static struct irq_chip xen_percpu_chip;
+
+ /* Constructor for packed IRQ information. */
+ static struct irq_info mk_unbound_info(void)
+@@ -362,7 +363,7 @@ int bind_evtchn_to_irq(unsigned int evtchn)
+ irq = find_unbound_irq();
+
+ set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "event");
++ handle_edge_irq, "event");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_evtchn_info(evtchn);
+@@ -388,8 +389,8 @@ static int bind_ipi_to_irq(unsigned int ipi, unsigned int cpu)
+ if (irq < 0)
+ goto out;
+
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "ipi");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "ipi");
+
+ bind_ipi.vcpu = cpu;
+ if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
+@@ -429,8 +430,8 @@ static int bind_virq_to_irq(unsigned int virq, unsigned int cpu)
+
+ irq = find_unbound_irq();
+
+- set_irq_chip_and_handler_name(irq, &xen_dynamic_chip,
+- handle_level_irq, "virq");
++ set_irq_chip_and_handler_name(irq, &xen_percpu_chip,
++ handle_percpu_irq, "virq");
+
+ evtchn_to_irq[evtchn] = irq;
+ irq_info[irq] = mk_virq_info(evtchn, virq);
+@@ -929,6 +930,16 @@ static struct irq_chip xen_dynamic_chip __read_mostly = {
+ .retrigger = retrigger_dynirq,
+ };
+
+static struct irq_chip xen_percpu_chip __read_mostly = {
++ .name = "xen-percpu",
++
++ .disable = disable_dynirq,
++ .mask = disable_dynirq,
++ .unmask = enable_dynirq,
++
++ .ack = ack_dynirq,
++};
++
+ void __init xen_init_IRQ(void)
+ {
+ int i;
+diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
+index c4e8353..42b60b0 100644
+--- a/fs/binfmt_misc.c
++++ b/fs/binfmt_misc.c
+@@ -723,7 +723,7 @@ static int __init init_misc_binfmt(void)
+ {
+ int err = register_filesystem(&bm_fs_type);
+ if (!err) {
+- err = register_binfmt(&misc_format);
++ err = insert_binfmt(&misc_format);
+ if (err)
+ unregister_filesystem(&bm_fs_type);
+ }
+diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
+index 51d9e33..650546f 100644
+--- a/fs/fuse/dev.c
++++ b/fs/fuse/dev.c
+@@ -1158,6 +1158,14 @@ __acquires(&fc->lock)
+ }
+ }
+
++static void end_queued_requests(struct fuse_conn *fc)
++{
++ fc->max_background = UINT_MAX;
++ flush_bg_queue(fc);
++ end_requests(fc, &fc->pending);
++ end_requests(fc, &fc->processing);
++}
++
+ /*
+ * Abort all requests.
+ *
+@@ -1184,8 +1192,7 @@ void fuse_abort_conn(struct fuse_conn *fc)
+ fc->connected = 0;
+ fc->blocked = 0;
+ end_io_requests(fc);
+- end_requests(fc, &fc->pending);
+- end_requests(fc, &fc->processing);
++ end_queued_requests(fc);
+ wake_up_all(&fc->waitq);
+ wake_up_all(&fc->blocked_waitq);
+ kill_fasync(&fc->fasync, SIGIO, POLL_IN);
+@@ -1200,8 +1207,9 @@ int fuse_dev_release(struct inode *inode, struct file *file)
+ if (fc) {
+ spin_lock(&fc->lock);
+ fc->connected = 0;
+- end_requests(fc, &fc->pending);
+- end_requests(fc, &fc->processing);
++ fc->blocked = 0;
++ end_queued_requests(fc);
++ wake_up_all(&fc->blocked_waitq);
+ spin_unlock(&fc->lock);
+ fuse_conn_put(fc);
+ }
+diff --git a/fs/nfs/client.c b/fs/nfs/client.c
+index 127ed5c..19cbbf7 100644
+--- a/fs/nfs/client.c
++++ b/fs/nfs/client.c
+@@ -273,7 +273,7 @@ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+ sin1->sin6_scope_id != sin2->sin6_scope_id)
+ return 0;
+
+- return ipv6_addr_equal(&sin1->sin6_addr, &sin1->sin6_addr);
++ return ipv6_addr_equal(&sin1->sin6_addr, &sin2->sin6_addr);
+ }
+ #else /* !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE) */
+ static int nfs_sockaddr_match_ipaddr6(const struct sockaddr *sa1,
+diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
+index 4c827d8..3fcb479 100644
+--- a/fs/ocfs2/inode.c
++++ b/fs/ocfs2/inode.c
+@@ -485,7 +485,11 @@ static int ocfs2_read_locked_inode(struct inode *inode,
+ OCFS2_BH_IGNORE_CACHE);
+ } else {
+ status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
+- if (!status)
++ /*
++ * If buffer is in jbd, then its checksum may not have been
++ * computed as yet.
++ */
++ if (!status && !buffer_jbd(bh))
+ status = ocfs2_validate_inode_block(osb->sb, bh);
+ }
+ if (status < 0) {
+diff --git a/fs/sysfs/file.c b/fs/sysfs/file.c
+index f5ea468..7118a38 100644
+--- a/fs/sysfs/file.c
++++ b/fs/sysfs/file.c
+@@ -340,7 +340,7 @@ static int sysfs_open_file(struct inode *inode, struct file *file)
+ char *p;
+
+ p = d_path(&file->f_path, last_sysfs_file, sizeof(last_sysfs_file));
+- if (p)
++ if (!IS_ERR(p))
+ memmove(last_sysfs_file, p, strlen(p) + 1);
+
+ /* need attr_sd for attr and ops, its parent for kobj */
+diff --git a/include/linux/compat.h b/include/linux/compat.h
+index af931ee..cab23f2 100644
+--- a/include/linux/compat.h
++++ b/include/linux/compat.h
+@@ -309,5 +309,7 @@ asmlinkage long compat_sys_newfstatat(unsigned int dfd, char __user * filename,
+ asmlinkage long compat_sys_openat(unsigned int dfd, const char __user *filename,
+ int flags, int mode);
+
++extern void __user *compat_alloc_user_space(unsigned long len);
++
+ #endif /* CONFIG_COMPAT */
+ #endif /* _LINUX_COMPAT_H */
+diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
+index a5740fc..a73454a 100644
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -21,8 +21,7 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+-extern void cpuset_cpus_allowed_locked(struct task_struct *p,
+- struct cpumask *mask);
++extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+ void cpuset_init_current_mems_allowed(void);
+@@ -69,9 +68,6 @@ struct seq_file;
+ extern void cpuset_task_status_allowed(struct seq_file *m,
+ struct task_struct *task);
+
+-extern void cpuset_lock(void);
+-extern void cpuset_unlock(void);
+-
+ extern int cpuset_mem_spread_node(void);
+
+ static inline int cpuset_do_page_mem_spread(void)
+@@ -105,10 +101,11 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
+ {
+ cpumask_copy(mask, cpu_possible_mask);
+ }
+-static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
+- struct cpumask *mask)
++
++static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
+ {
+- cpumask_copy(mask, cpu_possible_mask);
++ cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
++ return cpumask_any(cpu_active_mask);
+ }
+
+ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+@@ -157,9 +154,6 @@ static inline void cpuset_task_status_allowed(struct seq_file *m,
+ {
+ }
+
+-static inline void cpuset_lock(void) {}
+-static inline void cpuset_unlock(void) {}
+-
+ static inline int cpuset_mem_spread_node(void)
+ {
+ return 0;
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index b0f6d97..a069916 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -339,6 +339,7 @@ enum {
+ ATA_EHI_HOTPLUGGED = (1 << 0), /* could have been hotplugged */
+ ATA_EHI_NO_AUTOPSY = (1 << 2), /* no autopsy */
+ ATA_EHI_QUIET = (1 << 3), /* be quiet */
++ ATA_EHI_NO_RECOVERY = (1 << 4), /* no recovery */
+
+ ATA_EHI_DID_SOFTRESET = (1 << 16), /* already soft-reset this port */
+ ATA_EHI_DID_HARDRESET = (1 << 17), /* already soft-reset this port */
+diff --git a/include/linux/msi.h b/include/linux/msi.h
+index 6991ab5..91b05c1 100644
+--- a/include/linux/msi.h
++++ b/include/linux/msi.h
+@@ -14,8 +14,10 @@ struct irq_desc;
+ extern void mask_msi_irq(unsigned int irq);
+ extern void unmask_msi_irq(unsigned int irq);
+ extern void read_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
++extern void get_cached_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+ extern void write_msi_msg_desc(struct irq_desc *desc, struct msi_msg *msg);
+ extern void read_msi_msg(unsigned int irq, struct msi_msg *msg);
++extern void get_cached_msi_msg(unsigned int irq, struct msi_msg *msg);
+ extern void write_msi_msg(unsigned int irq, struct msi_msg *msg);
+
+ struct msi_desc {
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index cc24beb..957a25f 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -145,7 +145,6 @@ extern unsigned long this_cpu_load(void);
+
+
+ extern void calc_global_load(void);
+-extern u64 cpu_nr_migrations(int cpu);
+
+ extern unsigned long get_parent_ip(unsigned long addr);
+
+@@ -1001,6 +1000,7 @@ struct sched_domain {
+ char *name;
+ #endif
+
++ unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+@@ -1072,7 +1072,8 @@ struct sched_domain;
+ struct sched_class {
+ const struct sched_class *next;
+
+- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
++ bool head);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*yield_task) (struct rq *rq);
+
+@@ -1082,7 +1083,8 @@ struct sched_class {
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+
+ #ifdef CONFIG_SMP
+- int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
++ int (*select_task_rq)(struct rq *rq, struct task_struct *p,
++ int sd_flag, int flags);
+
+ unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, unsigned long max_load_move,
+@@ -1094,7 +1096,8 @@ struct sched_class {
+ enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+- void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
++ void (*task_waking) (struct rq *this_rq, struct task_struct *task);
++ void (*task_woken) (struct rq *this_rq, struct task_struct *task);
+
+ void (*set_cpus_allowed)(struct task_struct *p,
+ const struct cpumask *newmask);
+@@ -1105,7 +1108,7 @@ struct sched_class {
+
+ void (*set_curr_task) (struct rq *rq);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+- void (*task_new) (struct rq *rq, struct task_struct *p);
++ void (*task_fork) (struct task_struct *p);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+@@ -1114,10 +1117,11 @@ struct sched_class {
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
+
+- unsigned int (*get_rr_interval) (struct task_struct *task);
++ unsigned int (*get_rr_interval) (struct rq *rq,
++ struct task_struct *task);
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *p);
++ void (*moved_group) (struct task_struct *p, int on_rq);
+ #endif
+ };
+
+@@ -1178,7 +1182,6 @@ struct sched_entity {
+ u64 nr_failed_migrations_running;
+ u64 nr_failed_migrations_hot;
+ u64 nr_forced_migrations;
+- u64 nr_forced2_migrations;
+
+ u64 nr_wakeups;
+ u64 nr_wakeups_sync;
+@@ -1886,6 +1889,7 @@ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+ #ifdef CONFIG_HOTPLUG_CPU
++extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
+ extern void idle_task_exit(void);
+ #else
+ static inline void idle_task_exit(void) {}
+diff --git a/include/linux/topology.h b/include/linux/topology.h
+index 57e6357..5b81156 100644
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+- | 0*SD_SHARE_PKG_RESOURCES \
++ | 1*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_PREFER_SIBLING \
+ , \
+diff --git a/kernel/compat.c b/kernel/compat.c
+index 180d188..8bc5578 100644
+--- a/kernel/compat.c
++++ b/kernel/compat.c
+@@ -25,6 +25,7 @@
+ #include <linux/posix-timers.h>
+ #include <linux/times.h>
+ #include <linux/ptrace.h>
++#include <linux/module.h>
+
+ #include <asm/uaccess.h>
+
+@@ -1136,3 +1137,24 @@ compat_sys_sysinfo(struct compat_sysinfo __user *info)
+
+ return 0;
+ }
++
++/*
++ * Allocate user-space memory for the duration of a single system call,
++ * in order to marshall parameters inside a compat thunk.
++ */
++void __user *compat_alloc_user_space(unsigned long len)
++{
++ void __user *ptr;
++
++ /* If len would occupy more than half of the entire compat space... */
++ if (unlikely(len > (((compat_uptr_t)~0) >> 1)))
++ return NULL;
++
++ ptr = arch_compat_alloc_user_space(len);
++
++ if (unlikely(!access_ok(VERIFY_WRITE, ptr, len)))
++ return NULL;
++
++ return ptr;
++}
++EXPORT_SYMBOL_GPL(compat_alloc_user_space);
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 291ac58..7e8b6ac 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -151,7 +151,7 @@ static inline void check_for_tasks(int cpu)
+
+ write_lock_irq(&tasklist_lock);
+ for_each_process(p) {
+- if (task_cpu(p) == cpu &&
++ if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
+@@ -163,6 +163,7 @@ static inline void check_for_tasks(int cpu)
+ }
+
+ struct take_cpu_down_param {
++ struct task_struct *caller;
+ unsigned long mod;
+ void *hcpu;
+ };
+@@ -171,6 +172,7 @@ struct take_cpu_down_param {
+ static int __ref take_cpu_down(void *_param)
+ {
+ struct take_cpu_down_param *param = _param;
++ unsigned int cpu = (unsigned long)param->hcpu;
+ int err;
+
+ /* Ensure this CPU doesn't handle any more interrupts. */
+@@ -181,6 +183,8 @@ static int __ref take_cpu_down(void *_param)
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
++ if (task_cpu(param->caller) == cpu)
++ move_task_off_dead_cpu(cpu, param->caller);
+ /* Force idle task to run as soon as we yield: it should
+ immediately notice cpu is offline and die quickly. */
+ sched_idle_next();
+@@ -191,10 +195,10 @@ static int __ref take_cpu_down(void *_param)
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+ int err, nr_calls = 0;
+- cpumask_var_t old_allowed;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
++ .caller = current,
+ .mod = mod,
+ .hcpu = hcpu,
+ };
+@@ -205,10 +209,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
+- if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
+- return -ENOMEM;
+-
+ cpu_hotplug_begin();
++ set_cpu_active(cpu, false);
+ err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+ hcpu, -1, &nr_calls);
+ if (err == NOTIFY_BAD) {
+@@ -223,10 +225,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ goto out_release;
+ }
+
+- /* Ensure that we are not runnable on dying cpu */
+- cpumask_copy(old_allowed, &current->cpus_allowed);
+- set_cpus_allowed_ptr(current, cpu_active_mask);
+-
+ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ set_cpu_active(cpu, true);
+@@ -235,7 +233,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ hcpu) == NOTIFY_BAD)
+ BUG();
+
+- goto out_allowed;
++ goto out_release;
+ }
+ BUG_ON(cpu_online(cpu));
+
+@@ -253,8 +251,6 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+
+ check_for_tasks(cpu);
+
+-out_allowed:
+- set_cpus_allowed_ptr(current, old_allowed);
+ out_release:
+ cpu_hotplug_done();
+ if (!err) {
+@@ -262,7 +258,6 @@ out_release:
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
+- free_cpumask_var(old_allowed);
+ return err;
+ }
+
+@@ -280,18 +275,6 @@ int __ref cpu_down(unsigned int cpu)
+ goto out;
+ }
+
+- set_cpu_active(cpu, false);
+-
+- /*
+- * Make sure the all cpus did the reschedule and are not
+- * using stale version of the cpu_active_mask.
+- * This is not strictly necessary becuase stop_machine()
+- * that we run down the line already provides the required
+- * synchronization. But it's really a side effect and we do not
+- * want to depend on the innards of the stop_machine here.
+- */
+- synchronize_sched();
+-
+ err = _cpu_down(cpu, 0);
+
+ out:
+@@ -382,19 +365,12 @@ int disable_nonboot_cpus(void)
+ return error;
+ cpu_maps_update_begin();
+ first_cpu = cpumask_first(cpu_online_mask);
+- /* We take down all of the non-boot CPUs in one shot to avoid races
++ /*
++ * We take down all of the non-boot CPUs in one shot to avoid races
+ * with the userspace trying to use the CPU hotplug at the same time
+ */
+ cpumask_clear(frozen_cpus);
+
+- for_each_online_cpu(cpu) {
+- if (cpu == first_cpu)
+- continue;
+- set_cpu_active(cpu, false);
+- }
+-
+- synchronize_sched();
+-
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+diff --git a/kernel/cpuset.c b/kernel/cpuset.c
+index a81a910..b120fd0 100644
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -2145,19 +2145,52 @@ void __init cpuset_init_smp(void)
+ void cpuset_cpus_allowed(struct task_struct *tsk, struct cpumask *pmask)
+ {
+ mutex_lock(&callback_mutex);
+- cpuset_cpus_allowed_locked(tsk, pmask);
++ task_lock(tsk);
++ guarantee_online_cpus(task_cs(tsk), pmask);
++ task_unlock(tsk);
+ mutex_unlock(&callback_mutex);
+ }
+
+-/**
+- * cpuset_cpus_allowed_locked - return cpus_allowed mask from a tasks cpuset.
+- * Must be called with callback_mutex held.
+- **/
+-void cpuset_cpus_allowed_locked(struct task_struct *tsk, struct cpumask *pmask)
++int cpuset_cpus_allowed_fallback(struct task_struct *tsk)
+ {
+- task_lock(tsk);
+- guarantee_online_cpus(task_cs(tsk), pmask);
+- task_unlock(tsk);
++ const struct cpuset *cs;
++ int cpu;
++
++ rcu_read_lock();
++ cs = task_cs(tsk);
++ if (cs)
++ cpumask_copy(&tsk->cpus_allowed, cs->cpus_allowed);
++ rcu_read_unlock();
++
++ /*
++ * We own tsk->cpus_allowed, nobody can change it under us.
++ *
++ * But we used cs && cs->cpus_allowed lockless and thus can
++ * race with cgroup_attach_task() or update_cpumask() and get
++ * the wrong tsk->cpus_allowed. However, both cases imply the
++ * subsequent cpuset_change_cpumask()->set_cpus_allowed_ptr()
++ * which takes task_rq_lock().
++ *
++ * If we are called after it dropped the lock we must see all
++ * changes in tsk_cs()->cpus_allowed. Otherwise we can temporary
++ * set any mask even if it is not right from task_cs() pov,
++ * the pending set_cpus_allowed_ptr() will fix things.
++ */
++
++ cpu = cpumask_any_and(&tsk->cpus_allowed, cpu_active_mask);
++ if (cpu >= nr_cpu_ids) {
++ /*
++ * Either tsk->cpus_allowed is wrong (see above) or it
++ * is actually empty. The latter case is only possible
++ * if we are racing with remove_tasks_in_empty_cpuset().
++ * Like above we can temporary set any mask and rely on
++ * set_cpus_allowed_ptr() as synchronization point.
++ */
++ cpumask_copy(&tsk->cpus_allowed, cpu_possible_mask);
++ cpu = cpumask_any(cpu_active_mask);
++ }
++
++ return cpu;
+ }
+
+ void cpuset_init_current_mems_allowed(void)
+@@ -2346,22 +2379,6 @@ int __cpuset_node_allowed_hardwall(int node, gfp_t gfp_mask)
+ }
+
+ /**
+- * cpuset_lock - lock out any changes to cpuset structures
+- *
+- * The out of memory (oom) code needs to mutex_lock cpusets
+- * from being changed while it scans the tasklist looking for a
+- * task in an overlapping cpuset. Expose callback_mutex via this
+- * cpuset_lock() routine, so the oom code can lock it, before
+- * locking the task list. The tasklist_lock is a spinlock, so
+- * must be taken inside callback_mutex.
+- */
+-
+-void cpuset_lock(void)
+-{
+- mutex_lock(&callback_mutex);
+-}
+-
+-/**
+ * cpuset_unlock - release lock on cpuset changes
+ *
+ * Undo the lock taken in a previous cpuset_lock() call.
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 9f3b066..4bde56f 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1233,21 +1233,6 @@ static struct task_struct *copy_process(unsigned long clone_flags,
+ /* Need tasklist lock for parent etc handling! */
+ write_lock_irq(&tasklist_lock);
+
+- /*
+- * The task hasn't been attached yet, so its cpus_allowed mask will
+- * not be changed, nor will its assigned CPU.
+- *
+- * The cpus_allowed mask of the parent may have changed after it was
+- * copied first time - so re-copy it here, then check the child's CPU
+- * to ensure it is on a valid CPU (and if not, just force it back to
+- * parent's CPU). This avoids alot of nasty races.
+- */
+- p->cpus_allowed = current->cpus_allowed;
+- p->rt.nr_cpus_allowed = current->rt.nr_cpus_allowed;
+- if (unlikely(!cpu_isset(task_cpu(p), p->cpus_allowed) ||
+- !cpu_online(task_cpu(p))))
+- set_task_cpu(p, smp_processor_id());
+-
+ /* CLONE_PARENT re-uses the old parent */
+ if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
+ p->real_parent = current->real_parent;
+diff --git a/kernel/gcov/fs.c b/kernel/gcov/fs.c
+index ef3c3f8..f83972b 100644
+--- a/kernel/gcov/fs.c
++++ b/kernel/gcov/fs.c
+@@ -33,10 +33,11 @@
+ * @children: child nodes
+ * @all: list head for list of all nodes
+ * @parent: parent node
+- * @info: associated profiling data structure if not a directory
+- * @ghost: when an object file containing profiling data is unloaded we keep a
+- * copy of the profiling data here to allow collecting coverage data
+- * for cleanup code. Such a node is called a "ghost".
++ * @loaded_info: array of pointers to profiling data sets for loaded object
++ * files.
++ * @num_loaded: number of profiling data sets for loaded object files.
++ * @unloaded_info: accumulated copy of profiling data sets for unloaded
++ * object files. Used only when gcov_persist=1.
+ * @dentry: main debugfs entry, either a directory or data file
+ * @links: associated symbolic links
+ * @name: data file basename
+@@ -51,10 +52,11 @@ struct gcov_node {
+ struct list_head children;
+ struct list_head all;
+ struct gcov_node *parent;
+- struct gcov_info *info;
+- struct gcov_info *ghost;
++ struct gcov_info **loaded_info;
++ struct gcov_info *unloaded_info;
+ struct dentry *dentry;
+ struct dentry **links;
++ int num_loaded;
+ char name[0];
+ };
+
+@@ -136,16 +138,37 @@ static const struct seq_operations gcov_seq_ops = {
+ };
+
+ /*
+- * Return the profiling data set for a given node. This can either be the
+- * original profiling data structure or a duplicate (also called "ghost")
+- * in case the associated object file has been unloaded.
++ * Return a profiling data set associated with the given node. This is
++ * either a data set for a loaded object file or a data set copy in case
++ * all associated object files have been unloaded.
+ */
+ static struct gcov_info *get_node_info(struct gcov_node *node)
+ {
+- if (node->info)
+- return node->info;
++ if (node->num_loaded > 0)
++ return node->loaded_info[0];
+
+- return node->ghost;
++ return node->unloaded_info;
++}
++
++/*
++ * Return a newly allocated profiling data set which contains the sum of
++ * all profiling data associated with the given node.
++ */
++static struct gcov_info *get_accumulated_info(struct gcov_node *node)
++{
++ struct gcov_info *info;
++ int i = 0;
++
++ if (node->unloaded_info)
++ info = gcov_info_dup(node->unloaded_info);
++ else
++ info = gcov_info_dup(node->loaded_info[i++]);
++ if (!info)
++ return NULL;
++ for (; i < node->num_loaded; i++)
++ gcov_info_add(info, node->loaded_info[i]);
++
++ return info;
+ }
+
+ /*
+@@ -163,9 +186,10 @@ static int gcov_seq_open(struct inode *inode, struct file *file)
+ mutex_lock(&node_lock);
+ /*
+ * Read from a profiling data copy to minimize reference tracking
+- * complexity and concurrent access.
++ * complexity and concurrent access and to keep accumulating multiple
++ * profiling data sets associated with one node simple.
+ */
+- info = gcov_info_dup(get_node_info(node));
++ info = get_accumulated_info(node);
+ if (!info)
+ goto out_unlock;
+ iter = gcov_iter_new(info);
+@@ -225,12 +249,25 @@ static struct gcov_node *get_node_by_name(const char *name)
+ return NULL;
+ }
+
++/*
++ * Reset all profiling data associated with the specified node.
++ */
++static void reset_node(struct gcov_node *node)
++{
++ int i;
++
++ if (node->unloaded_info)
++ gcov_info_reset(node->unloaded_info);
++ for (i = 0; i < node->num_loaded; i++)
++ gcov_info_reset(node->loaded_info[i]);
++}
++
+ static void remove_node(struct gcov_node *node);
+
+ /*
+ * write() implementation for gcov data files. Reset profiling data for the
+- * associated file. If the object file has been unloaded (i.e. this is
+- * a "ghost" node), remove the debug fs node as well.
++ * corresponding file. If all associated object files have been unloaded,
++ * remove the debug fs node as well.
+ */
+ static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
+ size_t len, loff_t *pos)
+@@ -245,10 +282,10 @@ static ssize_t gcov_seq_write(struct file *file, const char __user *addr,
+ node = get_node_by_name(info->filename);
+ if (node) {
+ /* Reset counts or remove node for unloaded modules. */
+- if (node->ghost)
++ if (node->num_loaded == 0)
+ remove_node(node);
+ else
+- gcov_info_reset(node->info);
++ reset_node(node);
+ }
+ /* Reset counts for open file. */
+ gcov_info_reset(info);
+@@ -378,7 +415,10 @@ static void init_node(struct gcov_node *node, struct gcov_info *info,
+ INIT_LIST_HEAD(&node->list);
+ INIT_LIST_HEAD(&node->children);
+ INIT_LIST_HEAD(&node->all);
+- node->info = info;
++ if (node->loaded_info) {
++ node->loaded_info[0] = info;
++ node->num_loaded = 1;
++ }
+ node->parent = parent;
+ if (name)
+ strcpy(node->name, name);
+@@ -394,9 +434,13 @@ static struct gcov_node *new_node(struct gcov_node *parent,
+ struct gcov_node *node;
+
+ node = kzalloc(sizeof(struct gcov_node) + strlen(name) + 1, GFP_KERNEL);
+- if (!node) {
+- pr_warning("out of memory\n");
+- return NULL;
++ if (!node)
++ goto err_nomem;
++ if (info) {
++ node->loaded_info = kcalloc(1, sizeof(struct gcov_info *),
++ GFP_KERNEL);
++ if (!node->loaded_info)
++ goto err_nomem;
+ }
+ init_node(node, info, name, parent);
+ /* Differentiate between gcov data file nodes and directory nodes. */
+@@ -416,6 +460,11 @@ static struct gcov_node *new_node(struct gcov_node *parent,
+ list_add(&node->all, &all_head);
+
+ return node;
++
++err_nomem:
++ kfree(node);
++ pr_warning("out of memory\n");
++ return NULL;
+ }
+
+ /* Remove symbolic links associated with node. */
+@@ -441,8 +490,9 @@ static void release_node(struct gcov_node *node)
+ list_del(&node->all);
+ debugfs_remove(node->dentry);
+ remove_links(node);
+- if (node->ghost)
+- gcov_info_free(node->ghost);
++ kfree(node->loaded_info);
++ if (node->unloaded_info)
++ gcov_info_free(node->unloaded_info);
+ kfree(node);
+ }
+
+@@ -477,7 +527,7 @@ static struct gcov_node *get_child_by_name(struct gcov_node *parent,
+
+ /*
+ * write() implementation for reset file. Reset all profiling data to zero
+- * and remove ghost nodes.
++ * and remove nodes for which all associated object files are unloaded.
+ */
+ static ssize_t reset_write(struct file *file, const char __user *addr,
+ size_t len, loff_t *pos)
+@@ -487,8 +537,8 @@ static ssize_t reset_write(struct file *file, const char __user *addr,
+ mutex_lock(&node_lock);
+ restart:
+ list_for_each_entry(node, &all_head, all) {
+- if (node->info)
+- gcov_info_reset(node->info);
++ if (node->num_loaded > 0)
++ reset_node(node);
+ else if (list_empty(&node->children)) {
+ remove_node(node);
+ /* Several nodes may have gone - restart loop. */
+@@ -564,37 +614,115 @@ err_remove:
+ }
+
+ /*
+- * The profiling data set associated with this node is being unloaded. Store a
+- * copy of the profiling data and turn this node into a "ghost".
++ * Associate a profiling data set with an existing node. Needs to be called
++ * with node_lock held.
+ */
+-static int ghost_node(struct gcov_node *node)
++static void add_info(struct gcov_node *node, struct gcov_info *info)
+ {
+- node->ghost = gcov_info_dup(node->info);
+- if (!node->ghost) {
+- pr_warning("could not save data for '%s' (out of memory)\n",
+- node->info->filename);
+- return -ENOMEM;
++ struct gcov_info **loaded_info;
++ int num = node->num_loaded;
++
++ /*
++ * Prepare new array. This is done first to simplify cleanup in
++ * case the new data set is incompatible, the node only contains
++ * unloaded data sets and there's not enough memory for the array.
++ */
++ loaded_info = kcalloc(num + 1, sizeof(struct gcov_info *), GFP_KERNEL);
++ if (!loaded_info) {
++ pr_warning("could not add '%s' (out of memory)\n",
++ info->filename);
++ return;
++ }
++ memcpy(loaded_info, node->loaded_info,
++ num * sizeof(struct gcov_info *));
++ loaded_info[num] = info;
++ /* Check if the new data set is compatible. */
++ if (num == 0) {
++ /*
++ * A module was unloaded, modified and reloaded. The new
++ * data set replaces the copy of the last one.
++ */
++ if (!gcov_info_is_compatible(node->unloaded_info, info)) {
++ pr_warning("discarding saved data for %s "
++ "(incompatible version)\n", info->filename);
++ gcov_info_free(node->unloaded_info);
++ node->unloaded_info = NULL;
++ }
++ } else {
++ /*
++ * Two different versions of the same object file are loaded.
++ * The initial one takes precedence.
++ */
++ if (!gcov_info_is_compatible(node->loaded_info[0], info)) {
++ pr_warning("could not add '%s' (incompatible "
++ "version)\n", info->filename);
++ kfree(loaded_info);
++ return;
++ }
+ }
+- node->info = NULL;
++ /* Overwrite previous array. */
++ kfree(node->loaded_info);
++ node->loaded_info = loaded_info;
++ node->num_loaded = num + 1;
++}
+
+- return 0;
++/*
++ * Return the index of a profiling data set associated with a node.
++ */
++static int get_info_index(struct gcov_node *node, struct gcov_info *info)
++{
++ int i;
++
++ for (i = 0; i < node->num_loaded; i++) {
++ if (node->loaded_info[i] == info)
++ return i;
++ }
++ return -ENOENT;
+ }
+
+ /*
+- * Profiling data for this node has been loaded again. Add profiling data
+- * from previous instantiation and turn this node into a regular node.
++ * Save the data of a profiling data set which is being unloaded.
+ */
+-static void revive_node(struct gcov_node *node, struct gcov_info *info)
++static void save_info(struct gcov_node *node, struct gcov_info *info)
+ {
+- if (gcov_info_is_compatible(node->ghost, info))
+- gcov_info_add(info, node->ghost);
++ if (node->unloaded_info)
++ gcov_info_add(node->unloaded_info, info);
+ else {
+- pr_warning("discarding saved data for '%s' (version changed)\n",
++ node->unloaded_info = gcov_info_dup(info);
++ if (!node->unloaded_info) {
++ pr_warning("could not save data for '%s' "
++ "(out of memory)\n", info->filename);
++ }
++ }
++}
++
++/*
++ * Disassociate a profiling data set from a node. Needs to be called with
++ * node_lock held.
++ */
++static void remove_info(struct gcov_node *node, struct gcov_info *info)
++{
++ int i;
++
++ i = get_info_index(node, info);
++ if (i < 0) {
++ pr_warning("could not remove '%s' (not found)\n",
+ info->filename);
++ return;
+ }
+- gcov_info_free(node->ghost);
+- node->ghost = NULL;
+- node->info = info;
++ if (gcov_persist)
++ save_info(node, info);
++ /* Shrink array. */
++ node->loaded_info[i] = node->loaded_info[node->num_loaded - 1];
++ node->num_loaded--;
++ if (node->num_loaded > 0)
++ return;
++ /* Last loaded data set was removed. */
++ kfree(node->loaded_info);
++ node->loaded_info = NULL;
++ node->num_loaded = 0;
++ if (!node->unloaded_info)
++ remove_node(node);
+ }
+
+ /*
+@@ -609,30 +737,18 @@ void gcov_event(enum gcov_action action, struct gcov_info *info)
+ node = get_node_by_name(info->filename);
+ switch (action) {
+ case GCOV_ADD:
+- /* Add new node or revive ghost. */
+- if (!node) {
++ if (node)
++ add_info(node, info);
++ else
+ add_node(info);
+- break;
+- }
+- if (gcov_persist)
+- revive_node(node, info);
+- else {
+- pr_warning("could not add '%s' (already exists)\n",
+- info->filename);
+- }
+ break;
+ case GCOV_REMOVE:
+- /* Remove node or turn into ghost. */
+- if (!node) {
++ if (node)
++ remove_info(node, info);
++ else {
+ pr_warning("could not remove '%s' (not found)\n",
+ info->filename);
+- break;
+ }
+- if (gcov_persist) {
+- if (!ghost_node(node))
+- break;
+- }
+- remove_node(node);
+ break;
+ }
+ mutex_unlock(&node_lock);
+diff --git a/kernel/groups.c b/kernel/groups.c
+index 2b45b2e..f0c2528 100644
+--- a/kernel/groups.c
++++ b/kernel/groups.c
+@@ -143,10 +143,9 @@ int groups_search(const struct group_info *group_info, gid_t grp)
+ right = group_info->ngroups;
+ while (left < right) {
+ unsigned int mid = (left+right)/2;
+- int cmp = grp - GROUP_AT(group_info, mid);
+- if (cmp > 0)
++ if (grp > GROUP_AT(group_info, mid))
+ left = mid + 1;
+- else if (cmp < 0)
++ else if (grp < GROUP_AT(group_info, mid))
+ right = mid;
+ else
+ return 1;
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 9990074..152214d 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -542,7 +542,6 @@ struct rq {
+ struct load_weight load;
+ unsigned long nr_load_updates;
+ u64 nr_switches;
+- u64 nr_migrations_in;
+
+ struct cfs_rq cfs;
+ struct rt_rq rt;
+@@ -943,14 +942,25 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
++ * Check whether the task is waking, we use this to synchronize ->cpus_allowed
++ * against ttwu().
++ */
++static inline int task_is_waking(struct task_struct *p)
++{
++ return unlikely(p->state == TASK_WAKING);
++}
++
++/*
+ * __task_rq_lock - lock the runqueue a given task resides on.
+ * Must be called interrupts disabled.
+ */
+ static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+ {
++ struct rq *rq;
++
+ for (;;) {
+- struct rq *rq = task_rq(p);
++ rq = task_rq(p);
+ spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p)))
+ return rq;
+@@ -1822,6 +1832,20 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
+ static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
+
++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
++{
++ set_task_rq(p, cpu);
++#ifdef CONFIG_SMP
++ /*
++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
++ * successfuly executed on another CPU. We must ensure that updates of
++ * per-task data have been completed by this moment.
++ */
++ smp_wmb();
++ task_thread_info(p)->cpu = cpu;
++#endif
++}
++
+ #include "sched_stats.h"
+ #include "sched_idletask.c"
+ #include "sched_fair.c"
+@@ -1871,13 +1895,14 @@ static void update_avg(u64 *avg, u64 sample)
+ *avg += diff >> 3;
+ }
+
+-static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ if (wakeup)
+ p->se.start_runtime = p->se.sum_exec_runtime;
+
+ sched_info_queued(p);
+- p->sched_class->enqueue_task(rq, p, wakeup);
++ p->sched_class->enqueue_task(rq, p, wakeup, head);
+ p->se.on_rq = 1;
+ }
+
+@@ -1953,7 +1978,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+ if (task_contributes_to_load(p))
+ rq->nr_uninterruptible--;
+
+- enqueue_task(rq, p, wakeup);
++ enqueue_task(rq, p, wakeup, false);
+ inc_nr_running(rq);
+ }
+
+@@ -1978,20 +2003,6 @@ inline int task_curr(const struct task_struct *p)
+ return cpu_curr(task_cpu(p)) == p;
+ }
+
+-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+-{
+- set_task_rq(p, cpu);
+-#ifdef CONFIG_SMP
+- /*
+- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+- * successfuly executed on another CPU. We must ensure that updates of
+- * per-task data have been completed by this moment.
+- */
+- smp_wmb();
+- task_thread_info(p)->cpu = cpu;
+-#endif
+-}
+-
+ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+@@ -2018,21 +2029,15 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
+- struct rq *rq = cpu_rq(cpu);
+- unsigned long flags;
+-
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ WARN_ON(1);
+ return;
+ }
+
+- spin_lock_irqsave(&rq->lock, flags);
+- set_task_cpu(p, cpu);
+ p->cpus_allowed = cpumask_of_cpu(cpu);
+ p->rt.nr_cpus_allowed = 1;
+ p->flags |= PF_THREAD_BOUND;
+- spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
+
+@@ -2070,35 +2075,23 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+ int old_cpu = task_cpu(p);
+- struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
+- struct cfs_rq *old_cfsrq = task_cfs_rq(p),
+- *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
+- u64 clock_offset;
+
+- clock_offset = old_rq->clock - new_rq->clock;
++#ifdef CONFIG_SCHED_DEBUG
++ /*
++ * We should never call set_task_cpu() on a blocked task,
++ * ttwu() will sort out the placement.
++ */
++ WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
++ !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
++#endif
+
+ trace_sched_migrate_task(p, new_cpu);
+
+-#ifdef CONFIG_SCHEDSTATS
+- if (p->se.wait_start)
+- p->se.wait_start -= clock_offset;
+- if (p->se.sleep_start)
+- p->se.sleep_start -= clock_offset;
+- if (p->se.block_start)
+- p->se.block_start -= clock_offset;
+-#endif
+ if (old_cpu != new_cpu) {
+ p->se.nr_migrations++;
+- new_rq->nr_migrations_in++;
+-#ifdef CONFIG_SCHEDSTATS
+- if (task_hot(p, old_rq->clock, NULL))
+- schedstat_inc(p, se.nr_forced2_migrations);
+-#endif
+ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+ 1, 1, NULL, 0);
+ }
+- p->se.vruntime -= old_cfsrq->min_vruntime -
+- new_cfsrq->min_vruntime;
+
+ __set_task_cpu(p, new_cpu);
+ }
+@@ -2331,6 +2324,69 @@ void task_oncpu_function_call(struct task_struct *p,
+ preempt_enable();
+ }
+
++#ifdef CONFIG_SMP
++/*
++ * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
++ */
++static int select_fallback_rq(int cpu, struct task_struct *p)
++{
++ int dest_cpu;
++ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
++
++ /* Look for allowed, online CPU in same node. */
++ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
++ return dest_cpu;
++
++ /* Any allowed, online CPU? */
++ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
++ if (dest_cpu < nr_cpu_ids)
++ return dest_cpu;
++
++ /* No more Mr. Nice Guy. */
++ if (unlikely(dest_cpu >= nr_cpu_ids)) {
++ dest_cpu = cpuset_cpus_allowed_fallback(p);
++ /*
++ * Don't tell them about moving exiting tasks or
++ * kernel threads (both mm NULL), since they never
++ * leave kernel.
++ */
++ if (p->mm && printk_ratelimit()) {
++ printk(KERN_INFO "process %d (%s) no "
++ "longer affine to cpu%d\n",
++ task_pid_nr(p), p->comm, cpu);
++ }
++ }
++
++ return dest_cpu;
++}
++
++/*
++ * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
++ */
++static inline
++int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
++{
++ int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
++
++ /*
++ * In order not to call set_task_cpu() on a blocking task we need
++ * to rely on ttwu() to place the task on a valid ->cpus_allowed
++ * cpu.
++ *
++ * Since this is common to all placement strategies, this lives here.
++ *
++ * [ this allows ->select_task() to simply return task_cpu(p) and
++ * not worry about this generic constraint ]
++ */
++ if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
++ !cpu_online(cpu)))
++ cpu = select_fallback_rq(task_cpu(p), p);
++
++ return cpu;
++}
++#endif
++
+ /***
+ * try_to_wake_up - wake up a thread
+ * @p: the to-be-woken-up thread
+@@ -2379,22 +2435,34 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
+ *
+ * First fix up the nr_uninterruptible count:
+ */
+- if (task_contributes_to_load(p))
+- rq->nr_uninterruptible--;
++ if (task_contributes_to_load(p)) {
++ if (likely(cpu_online(orig_cpu)))
++ rq->nr_uninterruptible--;
++ else
++ this_rq()->nr_uninterruptible--;
++ }
+ p->state = TASK_WAKING;
+- task_rq_unlock(rq, &flags);
+
+- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
++ if (p->sched_class->task_waking)
++ p->sched_class->task_waking(rq, p);
++
++ cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
+ if (cpu != orig_cpu)
+ set_task_cpu(p, cpu);
++ __task_rq_unlock(rq);
+
+- rq = task_rq_lock(p, &flags);
+-
+- if (rq != orig_rq)
+- update_rq_clock(rq);
++ rq = cpu_rq(cpu);
++ spin_lock(&rq->lock);
++ update_rq_clock(rq);
+
++ /*
++ * We migrated the task without holding either rq->lock, however
++ * since the task is not on the task list itself, nobody else
++ * will try and migrate the task, hence the rq should match the
++ * cpu we just moved it to.
++ */
++ WARN_ON(task_cpu(p) != cpu);
+ WARN_ON(p->state != TASK_WAKING);
+- cpu = task_cpu(p);
+
+ #ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+@@ -2447,8 +2515,8 @@ out_running:
+
+ p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_wake_up)
+- p->sched_class->task_wake_up(rq, p);
++ if (p->sched_class->task_woken)
++ p->sched_class->task_woken(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+@@ -2528,7 +2596,6 @@ static void __sched_fork(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+- p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+@@ -2549,14 +2616,6 @@ static void __sched_fork(struct task_struct *p)
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ INIT_HLIST_HEAD(&p->preempt_notifiers);
+ #endif
+-
+- /*
+- * We mark the process as running here, but have not actually
+- * inserted it onto the runqueue yet. This guarantees that
+- * nobody will actually run it, and a signal or other external
+- * event cannot wake it up and insert it on the runqueue either.
+- */
+- p->state = TASK_RUNNING;
+ }
+
+ /*
+@@ -2567,6 +2626,12 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ int cpu = get_cpu();
+
+ __sched_fork(p);
++ /*
++ * We mark the process as running here. This guarantees that
++ * nobody will actually run it, and a signal or other external
++ * event cannot wake it up and insert it on the runqueue either.
++ */
++ p->state = TASK_RUNNING;
+
+ /*
+ * Revert to default priority/policy on fork if requested.
+@@ -2598,9 +2663,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ if (!rt_prio(p->prio))
+ p->sched_class = &fair_sched_class;
+
+-#ifdef CONFIG_SMP
+- cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
+-#endif
++ if (p->sched_class->task_fork)
++ p->sched_class->task_fork(p);
++
+ set_task_cpu(p, cpu);
+
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+@@ -2630,28 +2695,38 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+ {
+ unsigned long flags;
+ struct rq *rq;
++ int cpu = get_cpu();
+
++#ifdef CONFIG_SMP
+ rq = task_rq_lock(p, &flags);
+- BUG_ON(p->state != TASK_RUNNING);
+- update_rq_clock(rq);
++ p->state = TASK_WAKING;
+
+- if (!p->sched_class->task_new || !current->se.on_rq) {
+- activate_task(rq, p, 0);
+- } else {
+- /*
+- * Let the scheduling class do new task startup
+- * management (if any):
+- */
+- p->sched_class->task_new(rq, p);
+- inc_nr_running(rq);
+- }
++ /*
++ * Fork balancing, do it here and not earlier because:
++ * - cpus_allowed can change in the fork path
++ * - any previously selected cpu might disappear through hotplug
++ *
++ * We set TASK_WAKING so that select_task_rq() can drop rq->lock
++ * without people poking at ->cpus_allowed.
++ */
++ cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
++ set_task_cpu(p, cpu);
++
++ p->state = TASK_RUNNING;
++ task_rq_unlock(rq, &flags);
++#endif
++
++ rq = task_rq_lock(p, &flags);
++ update_rq_clock(rq);
++ activate_task(rq, p, 0);
+ trace_sched_wakeup_new(rq, p, 1);
+ check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_wake_up)
+- p->sched_class->task_wake_up(rq, p);
++ if (p->sched_class->task_woken)
++ p->sched_class->task_woken(rq, p);
+ #endif
+ task_rq_unlock(rq, &flags);
++ put_cpu();
+ }
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -3038,15 +3113,6 @@ static void calc_load_account_active(struct rq *this_rq)
+ }
+
+ /*
+- * Externally visible per-cpu scheduler statistics:
+- * cpu_nr_migrations(cpu) - number of migrations into that cpu
+- */
+-u64 cpu_nr_migrations(int cpu)
+-{
+- return cpu_rq(cpu)->nr_migrations_in;
+-}
+-
+-/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC).
+ */
+@@ -3128,24 +3194,28 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ }
+
+ /*
+- * If dest_cpu is allowed for this process, migrate the task to it.
+- * This is accomplished by forcing the cpu_allowed mask to only
+- * allow dest_cpu, which will force the cpu onto dest_cpu. Then
+- * the cpu_allowed mask is restored.
++ * sched_exec - execve() is a valuable balancing opportunity, because at
++ * this point the task has the smallest effective memory and cache footprint.
+ */
+-static void sched_migrate_task(struct task_struct *p, int dest_cpu)
++void sched_exec(void)
+ {
++ struct task_struct *p = current;
+ struct migration_req req;
+ unsigned long flags;
+ struct rq *rq;
++ int dest_cpu;
+
+ rq = task_rq_lock(p, &flags);
+- if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
+- || unlikely(!cpu_active(dest_cpu)))
+- goto out;
++ dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
++ if (dest_cpu == smp_processor_id())
++ goto unlock;
+
+- /* force the process onto the specified CPU */
+- if (migrate_task(p, dest_cpu, &req)) {
++ /*
++ * select_task_rq() can race against ->cpus_allowed
++ */
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
++ likely(cpu_active(dest_cpu)) &&
++ migrate_task(p, dest_cpu, &req)) {
+ /* Need to wait for migration thread (might exit: take ref). */
+ struct task_struct *mt = rq->migration_thread;
+
+@@ -3157,24 +3227,11 @@ static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+
+ return;
+ }
+-out:
++unlock:
+ task_rq_unlock(rq, &flags);
+ }
+
+ /*
+- * sched_exec - execve() is a valuable balancing opportunity, because at
+- * this point the task has the smallest effective memory and cache footprint.
+- */
+-void sched_exec(void)
+-{
+- int new_cpu, this_cpu = get_cpu();
+- new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
+- put_cpu();
+- if (new_cpu != this_cpu)
+- sched_migrate_task(current, new_cpu);
+-}
+-
+-/*
+ * pull_task - move a task from a remote runqueue to the local runqueue.
+ * Both runqueues must be locked.
+ */
+@@ -3621,7 +3678,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+
+ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = cpumask_weight(sched_domain_span(sd));
++ unsigned long weight = sd->span_weight;
+ unsigned long smt_gain = sd->smt_gain;
+
+ smt_gain /= weight;
+@@ -3654,7 +3711,7 @@ unsigned long scale_rt_power(int cpu)
+
+ static void update_cpu_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = cpumask_weight(sched_domain_span(sd));
++ unsigned long weight = sd->span_weight;
+ unsigned long power = SCHED_LOAD_SCALE;
+ struct sched_group *sdg = sd->groups;
+
+@@ -5974,14 +6031,15 @@ EXPORT_SYMBOL(wait_for_completion_killable);
+ */
+ bool try_wait_for_completion(struct completion *x)
+ {
++ unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irq(&x->wait.lock);
++ spin_lock_irqsave(&x->wait.lock, flags);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+- spin_unlock_irq(&x->wait.lock);
++ spin_unlock_irqrestore(&x->wait.lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -5996,12 +6054,13 @@ EXPORT_SYMBOL(try_wait_for_completion);
+ */
+ bool completion_done(struct completion *x)
+ {
++ unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irq(&x->wait.lock);
++ spin_lock_irqsave(&x->wait.lock, flags);
+ if (!x->done)
+ ret = 0;
+- spin_unlock_irq(&x->wait.lock);
++ spin_unlock_irqrestore(&x->wait.lock, flags);
+ return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
+@@ -6095,7 +6154,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq) {
+- enqueue_task(rq, p, 0);
++ enqueue_task(rq, p, 0, oldprio < prio);
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
+ }
+@@ -6139,7 +6198,7 @@ void set_user_nice(struct task_struct *p, long nice)
+ delta = p->prio - old_prio;
+
+ if (on_rq) {
+- enqueue_task(rq, p, 0);
++ enqueue_task(rq, p, 0, false);
+ /*
+ * If the task increased its priority or is running and
+ * lowered its priority, then reschedule its CPU:
+@@ -6530,7 +6589,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ return -EINVAL;
+
+ retval = -ESRCH;
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+ p = find_process_by_pid(pid);
+ if (p) {
+ retval = security_task_getscheduler(p);
+@@ -6538,7 +6597,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ retval = p->policy
+ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
+ }
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ return retval;
+ }
+
+@@ -6556,7 +6615,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ if (!param || pid < 0)
+ return -EINVAL;
+
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+ p = find_process_by_pid(pid);
+ retval = -ESRCH;
+ if (!p)
+@@ -6567,7 +6626,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ goto out_unlock;
+
+ lp.sched_priority = p->rt_priority;
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+
+ /*
+ * This one might sleep, we cannot do it with a spinlock held ...
+@@ -6577,7 +6636,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ return retval;
+
+ out_unlock:
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ return retval;
+ }
+
+@@ -6588,22 +6647,18 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+ int retval;
+
+ get_online_cpus();
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+
+ p = find_process_by_pid(pid);
+ if (!p) {
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ put_online_cpus();
+ return -ESRCH;
+ }
+
+- /*
+- * It is not safe to call set_cpus_allowed with the
+- * tasklist_lock held. We will bump the task_struct's
+- * usage count and then drop tasklist_lock.
+- */
++ /* Prevent p going away */
+ get_task_struct(p);
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ retval = -ENOMEM;
+@@ -6684,10 +6739,12 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ {
+ struct task_struct *p;
++ unsigned long flags;
++ struct rq *rq;
+ int retval;
+
+ get_online_cpus();
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+
+ retval = -ESRCH;
+ p = find_process_by_pid(pid);
+@@ -6698,10 +6755,12 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ if (retval)
+ goto out_unlock;
+
++ rq = task_rq_lock(p, &flags);
+ cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
++ task_rq_unlock(rq, &flags);
+
+ out_unlock:
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ put_online_cpus();
+
+ return retval;
+@@ -6940,6 +6999,8 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ {
+ struct task_struct *p;
+ unsigned int time_slice;
++ unsigned long flags;
++ struct rq *rq;
+ int retval;
+ struct timespec t;
+
+@@ -6947,7 +7008,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ return -EINVAL;
+
+ retval = -ESRCH;
+- read_lock(&tasklist_lock);
++ rcu_read_lock();
+ p = find_process_by_pid(pid);
+ if (!p)
+ goto out_unlock;
+@@ -6956,15 +7017,17 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ if (retval)
+ goto out_unlock;
+
+- time_slice = p->sched_class->get_rr_interval(p);
++ rq = task_rq_lock(p, &flags);
++ time_slice = p->sched_class->get_rr_interval(rq, p);
++ task_rq_unlock(rq, &flags);
+
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ jiffies_to_timespec(time_slice, &t);
+ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+ return retval;
+
+ out_unlock:
+- read_unlock(&tasklist_lock);
++ rcu_read_unlock();
+ return retval;
+ }
+
+@@ -7055,6 +7118,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
+ spin_lock_irqsave(&rq->lock, flags);
+
+ __sched_fork(idle);
++ idle->state = TASK_RUNNING;
+ idle->se.exec_start = sched_clock();
+
+ cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+@@ -7149,7 +7213,19 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ struct rq *rq;
+ int ret = 0;
+
++ /*
++ * Serialize against TASK_WAKING so that ttwu() and wunt() can
++ * drop the rq->lock and still rely on ->cpus_allowed.
++ */
++again:
++ while (task_is_waking(p))
++ cpu_relax();
+ rq = task_rq_lock(p, &flags);
++ if (task_is_waking(p)) {
++ task_rq_unlock(rq, &flags);
++ goto again;
++ }
++
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ ret = -EINVAL;
+ goto out;
+@@ -7178,7 +7254,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+
+ get_task_struct(mt);
+ task_rq_unlock(rq, &flags);
+- wake_up_process(rq->migration_thread);
++ wake_up_process(mt);
+ put_task_struct(mt);
+ wait_for_completion(&req.done);
+ tlb_migrate_finish(p->mm);
+@@ -7205,7 +7281,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ {
+ struct rq *rq_dest, *rq_src;
+- int ret = 0, on_rq;
++ int ret = 0;
+
+ if (unlikely(!cpu_active(dest_cpu)))
+ return ret;
+@@ -7217,19 +7293,17 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ /* Already moved. */
+ if (task_cpu(p) != src_cpu)
+ goto done;
+- /* Waking up, don't get in the way of try_to_wake_up(). */
+- if (p->state == TASK_WAKING)
+- goto fail;
+ /* Affinity changed (again). */
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ goto fail;
+
+- on_rq = p->se.on_rq;
+- if (on_rq)
++ /*
++ * If we're not on a rq, the next wake-up will ensure we're
++ * placed properly.
++ */
++ if (p->se.on_rq) {
+ deactivate_task(rq_src, p, 0);
+-
+- set_task_cpu(p, dest_cpu);
+- if (on_rq) {
++ set_task_cpu(p, dest_cpu);
+ activate_task(rq_dest, p, 0);
+ check_preempt_curr(rq_dest, p, 0);
+ }
+@@ -7308,57 +7382,29 @@ static int migration_thread(void *data)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-
+-static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
+-{
+- int ret;
+-
+- local_irq_disable();
+- ret = __migrate_task(p, src_cpu, dest_cpu);
+- local_irq_enable();
+- return ret;
+-}
+-
+ /*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ */
+-static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
++void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+- int dest_cpu;
+- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
+-
+-again:
+- /* Look for allowed, online CPU in same node. */
+- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+- goto move;
+-
+- /* Any allowed, online CPU? */
+- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+- if (dest_cpu < nr_cpu_ids)
+- goto move;
+-
+- /* No more Mr. Nice Guy. */
+- if (dest_cpu >= nr_cpu_ids) {
+- cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
+- dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
++ struct rq *rq = cpu_rq(dead_cpu);
++ int needs_cpu, uninitialized_var(dest_cpu);
++ unsigned long flags;
+
+- /*
+- * Don't tell them about moving exiting tasks or
+- * kernel threads (both mm NULL), since they never
+- * leave kernel.
+- */
+- if (p->mm && printk_ratelimit()) {
+- printk(KERN_INFO "process %d (%s) no "
+- "longer affine to cpu%d\n",
+- task_pid_nr(p), p->comm, dead_cpu);
+- }
+- }
++ local_irq_save(flags);
+
+-move:
+- /* It can have affinity changed while we were choosing. */
+- if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
+- goto again;
++ spin_lock(&rq->lock);
++ needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
++ if (needs_cpu)
++ dest_cpu = select_fallback_rq(dead_cpu, p);
++ spin_unlock(&rq->lock);
++ /*
++ * It can only fail if we race with set_cpus_allowed(),
++ * in the racer should migrate the task anyway.
++ */
++ if (needs_cpu)
++ __migrate_task(p, dead_cpu, dest_cpu);
++ local_irq_restore(flags);
+ }
+
+ /*
+@@ -7752,14 +7798,23 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ cpu_rq(cpu)->migration_thread = NULL;
+ break;
+
+- case CPU_DEAD:
+- case CPU_DEAD_FROZEN:
+- cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
+- migrate_live_tasks(cpu);
++ case CPU_POST_DEAD:
++ /*
++ * Bring the migration thread down in CPU_POST_DEAD event,
++ * since the timers should have got migrated by now and thus
++ * we should not see a deadlock between trying to kill the
++ * migration thread and the sched_rt_period_timer.
++ */
+ rq = cpu_rq(cpu);
+ kthread_stop(rq->migration_thread);
+ put_task_struct(rq->migration_thread);
+ rq->migration_thread = NULL;
++ break;
++
++ case CPU_DEAD:
++ case CPU_DEAD_FROZEN:
++ migrate_live_tasks(cpu);
++ rq = cpu_rq(cpu);
+ /* Idle task back to normal (off runqueue, low prio) */
+ spin_lock_irq(&rq->lock);
+ update_rq_clock(rq);
+@@ -7768,7 +7823,6 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ rq->idle->sched_class = &idle_sched_class;
+ migrate_dead_tasks(cpu);
+ spin_unlock_irq(&rq->lock);
+- cpuset_unlock();
+ migrate_nr_uninterruptible(rq);
+ BUG_ON(rq->nr_running != 0);
+ calc_global_load_remove(rq);
+@@ -8112,6 +8166,9 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_domain *tmp;
+
++ for (tmp = sd; tmp; tmp = tmp->parent)
++ tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
++
+ /* Remove the sched domains which do not contribute to scheduling. */
+ for (tmp = sd; tmp; ) {
+ struct sched_domain *parent = tmp->parent;
+@@ -10099,13 +10156,13 @@ void sched_move_task(struct task_struct *tsk)
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ if (tsk->sched_class->moved_group)
+- tsk->sched_class->moved_group(tsk);
++ tsk->sched_class->moved_group(tsk, on_rq);
+ #endif
+
+ if (unlikely(running))
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
+- enqueue_task(rq, tsk, 0);
++ enqueue_task(rq, tsk, 0, false);
+
+ task_rq_unlock(rq, &flags);
+ }
+@@ -10877,12 +10934,30 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+ }
+
+ /*
++ * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
++ * in cputime_t units. As a result, cpuacct_update_stats calls
++ * percpu_counter_add with values large enough to always overflow the
++ * per cpu batch limit causing bad SMP scalability.
++ *
++ * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
++ * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
++ * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
++ */
++#ifdef CONFIG_SMP
++#define CPUACCT_BATCH \
++ min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
++#else
++#define CPUACCT_BATCH 0
++#endif
++
++/*
+ * Charge the system/user time to the task's accounting group.
+ */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+ {
+ struct cpuacct *ca;
++ int batch = CPUACCT_BATCH;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+@@ -10891,7 +10966,7 @@ static void cpuacct_update_stats(struct task_struct *tsk,
+ ca = task_ca(tsk);
+
+ do {
+- percpu_counter_add(&ca->cpustat[idx], val);
++ __percpu_counter_add(&ca->cpustat[idx], val, batch);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 6988cf0..6f836a8 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -423,7 +423,6 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+ P(se.nr_failed_migrations_running);
+ P(se.nr_failed_migrations_hot);
+ P(se.nr_forced_migrations);
+- P(se.nr_forced2_migrations);
+ P(se.nr_wakeups);
+ P(se.nr_wakeups_sync);
+ P(se.nr_wakeups_migrate);
+@@ -499,7 +498,6 @@ void proc_sched_set_task(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
+- p->se.nr_forced2_migrations = 0;
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index d80812d..01e311e 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -488,6 +488,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq, exec_clock, delta_exec);
+ delta_exec_weighted = calc_delta_fair(delta_exec, curr);
++
+ curr->vruntime += delta_exec_weighted;
+ update_min_vruntime(cfs_rq);
+ }
+@@ -743,16 +744,26 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ se->vruntime = vruntime;
+ }
+
++#define ENQUEUE_WAKEUP 1
++#define ENQUEUE_MIGRATE 2
++
+ static void
+-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
++enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
+ {
+ /*
++ * Update the normalized vruntime before updating min_vruntime
++ * through callig update_curr().
++ */
++ if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
++ se->vruntime += cfs_rq->min_vruntime;
++
++ /*
+ * Update run-time statistics of the 'current'.
+ */
+ update_curr(cfs_rq);
+ account_entity_enqueue(cfs_rq, se);
+
+- if (wakeup) {
++ if (flags & ENQUEUE_WAKEUP) {
+ place_entity(cfs_rq, se, 0);
+ enqueue_sleeper(cfs_rq, se);
+ }
+@@ -806,6 +817,14 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
+ __dequeue_entity(cfs_rq, se);
+ account_entity_dequeue(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
++
++ /*
++ * Normalize the entity after updating the min_vruntime because the
++ * update can refer to the ->curr item and we need to reflect this
++ * movement in our normalized position.
++ */
++ if (!sleep)
++ se->vruntime -= cfs_rq->min_vruntime;
+ }
+
+ /*
+@@ -1012,17 +1031,24 @@ static inline void hrtick_update(struct rq *rq)
+ * increased. Here we update the fair scheduling stats and
+ * then put the task into the rbtree:
+ */
+-static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
++ int flags = 0;
++
++ if (wakeup)
++ flags |= ENQUEUE_WAKEUP;
++ if (p->state == TASK_WAKING)
++ flags |= ENQUEUE_MIGRATE;
+
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+ cfs_rq = cfs_rq_of(se);
+- enqueue_entity(cfs_rq, se, wakeup);
+- wakeup = 1;
++ enqueue_entity(cfs_rq, se, flags);
++ flags = ENQUEUE_WAKEUP;
+ }
+
+ hrtick_update(rq);
+@@ -1098,6 +1124,14 @@ static void yield_task_fair(struct rq *rq)
+
+ #ifdef CONFIG_SMP
+
++static void task_waking_fair(struct rq *rq, struct task_struct *p)
++{
++ struct sched_entity *se = &p->se;
++ struct cfs_rq *cfs_rq = cfs_rq_of(se);
++
++ se->vruntime -= cfs_rq->min_vruntime;
++}
++
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * effective_load() calculates the load change as seen from the root_task_group
+@@ -1216,6 +1250,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
++ rcu_read_lock();
+ if (sync) {
+ tg = task_group(current);
+ weight = current->se.load.weight;
+@@ -1241,6 +1276,7 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ balanced = !this_load ||
+ 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
+ imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
++ rcu_read_unlock();
+
+ /*
+ * If the currently running task will sleep within
+@@ -1348,6 +1384,56 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+ }
+
+ /*
++ * Try and locate an idle CPU in the sched_domain.
++ */
++static int select_idle_sibling(struct task_struct *p, int target)
++{
++ int cpu = smp_processor_id();
++ int prev_cpu = task_cpu(p);
++ struct sched_domain *sd;
++ int i;
++
++ /*
++ * If the task is going to be woken-up on this cpu and if it is
++ * already idle, then it is the right target.
++ */
++ if (target == cpu && idle_cpu(cpu))
++ return cpu;
++
++ /*
++ * If the task is going to be woken-up on the cpu where it previously
++ * ran and if it is currently idle, then it the right target.
++ */
++ if (target == prev_cpu && idle_cpu(prev_cpu))
++ return prev_cpu;
++
++ /*
++ * Otherwise, iterate the domains and find an elegible idle cpu.
++ */
++ for_each_domain(target, sd) {
++ if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
++ break;
++
++ for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
++ if (idle_cpu(i)) {
++ target = i;
++ break;
++ }
++ }
++
++ /*
++ * Lets stop looking for an idle sibling when we reached
++ * the domain that spans the current cpu and prev_cpu.
++ */
++ if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
++ cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
++ break;
++ }
++
++ return target;
++}
++
++/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+@@ -1358,7 +1444,8 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+ *
+ * preempt must be disabled.
+ */
+-static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
++static int
++select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
+ {
+ struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ int cpu = smp_processor_id();
+@@ -1375,7 +1462,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ new_cpu = prev_cpu;
+ }
+
+- rcu_read_lock();
+ for_each_domain(cpu, tmp) {
+ if (!(tmp->flags & SD_LOAD_BALANCE))
+ continue;
+@@ -1404,38 +1490,14 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ want_sd = 0;
+ }
+
+- if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
+- int candidate = -1, i;
+-
+- if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
+- candidate = cpu;
+-
+- /*
+- * Check for an idle shared cache.
+- */
+- if (tmp->flags & SD_PREFER_SIBLING) {
+- if (candidate == cpu) {
+- if (!cpu_rq(prev_cpu)->cfs.nr_running)
+- candidate = prev_cpu;
+- }
+-
+- if (candidate == -1 || candidate == cpu) {
+- for_each_cpu(i, sched_domain_span(tmp)) {
+- if (!cpumask_test_cpu(i, &p->cpus_allowed))
+- continue;
+- if (!cpu_rq(i)->cfs.nr_running) {
+- candidate = i;
+- break;
+- }
+- }
+- }
+- }
+-
+- if (candidate >= 0) {
+- affine_sd = tmp;
+- want_affine = 0;
+- cpu = candidate;
+- }
++ /*
++ * If both cpu and prev_cpu are part of this domain,
++ * cpu is a valid SD_WAKE_AFFINE target.
++ */
++ if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
++ cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
++ affine_sd = tmp;
++ want_affine = 0;
+ }
+
+ if (!want_sd && !want_affine)
+@@ -1448,23 +1510,28 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ sd = tmp;
+ }
+
++#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (sched_feat(LB_SHARES_UPDATE)) {
+ /*
+ * Pick the largest domain to update shares over
+ */
+ tmp = sd;
+- if (affine_sd && (!tmp ||
+- cpumask_weight(sched_domain_span(affine_sd)) >
+- cpumask_weight(sched_domain_span(sd))))
++ if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
+ tmp = affine_sd;
+
+- if (tmp)
++ if (tmp) {
++ spin_unlock(&rq->lock);
+ update_shares(tmp);
++ spin_lock(&rq->lock);
++ }
+ }
++#endif
+
+- if (affine_sd && wake_affine(affine_sd, p, sync)) {
+- new_cpu = cpu;
+- goto out;
++ if (affine_sd) {
++ if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
++ return select_idle_sibling(p, cpu);
++ else
++ return select_idle_sibling(p, prev_cpu);
+ }
+
+ while (sd) {
+@@ -1495,10 +1562,10 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = new_cpu;
+- weight = cpumask_weight(sched_domain_span(sd));
++ weight = sd->span_weight;
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+- if (weight <= cpumask_weight(sched_domain_span(tmp)))
++ if (weight <= tmp->span_weight)
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+@@ -1506,8 +1573,6 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag
+ /* while loop will break here if sd == NULL */
+ }
+
+-out:
+- rcu_read_unlock();
+ return new_cpu;
+ }
+ #endif /* CONFIG_SMP */
+@@ -1911,28 +1976,32 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ }
+
+ /*
+- * Share the fairness runtime between parent and child, thus the
+- * total amount of pressure for CPU stays equal - new tasks
+- * get a chance to run but frequent forkers are not allowed to
+- * monopolize the CPU. Note: the parent runqueue is locked,
+- * the child is not running yet.
++ * called on fork with the child task as argument from the parent's context
++ * - child not yet on the tasklist
++ * - preemption disabled
+ */
+-static void task_new_fair(struct rq *rq, struct task_struct *p)
++static void task_fork_fair(struct task_struct *p)
+ {
+- struct cfs_rq *cfs_rq = task_cfs_rq(p);
++ struct cfs_rq *cfs_rq = task_cfs_rq(current);
+ struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+ int this_cpu = smp_processor_id();
++ struct rq *rq = this_rq();
++ unsigned long flags;
++
++ spin_lock_irqsave(&rq->lock, flags);
++
++ update_rq_clock(rq);
+
+- sched_info_queued(p);
++ if (unlikely(task_cpu(p) != this_cpu))
++ __set_task_cpu(p, this_cpu);
+
+ update_curr(cfs_rq);
++
+ if (curr)
+ se->vruntime = curr->vruntime;
+ place_entity(cfs_rq, se, 1);
+
+- /* 'curr' will be NULL if the child belongs to a different group */
+- if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
+- curr && entity_before(curr, se)) {
++ if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
+ /*
+ * Upon rescheduling, sched_class::put_prev_task() will place
+ * 'current' within the tree based on its new key value.
+@@ -1941,7 +2010,9 @@ static void task_new_fair(struct rq *rq, struct task_struct *p)
+ resched_task(rq->curr);
+ }
+
+- enqueue_task_fair(rq, p, 0);
++ se->vruntime -= cfs_rq->min_vruntime;
++
++ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+
+ /*
+@@ -1994,30 +2065,27 @@ static void set_curr_task_fair(struct rq *rq)
+ }
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-static void moved_group_fair(struct task_struct *p)
++static void moved_group_fair(struct task_struct *p, int on_rq)
+ {
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+ update_curr(cfs_rq);
+- place_entity(cfs_rq, &p->se, 1);
++ if (!on_rq)
++ place_entity(cfs_rq, &p->se, 1);
+ }
+ #endif
+
+-unsigned int get_rr_interval_fair(struct task_struct *task)
++unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
+ {
+ struct sched_entity *se = &task->se;
+- unsigned long flags;
+- struct rq *rq;
+ unsigned int rr_interval = 0;
+
+ /*
+ * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+ * idle runqueue:
+ */
+- rq = task_rq_lock(task, &flags);
+ if (rq->cfs.load.weight)
+ rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
+- task_rq_unlock(rq, &flags);
+
+ return rr_interval;
+ }
+@@ -2043,11 +2111,13 @@ static const struct sched_class fair_sched_class = {
+ .move_one_task = move_one_task_fair,
+ .rq_online = rq_online_fair,
+ .rq_offline = rq_offline_fair,
++
++ .task_waking = task_waking_fair,
+ #endif
+
+ .set_curr_task = set_curr_task_fair,
+ .task_tick = task_tick_fair,
+- .task_new = task_new_fair,
++ .task_fork = task_fork_fair,
+
+ .prio_changed = prio_changed_fair,
+ .switched_to = switched_to_fair,
+diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
+index b133a28..93ad2e7 100644
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -6,7 +6,8 @@
+ */
+
+ #ifdef CONFIG_SMP
+-static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
++static int
++select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+ {
+ return task_cpu(p); /* IDLE tasks as never migrated */
+ }
+@@ -97,7 +98,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+ check_preempt_curr(rq, p, 0);
+ }
+
+-unsigned int get_rr_interval_idle(struct task_struct *task)
++unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
+ {
+ return 0;
+ }
+diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
+index a4d790c..af24fab 100644
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -194,7 +194,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+ return rt_se->my_q;
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+
+ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+@@ -204,7 +204,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+
+ if (rt_rq->rt_nr_running) {
+ if (rt_se && !on_rt_rq(rt_se))
+- enqueue_rt_entity(rt_se);
++ enqueue_rt_entity(rt_se, false);
+ if (rt_rq->highest_prio.curr < curr->prio)
+ resched_task(curr);
+ }
+@@ -803,7 +803,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+ dec_rt_group(rt_se, rt_rq);
+ }
+
+-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
++static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ {
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+ struct rt_prio_array *array = &rt_rq->active;
+@@ -819,7 +819,10 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
+ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+ return;
+
+- list_add_tail(&rt_se->run_list, queue);
++ if (head)
++ list_add(&rt_se->run_list, queue);
++ else
++ list_add_tail(&rt_se->run_list, queue);
+ __set_bit(rt_se_prio(rt_se), array->bitmap);
+
+ inc_rt_tasks(rt_se, rt_rq);
+@@ -856,11 +859,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+ }
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ {
+ dequeue_rt_stack(rt_se);
+ for_each_sched_rt_entity(rt_se)
+- __enqueue_rt_entity(rt_se);
++ __enqueue_rt_entity(rt_se, head);
+ }
+
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+@@ -871,21 +874,22 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+ if (rt_rq && rt_rq->rt_nr_running)
+- __enqueue_rt_entity(rt_se);
++ __enqueue_rt_entity(rt_se, false);
+ }
+ }
+
+ /*
+ * Adding/removing a task to/from a priority array:
+ */
+-static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
++static void
++enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
+ {
+ struct sched_rt_entity *rt_se = &p->rt;
+
+ if (wakeup)
+ rt_se->timeout = 0;
+
+- enqueue_rt_entity(rt_se);
++ enqueue_rt_entity(rt_se, head);
+
+ if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+ enqueue_pushable_task(rq, p);
+@@ -938,10 +942,9 @@ static void yield_task_rt(struct rq *rq)
+ #ifdef CONFIG_SMP
+ static int find_lowest_rq(struct task_struct *task);
+
+-static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
++static int
++select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
+ {
+- struct rq *rq = task_rq(p);
+-
+ if (sd_flag != SD_BALANCE_WAKE)
+ return smp_processor_id();
+
+@@ -1485,7 +1488,7 @@ static void post_schedule_rt(struct rq *rq)
+ * If we are not running and we are not going to reschedule soon, we should
+ * try to push tasks away now
+ */
+-static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
++static void task_woken_rt(struct rq *rq, struct task_struct *p)
+ {
+ if (!task_running(rq, p) &&
+ !test_tsk_need_resched(rq->curr) &&
+@@ -1734,7 +1737,7 @@ static void set_curr_task_rt(struct rq *rq)
+ dequeue_pushable_task(rq, p);
+ }
+
+-unsigned int get_rr_interval_rt(struct task_struct *task)
++unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
+ {
+ /*
+ * Time slice is 0 for SCHED_FIFO tasks
+@@ -1766,7 +1769,7 @@ static const struct sched_class rt_sched_class = {
+ .rq_offline = rq_offline_rt,
+ .pre_schedule = pre_schedule_rt,
+ .post_schedule = post_schedule_rt,
+- .task_wake_up = task_wake_up_rt,
++ .task_woken = task_woken_rt,
+ .switched_from = switched_from_rt,
+ #endif
+
+diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
+index 0cccb6c..22cf21e 100644
+--- a/kernel/trace/ftrace.c
++++ b/kernel/trace/ftrace.c
+@@ -369,11 +369,18 @@ static int function_stat_show(struct seq_file *m, void *v)
+ {
+ struct ftrace_profile *rec = v;
+ char str[KSYM_SYMBOL_LEN];
++ int ret = 0;
+ #ifdef CONFIG_FUNCTION_GRAPH_TRACER
+- static DEFINE_MUTEX(mutex);
+ static struct trace_seq s;
+ unsigned long long avg;
+ #endif
++ mutex_lock(&ftrace_profile_lock);
++
++ /* we raced with function_profile_reset() */
++ if (unlikely(rec->counter == 0)) {
++ ret = -EBUSY;
++ goto out;
++ }
+
+ kallsyms_lookup(rec->ip, NULL, NULL, NULL, str);
+ seq_printf(m, " %-30.30s %10lu", str, rec->counter);
+@@ -383,17 +390,17 @@ static int function_stat_show(struct seq_file *m, void *v)
+ avg = rec->time;
+ do_div(avg, rec->counter);
+
+- mutex_lock(&mutex);
+ trace_seq_init(&s);
+ trace_print_graph_duration(rec->time, &s);
+ trace_seq_puts(&s, " ");
+ trace_print_graph_duration(avg, &s);
+ trace_print_seq(m, &s);
+- mutex_unlock(&mutex);
+ #endif
+ seq_putc(m, '\n');
++out:
++ mutex_unlock(&ftrace_profile_lock);
+
+- return 0;
++ return ret;
+ }
+
+ static void ftrace_profile_reset(struct ftrace_profile_stat *stat)
+@@ -1473,6 +1480,8 @@ static void *t_start(struct seq_file *m, loff_t *pos)
+ if (*pos > 0)
+ return t_hash_start(m, pos);
+ iter->flags |= FTRACE_ITER_PRINTALL;
++ /* reset in case of seek/pread */
++ iter->flags &= ~FTRACE_ITER_HASH;
+ return iter;
+ }
+
+@@ -2393,7 +2402,7 @@ static const struct file_operations ftrace_filter_fops = {
+ .open = ftrace_filter_open,
+ .read = seq_read,
+ .write = ftrace_filter_write,
+- .llseek = ftrace_regex_lseek,
++ .llseek = no_llseek,
+ .release = ftrace_filter_release,
+ };
+
+diff --git a/mm/bounce.c b/mm/bounce.c
+index a2b76a5..1d5fa08 100644
+--- a/mm/bounce.c
++++ b/mm/bounce.c
+@@ -115,8 +115,8 @@ static void copy_to_high_bio_irq(struct bio *to, struct bio *from)
+ */
+ vfrom = page_address(fromvec->bv_page) + tovec->bv_offset;
+
+- flush_dcache_page(tovec->bv_page);
+ bounce_copy_vec(tovec, vfrom);
++ flush_dcache_page(tovec->bv_page);
+ }
+ }
+
+diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
+index 2047465..6d27a5b 100644
+--- a/mm/memory_hotplug.c
++++ b/mm/memory_hotplug.c
+@@ -551,19 +551,19 @@ static inline int pageblock_free(struct page *page)
+ /* Return the start of the next active pageblock after a given page */
+ static struct page *next_active_pageblock(struct page *page)
+ {
+- int pageblocks_stride;
+-
+ /* Ensure the starting page is pageblock-aligned */
+ BUG_ON(page_to_pfn(page) & (pageblock_nr_pages - 1));
+
+- /* Move forward by at least 1 * pageblock_nr_pages */
+- pageblocks_stride = 1;
+-
+ /* If the entire pageblock is free, move to the end of free page */
+- if (pageblock_free(page))
+- pageblocks_stride += page_order(page) - pageblock_order;
++ if (pageblock_free(page)) {
++ int order;
++ /* be careful. we don't have locks, page_order can be changed.*/
++ order = page_order(page);
++ if ((order < MAX_ORDER) && (order >= pageblock_order))
++ return page + (1 << order);
++ }
+
+- return page + (pageblocks_stride * pageblock_nr_pages);
++ return page + pageblock_nr_pages;
+ }
+
+ /* Checks if this range of memory is likely to be hot-removable. */
+diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c
+index 315ead3..cfef331 100644
+--- a/net/irda/irlan/irlan_common.c
++++ b/net/irda/irlan/irlan_common.c
+@@ -1101,7 +1101,7 @@ int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len)
+ memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */
+ le16_to_cpus(&val_len); n+=2;
+
+- if (val_len > 1016) {
++ if (val_len >= 1016) {
+ IRDA_DEBUG(2, "%s(), parameter length to long\n", __func__ );
+ return -RSP_INVALID_COMMAND_FORMAT;
+ }
+diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
+index 2370ab4..4c32700 100644
+--- a/net/sunrpc/auth_gss/auth_gss.c
++++ b/net/sunrpc/auth_gss/auth_gss.c
+@@ -717,17 +717,18 @@ gss_pipe_release(struct inode *inode)
+ struct rpc_inode *rpci = RPC_I(inode);
+ struct gss_upcall_msg *gss_msg;
+
++restart:
+ spin_lock(&inode->i_lock);
+- while (!list_empty(&rpci->in_downcall)) {
++ list_for_each_entry(gss_msg, &rpci->in_downcall, list) {
+
+- gss_msg = list_entry(rpci->in_downcall.next,
+- struct gss_upcall_msg, list);
++ if (!list_empty(&gss_msg->msg.list))
++ continue;
+ gss_msg->msg.errno = -EPIPE;
+ atomic_inc(&gss_msg->count);
+ __gss_unhash_msg(gss_msg);
+ spin_unlock(&inode->i_lock);
+ gss_release_msg(gss_msg);
+- spin_lock(&inode->i_lock);
++ goto restart;
+ }
+ spin_unlock(&inode->i_lock);
+
+diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c
+index 27a2378..ea1e6de 100644
+--- a/net/sunrpc/rpc_pipe.c
++++ b/net/sunrpc/rpc_pipe.c
+@@ -47,7 +47,7 @@ static void rpc_purge_list(struct rpc_inode *rpci, struct list_head *head,
+ return;
+ do {
+ msg = list_entry(head->next, struct rpc_pipe_msg, list);
+- list_del(&msg->list);
++ list_del_init(&msg->list);
+ msg->errno = err;
+ destroy_msg(msg);
+ } while (!list_empty(head));
+@@ -207,7 +207,7 @@ rpc_pipe_release(struct inode *inode, struct file *filp)
+ if (msg != NULL) {
+ spin_lock(&inode->i_lock);
+ msg->errno = -EAGAIN;
+- list_del(&msg->list);
++ list_del_init(&msg->list);
+ spin_unlock(&inode->i_lock);
+ rpci->ops->destroy_msg(msg);
+ }
+@@ -267,7 +267,7 @@ rpc_pipe_read(struct file *filp, char __user *buf, size_t len, loff_t *offset)
+ if (res < 0 || msg->len == msg->copied) {
+ filp->private_data = NULL;
+ spin_lock(&inode->i_lock);
+- list_del(&msg->list);
++ list_del_init(&msg->list);
+ spin_unlock(&inode->i_lock);
+ rpci->ops->destroy_msg(msg);
+ }
+diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
+index 6a60c5a..62cfc0c 100644
+--- a/net/wireless/wext-compat.c
++++ b/net/wireless/wext-compat.c
+@@ -1358,6 +1358,9 @@ int cfg80211_wext_giwessid(struct net_device *dev,
+ {
+ struct wireless_dev *wdev = dev->ieee80211_ptr;
+
++ data->flags = 0;
++ data->length = 0;
++
+ switch (wdev->iftype) {
+ case NL80211_IFTYPE_ADHOC:
+ return cfg80211_ibss_wext_giwessid(dev, info, data, ssid);
+diff --git a/net/wireless/wext.c b/net/wireless/wext.c
+index 60fe577..fddcf9c 100644
+--- a/net/wireless/wext.c
++++ b/net/wireless/wext.c
+@@ -854,6 +854,22 @@ static int ioctl_standard_iw_point(struct iw_point *iwp, unsigned int cmd,
+ }
+ }
+
++ if (IW_IS_GET(cmd) && !(descr->flags & IW_DESCR_FLAG_NOMAX)) {
++ /*
++ * If this is a GET, but not NOMAX, it means that the extra
++ * data is not bounded by userspace, but by max_tokens. Thus
++ * set the length to max_tokens. This matches the extra data
++ * allocation.
++ * The driver should fill it with the number of tokens it
++ * provided, and it may check iwp->length rather than having
++ * knowledge of max_tokens. If the driver doesn't change the
++ * iwp->length, this ioctl just copies back max_token tokens
++ * filled with zeroes. Hopefully the driver isn't claiming
++ * them to be valid data.
++ */
++ iwp->length = descr->max_tokens;
++ }
++
+ err = handler(dev, info, (union iwreq_data *) iwp, extra);
+
+ iwp->length += essid_compat;
+diff --git a/sound/core/seq/oss/seq_oss_init.c b/sound/core/seq/oss/seq_oss_init.c
+index d0d721c..1f133fe 100644
+--- a/sound/core/seq/oss/seq_oss_init.c
++++ b/sound/core/seq/oss/seq_oss_init.c
+@@ -280,13 +280,10 @@ snd_seq_oss_open(struct file *file, int level)
+ return 0;
+
+ _error:
+- snd_seq_oss_writeq_delete(dp->writeq);
+- snd_seq_oss_readq_delete(dp->readq);
+ snd_seq_oss_synth_cleanup(dp);
+ snd_seq_oss_midi_cleanup(dp);
+- delete_port(dp);
+ delete_seq_queue(dp->queue);
+- kfree(dp);
++ delete_port(dp);
+
+ return rc;
+ }
+@@ -349,8 +346,10 @@ create_port(struct seq_oss_devinfo *dp)
+ static int
+ delete_port(struct seq_oss_devinfo *dp)
+ {
+- if (dp->port < 0)
++ if (dp->port < 0) {
++ kfree(dp);
+ return 0;
++ }
+
+ debug_printk(("delete_port %i\n", dp->port));
+ return snd_seq_event_port_detach(dp->cseq, dp->port);
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index 7b4e74d..06c118c 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -6589,7 +6589,7 @@ static struct hda_input_mux alc883_lenovo_nb0763_capture_source = {
+ .num_items = 4,
+ .items = {
+ { "Mic", 0x0 },
+- { "iMic", 0x1 },
++ { "Int Mic", 0x1 },
+ { "Line", 0x2 },
+ { "CD", 0x4 },
+ },
+@@ -8038,8 +8038,8 @@ static struct snd_kcontrol_new alc883_lenovo_nb0763_mixer[] = {
+ HDA_CODEC_MUTE("CD Playback Switch", 0x0b, 0x04, HDA_INPUT),
+ HDA_CODEC_VOLUME("Mic Playback Volume", 0x0b, 0x0, HDA_INPUT),
+ HDA_CODEC_MUTE("Mic Playback Switch", 0x0b, 0x0, HDA_INPUT),
+- HDA_CODEC_VOLUME("iMic Playback Volume", 0x0b, 0x1, HDA_INPUT),
+- HDA_CODEC_MUTE("iMic Playback Switch", 0x0b, 0x1, HDA_INPUT),
++ HDA_CODEC_VOLUME("Int Mic Playback Volume", 0x0b, 0x1, HDA_INPUT),
++ HDA_CODEC_MUTE("Int Mic Playback Switch", 0x0b, 0x1, HDA_INPUT),
+ { } /* end */
+ };
+
+@@ -12389,6 +12389,9 @@ static int alc268_new_analog_output(struct alc_spec *spec, hda_nid_t nid,
+ dac = 0x02;
+ break;
+ case 0x15:
++ case 0x1a: /* ALC259/269 only */
++ case 0x1b: /* ALC259/269 only */
++ case 0x21: /* ALC269vb has this pin, too */
+ dac = 0x03;
+ break;
+ default:
+diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h
+index a31a8cd..3c6d141 100644
+--- a/tools/perf/util/callchain.h
++++ b/tools/perf/util/callchain.h
+@@ -49,6 +49,7 @@ static inline void callchain_init(struct callchain_node *node)
+ INIT_LIST_HEAD(&node->children);
+ INIT_LIST_HEAD(&node->val);
+
++ node->children_hit = 0;
+ node->parent = NULL;
+ node->hit = 0;
+ }
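
A minimal stand-alone sketch of the clamp described in the cpuacct comment earlier in this patch (CPUACCT_BATCH scales percpu_counter_batch by cputime_one_jiffy and caps the result at INT_MAX). This is only an illustration: percpu_counter_batch and cputime_one_jiffy are kernel symbols, and the values used below are assumptions, not taken from any particular configuration.

/*
 * Illustrative sketch of the CPUACCT_BATCH computation from the
 * kernel/sched.c hunk above; not part of the committed diff.
 */
#include <limits.h>
#include <stdio.h>

int main(void)
{
	/* Assumed stand-ins for the kernel symbols of the same names. */
	long percpu_counter_batch = 32;		/* assumed default per-CPU batch */
	long cputime_one_jiffy = 10000000;	/* assumed: 10 ms in cputime_t units */

	/* Equivalent of min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX). */
	long batch = percpu_counter_batch * cputime_one_jiffy;
	if (batch > INT_MAX)
		batch = INT_MAX;

	printf("effective __percpu_counter_add() batch: %ld\n", batch);
	return 0;
}
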
Added: dists/sid/linux-2.6/debian/patches/debian/revert-sched-2.6.32.22-changes.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/revert-sched-2.6.32.22-changes.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,1802 @@
+Subject: [PATCH] Revert 2.6.32.22 sched changes
+
+Revert all changes between 2.6.32.21 and 2.6.32.22 in the files:
+
+kernel/sched*.c
+kernel/cpu.c
+include/linux/cpuset.h
+include/linux/sched.h
+include/linux/topology.h
+
+This is a temporary measure for OpenVZ and VServer until they are
+rebased on top of 2.6.32.22.
+
+diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h
+index a73454a..a5740fc 100644
+--- a/include/linux/cpuset.h
++++ b/include/linux/cpuset.h
+@@ -21,7 +21,8 @@ extern int number_of_cpusets; /* How many cpusets are defined in system? */
+ extern int cpuset_init(void);
+ extern void cpuset_init_smp(void);
+ extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask);
+-extern int cpuset_cpus_allowed_fallback(struct task_struct *p);
++extern void cpuset_cpus_allowed_locked(struct task_struct *p,
++ struct cpumask *mask);
+ extern nodemask_t cpuset_mems_allowed(struct task_struct *p);
+ #define cpuset_current_mems_allowed (current->mems_allowed)
+ void cpuset_init_current_mems_allowed(void);
+@@ -68,6 +69,9 @@ struct seq_file;
+ extern void cpuset_task_status_allowed(struct seq_file *m,
+ struct task_struct *task);
+
++extern void cpuset_lock(void);
++extern void cpuset_unlock(void);
++
+ extern int cpuset_mem_spread_node(void);
+
+ static inline int cpuset_do_page_mem_spread(void)
+@@ -101,11 +105,10 @@ static inline void cpuset_cpus_allowed(struct task_struct *p,
+ {
+ cpumask_copy(mask, cpu_possible_mask);
+ }
+-
+-static inline int cpuset_cpus_allowed_fallback(struct task_struct *p)
++static inline void cpuset_cpus_allowed_locked(struct task_struct *p,
++ struct cpumask *mask)
+ {
+- cpumask_copy(&p->cpus_allowed, cpu_possible_mask);
+- return cpumask_any(cpu_active_mask);
++ cpumask_copy(mask, cpu_possible_mask);
+ }
+
+ static inline nodemask_t cpuset_mems_allowed(struct task_struct *p)
+@@ -154,6 +157,9 @@ static inline void cpuset_task_status_allowed(struct seq_file *m,
+ {
+ }
+
++static inline void cpuset_lock(void) {}
++static inline void cpuset_unlock(void) {}
++
+ static inline int cpuset_mem_spread_node(void)
+ {
+ return 0;
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 1184379..4b3dbc7 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -145,6 +145,7 @@ extern unsigned long this_cpu_load(void);
+
+
+ extern void calc_global_load(void);
++extern u64 cpu_nr_migrations(int cpu);
+
+ extern unsigned long get_parent_ip(unsigned long addr);
+
+@@ -1071,12 +1072,10 @@ struct sched_domain;
+ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+ #define WF_FORK 0x02 /* child wakeup after fork */
+
+-#ifndef __GENKSYMS__
+ struct sched_class {
+ const struct sched_class *next;
+
+- void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup,
+- bool head);
++ void (*enqueue_task) (struct rq *rq, struct task_struct *p, int wakeup);
+ void (*dequeue_task) (struct rq *rq, struct task_struct *p, int sleep);
+ void (*yield_task) (struct rq *rq);
+
+@@ -1086,8 +1085,7 @@ struct sched_class {
+ void (*put_prev_task) (struct rq *rq, struct task_struct *p);
+
+ #ifdef CONFIG_SMP
+- int (*select_task_rq)(struct rq *rq, struct task_struct *p,
+- int sd_flag, int flags);
++ int (*select_task_rq)(struct task_struct *p, int sd_flag, int flags);
+
+ unsigned long (*load_balance) (struct rq *this_rq, int this_cpu,
+ struct rq *busiest, unsigned long max_load_move,
+@@ -1099,8 +1097,7 @@ struct sched_class {
+ enum cpu_idle_type idle);
+ void (*pre_schedule) (struct rq *this_rq, struct task_struct *task);
+ void (*post_schedule) (struct rq *this_rq);
+- void (*task_waking) (struct rq *this_rq, struct task_struct *task);
+- void (*task_woken) (struct rq *this_rq, struct task_struct *task);
++ void (*task_wake_up) (struct rq *this_rq, struct task_struct *task);
+
+ void (*set_cpus_allowed)(struct task_struct *p,
+ const struct cpumask *newmask);
+@@ -1111,7 +1108,7 @@ struct sched_class {
+
+ void (*set_curr_task) (struct rq *rq);
+ void (*task_tick) (struct rq *rq, struct task_struct *p, int queued);
+- void (*task_fork) (struct task_struct *p);
++ void (*task_new) (struct rq *rq, struct task_struct *p);
+
+ void (*switched_from) (struct rq *this_rq, struct task_struct *task,
+ int running);
+@@ -1120,53 +1117,12 @@ struct sched_class {
+ void (*prio_changed) (struct rq *this_rq, struct task_struct *task,
+ int oldprio, int running);
+
+- unsigned int (*get_rr_interval) (struct rq *rq,
+- struct task_struct *task);
++ unsigned int (*get_rr_interval) (struct task_struct *task);
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *p, int on_rq);
+-#endif
+-};
+-#else /* __GENKSYMS__ */
+-/*
+- * struct sched_class is private to the scheduler, but since it is
+- * defined here it affects the symbol version of many exported symbols.
+- * This is a fake definition purely to keep symbol versions stable.
+- */
+-struct sched_class {
+- const struct sched_class *next;
+- void (*enqueue_task) (struct rq *, struct task_struct *, int);
+- void (*dequeue_task) (struct rq *, struct task_struct *, int);
+- void (*yield_task) (struct rq *);
+- void (*check_preempt_curr) (struct rq *, struct task_struct *, int);
+- struct task_struct * (*pick_next_task) (struct rq *);
+- void (*put_prev_task) (struct rq *, struct task_struct *);
+-#ifdef CONFIG_SMP
+- int (*select_task_rq)(struct task_struct *, int, int);
+- unsigned long (*load_balance) (struct rq *, int, struct rq *,
+- unsigned long, struct sched_domain *,
+- enum cpu_idle_type, int *, int *);
+- int (*move_one_task) (struct rq *, int, struct rq *,
+- struct sched_domain *, enum cpu_idle_type);
+- void (*pre_schedule) (struct rq *, struct task_struct *);
+- void (*post_schedule) (struct rq *);
+- void (*task_wake_up) (struct rq *, struct task_struct *);
+- void (*set_cpus_allowed)(struct task_struct *, const struct cpumask *);
+- void (*rq_online)(struct rq *);
+- void (*rq_offline)(struct rq *);
+-#endif
+- void (*set_curr_task) (struct rq *);
+- void (*task_tick) (struct rq *, struct task_struct *, int);
+- void (*task_new) (struct rq *, struct task_struct *);
+- void (*switched_from) (struct rq *, struct task_struct *, int);
+- void (*switched_to) (struct rq *, struct task_struct *, int);
+- void (*prio_changed) (struct rq *, struct task_struct *, int, int);
+- unsigned int (*get_rr_interval) (struct task_struct *);
+-#ifdef CONFIG_FAIR_GROUP_SCHED
+- void (*moved_group) (struct task_struct *);
++ void (*moved_group) (struct task_struct *p);
+ #endif
+ };
+-#endif /* __GENKSYMS__ */
+
+ struct load_weight {
+ unsigned long weight, inv_weight;
+@@ -1225,6 +1181,7 @@ struct sched_entity {
+ u64 nr_failed_migrations_running;
+ u64 nr_failed_migrations_hot;
+ u64 nr_forced_migrations;
++ u64 nr_forced2_migrations;
+
+ u64 nr_wakeups;
+ u64 nr_wakeups_sync;
+@@ -1933,7 +1890,6 @@ extern void sched_clock_idle_sleep_event(void);
+ extern void sched_clock_idle_wakeup_event(u64 delta_ns);
+
+ #ifdef CONFIG_HOTPLUG_CPU
+-extern void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p);
+ extern void idle_task_exit(void);
+ #else
+ static inline void idle_task_exit(void) {}
+diff --git a/include/linux/topology.h b/include/linux/topology.h
+index 5b81156..57e6357 100644
+--- a/include/linux/topology.h
++++ b/include/linux/topology.h
+@@ -99,7 +99,7 @@ int arch_update_cpu_topology(void);
+ | 1*SD_WAKE_AFFINE \
+ | 1*SD_SHARE_CPUPOWER \
+ | 0*SD_POWERSAVINGS_BALANCE \
+- | 1*SD_SHARE_PKG_RESOURCES \
++ | 0*SD_SHARE_PKG_RESOURCES \
+ | 0*SD_SERIALIZE \
+ | 0*SD_PREFER_SIBLING \
+ , \
+diff --git a/kernel/cpu.c b/kernel/cpu.c
+index 7e8b6ac..291ac58 100644
+--- a/kernel/cpu.c
++++ b/kernel/cpu.c
+@@ -151,7 +151,7 @@ static inline void check_for_tasks(int cpu)
+
+ write_lock_irq(&tasklist_lock);
+ for_each_process(p) {
+- if (task_cpu(p) == cpu && p->state == TASK_RUNNING &&
++ if (task_cpu(p) == cpu &&
+ (!cputime_eq(p->utime, cputime_zero) ||
+ !cputime_eq(p->stime, cputime_zero)))
+ printk(KERN_WARNING "Task %s (pid = %d) is on cpu %d\
+@@ -163,7 +163,6 @@ static inline void check_for_tasks(int cpu)
+ }
+
+ struct take_cpu_down_param {
+- struct task_struct *caller;
+ unsigned long mod;
+ void *hcpu;
+ };
+@@ -172,7 +171,6 @@ struct take_cpu_down_param {
+ static int __ref take_cpu_down(void *_param)
+ {
+ struct take_cpu_down_param *param = _param;
+- unsigned int cpu = (unsigned long)param->hcpu;
+ int err;
+
+ /* Ensure this CPU doesn't handle any more interrupts. */
+@@ -183,8 +181,6 @@ static int __ref take_cpu_down(void *_param)
+ raw_notifier_call_chain(&cpu_chain, CPU_DYING | param->mod,
+ param->hcpu);
+
+- if (task_cpu(param->caller) == cpu)
+- move_task_off_dead_cpu(cpu, param->caller);
+ /* Force idle task to run as soon as we yield: it should
+ immediately notice cpu is offline and die quickly. */
+ sched_idle_next();
+@@ -195,10 +191,10 @@ static int __ref take_cpu_down(void *_param)
+ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ {
+ int err, nr_calls = 0;
++ cpumask_var_t old_allowed;
+ void *hcpu = (void *)(long)cpu;
+ unsigned long mod = tasks_frozen ? CPU_TASKS_FROZEN : 0;
+ struct take_cpu_down_param tcd_param = {
+- .caller = current,
+ .mod = mod,
+ .hcpu = hcpu,
+ };
+@@ -209,8 +205,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ if (!cpu_online(cpu))
+ return -EINVAL;
+
++ if (!alloc_cpumask_var(&old_allowed, GFP_KERNEL))
++ return -ENOMEM;
++
+ cpu_hotplug_begin();
+- set_cpu_active(cpu, false);
+ err = __raw_notifier_call_chain(&cpu_chain, CPU_DOWN_PREPARE | mod,
+ hcpu, -1, &nr_calls);
+ if (err == NOTIFY_BAD) {
+@@ -225,6 +223,10 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ goto out_release;
+ }
+
++ /* Ensure that we are not runnable on dying cpu */
++ cpumask_copy(old_allowed, &current->cpus_allowed);
++ set_cpus_allowed_ptr(current, cpu_active_mask);
++
+ err = __stop_machine(take_cpu_down, &tcd_param, cpumask_of(cpu));
+ if (err) {
+ set_cpu_active(cpu, true);
+@@ -233,7 +235,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+ hcpu) == NOTIFY_BAD)
+ BUG();
+
+- goto out_release;
++ goto out_allowed;
+ }
+ BUG_ON(cpu_online(cpu));
+
+@@ -251,6 +253,8 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen)
+
+ check_for_tasks(cpu);
+
++out_allowed:
++ set_cpus_allowed_ptr(current, old_allowed);
+ out_release:
+ cpu_hotplug_done();
+ if (!err) {
+@@ -258,6 +262,7 @@ out_release:
+ hcpu) == NOTIFY_BAD)
+ BUG();
+ }
++ free_cpumask_var(old_allowed);
+ return err;
+ }
+
+@@ -275,6 +280,18 @@ int __ref cpu_down(unsigned int cpu)
+ goto out;
+ }
+
++ set_cpu_active(cpu, false);
++
++ /*
++ * Make sure the all cpus did the reschedule and are not
++ * using stale version of the cpu_active_mask.
++ * This is not strictly necessary becuase stop_machine()
++ * that we run down the line already provides the required
++ * synchronization. But it's really a side effect and we do not
++ * want to depend on the innards of the stop_machine here.
++ */
++ synchronize_sched();
++
+ err = _cpu_down(cpu, 0);
+
+ out:
+@@ -365,12 +382,19 @@ int disable_nonboot_cpus(void)
+ return error;
+ cpu_maps_update_begin();
+ first_cpu = cpumask_first(cpu_online_mask);
+- /*
+- * We take down all of the non-boot CPUs in one shot to avoid races
++ /* We take down all of the non-boot CPUs in one shot to avoid races
+ * with the userspace trying to use the CPU hotplug at the same time
+ */
+ cpumask_clear(frozen_cpus);
+
++ for_each_online_cpu(cpu) {
++ if (cpu == first_cpu)
++ continue;
++ set_cpu_active(cpu, false);
++ }
++
++ synchronize_sched();
++
+ printk("Disabling non-boot CPUs ...\n");
+ for_each_online_cpu(cpu) {
+ if (cpu == first_cpu)
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 98d4048..9990074 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -542,6 +542,7 @@ struct rq {
+ struct load_weight load;
+ unsigned long nr_load_updates;
+ u64 nr_switches;
++ u64 nr_migrations_in;
+
+ struct cfs_rq cfs;
+ struct rt_rq rt;
+@@ -942,25 +943,14 @@ static inline void finish_lock_switch(struct rq *rq, struct task_struct *prev)
+ #endif /* __ARCH_WANT_UNLOCKED_CTXSW */
+
+ /*
+- * Check whether the task is waking, we use this to synchronize ->cpus_allowed
+- * against ttwu().
+- */
+-static inline int task_is_waking(struct task_struct *p)
+-{
+- return unlikely(p->state == TASK_WAKING);
+-}
+-
+-/*
+ * __task_rq_lock - lock the runqueue a given task resides on.
+ * Must be called interrupts disabled.
+ */
+ static inline struct rq *__task_rq_lock(struct task_struct *p)
+ __acquires(rq->lock)
+ {
+- struct rq *rq;
+-
+ for (;;) {
+- rq = task_rq(p);
++ struct rq *rq = task_rq(p);
+ spin_lock(&rq->lock);
+ if (likely(rq == task_rq(p)))
+ return rq;
+@@ -1832,20 +1822,6 @@ static void cfs_rq_set_shares(struct cfs_rq *cfs_rq, unsigned long shares)
+ static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
+
+-static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
+-{
+- set_task_rq(p, cpu);
+-#ifdef CONFIG_SMP
+- /*
+- * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
+- * successfuly executed on another CPU. We must ensure that updates of
+- * per-task data have been completed by this moment.
+- */
+- smp_wmb();
+- task_thread_info(p)->cpu = cpu;
+-#endif
+-}
+-
+ #include "sched_stats.h"
+ #include "sched_idletask.c"
+ #include "sched_fair.c"
+@@ -1895,14 +1871,13 @@ static void update_avg(u64 *avg, u64 sample)
+ *avg += diff >> 3;
+ }
+
+-static void
+-enqueue_task(struct rq *rq, struct task_struct *p, int wakeup, bool head)
++static void enqueue_task(struct rq *rq, struct task_struct *p, int wakeup)
+ {
+ if (wakeup)
+ p->se.start_runtime = p->se.sum_exec_runtime;
+
+ sched_info_queued(p);
+- p->sched_class->enqueue_task(rq, p, wakeup, head);
++ p->sched_class->enqueue_task(rq, p, wakeup);
+ p->se.on_rq = 1;
+ }
+
+@@ -1978,7 +1953,7 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+ if (task_contributes_to_load(p))
+ rq->nr_uninterruptible--;
+
+- enqueue_task(rq, p, wakeup, false);
++ enqueue_task(rq, p, wakeup);
+ inc_nr_running(rq);
+ }
+
+@@ -2003,6 +1978,20 @@ inline int task_curr(const struct task_struct *p)
+ return cpu_curr(task_cpu(p)) == p;
+ }
+
++static inline void __set_task_cpu(struct task_struct *p, unsigned int cpu)
++{
++ set_task_rq(p, cpu);
++#ifdef CONFIG_SMP
++ /*
++ * After ->cpu is set up to a new value, task_rq_lock(p, ...) can be
++ * successfuly executed on another CPU. We must ensure that updates of
++ * per-task data have been completed by this moment.
++ */
++ smp_wmb();
++ task_thread_info(p)->cpu = cpu;
++#endif
++}
++
+ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ const struct sched_class *prev_class,
+ int oldprio, int running)
+@@ -2029,15 +2018,21 @@ static inline void check_class_changed(struct rq *rq, struct task_struct *p,
+ */
+ void kthread_bind(struct task_struct *p, unsigned int cpu)
+ {
++ struct rq *rq = cpu_rq(cpu);
++ unsigned long flags;
++
+ /* Must have done schedule() in kthread() before we set_task_cpu */
+ if (!wait_task_inactive(p, TASK_UNINTERRUPTIBLE)) {
+ WARN_ON(1);
+ return;
+ }
+
++ spin_lock_irqsave(&rq->lock, flags);
++ set_task_cpu(p, cpu);
+ p->cpus_allowed = cpumask_of_cpu(cpu);
+ p->rt.nr_cpus_allowed = 1;
+ p->flags |= PF_THREAD_BOUND;
++ spin_unlock_irqrestore(&rq->lock, flags);
+ }
+ EXPORT_SYMBOL(kthread_bind);
+
+@@ -2075,23 +2070,35 @@ task_hot(struct task_struct *p, u64 now, struct sched_domain *sd)
+ void set_task_cpu(struct task_struct *p, unsigned int new_cpu)
+ {
+ int old_cpu = task_cpu(p);
++ struct rq *old_rq = cpu_rq(old_cpu), *new_rq = cpu_rq(new_cpu);
++ struct cfs_rq *old_cfsrq = task_cfs_rq(p),
++ *new_cfsrq = cpu_cfs_rq(old_cfsrq, new_cpu);
++ u64 clock_offset;
+
+-#ifdef CONFIG_SCHED_DEBUG
+- /*
+- * We should never call set_task_cpu() on a blocked task,
+- * ttwu() will sort out the placement.
+- */
+- WARN_ON_ONCE(p->state != TASK_RUNNING && p->state != TASK_WAKING &&
+- !(task_thread_info(p)->preempt_count & PREEMPT_ACTIVE));
+-#endif
++ clock_offset = old_rq->clock - new_rq->clock;
+
+ trace_sched_migrate_task(p, new_cpu);
+
++#ifdef CONFIG_SCHEDSTATS
++ if (p->se.wait_start)
++ p->se.wait_start -= clock_offset;
++ if (p->se.sleep_start)
++ p->se.sleep_start -= clock_offset;
++ if (p->se.block_start)
++ p->se.block_start -= clock_offset;
++#endif
+ if (old_cpu != new_cpu) {
+ p->se.nr_migrations++;
++ new_rq->nr_migrations_in++;
++#ifdef CONFIG_SCHEDSTATS
++ if (task_hot(p, old_rq->clock, NULL))
++ schedstat_inc(p, se.nr_forced2_migrations);
++#endif
+ perf_sw_event(PERF_COUNT_SW_CPU_MIGRATIONS,
+ 1, 1, NULL, 0);
+ }
++ p->se.vruntime -= old_cfsrq->min_vruntime -
++ new_cfsrq->min_vruntime;
+
+ __set_task_cpu(p, new_cpu);
+ }
+@@ -2324,69 +2331,6 @@ void task_oncpu_function_call(struct task_struct *p,
+ preempt_enable();
+ }
+
+-#ifdef CONFIG_SMP
+-/*
+- * ->cpus_allowed is protected by either TASK_WAKING or rq->lock held.
+- */
+-static int select_fallback_rq(int cpu, struct task_struct *p)
+-{
+- int dest_cpu;
+- const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(cpu));
+-
+- /* Look for allowed, online CPU in same node. */
+- for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
+- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+- return dest_cpu;
+-
+- /* Any allowed, online CPU? */
+- dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
+- if (dest_cpu < nr_cpu_ids)
+- return dest_cpu;
+-
+- /* No more Mr. Nice Guy. */
+- if (unlikely(dest_cpu >= nr_cpu_ids)) {
+- dest_cpu = cpuset_cpus_allowed_fallback(p);
+- /*
+- * Don't tell them about moving exiting tasks or
+- * kernel threads (both mm NULL), since they never
+- * leave kernel.
+- */
+- if (p->mm && printk_ratelimit()) {
+- printk(KERN_INFO "process %d (%s) no "
+- "longer affine to cpu%d\n",
+- task_pid_nr(p), p->comm, cpu);
+- }
+- }
+-
+- return dest_cpu;
+-}
+-
+-/*
+- * The caller (fork, wakeup) owns TASK_WAKING, ->cpus_allowed is stable.
+- */
+-static inline
+-int select_task_rq(struct rq *rq, struct task_struct *p, int sd_flags, int wake_flags)
+-{
+- int cpu = p->sched_class->select_task_rq(rq, p, sd_flags, wake_flags);
+-
+- /*
+- * In order not to call set_task_cpu() on a blocking task we need
+- * to rely on ttwu() to place the task on a valid ->cpus_allowed
+- * cpu.
+- *
+- * Since this is common to all placement strategies, this lives here.
+- *
+- * [ this allows ->select_task() to simply return task_cpu(p) and
+- * not worry about this generic constraint ]
+- */
+- if (unlikely(!cpumask_test_cpu(cpu, &p->cpus_allowed) ||
+- !cpu_online(cpu)))
+- cpu = select_fallback_rq(task_cpu(p), p);
+-
+- return cpu;
+-}
+-#endif
+-
+ /***
+ * try_to_wake_up - wake up a thread
+ * @p: the to-be-woken-up thread
+@@ -2435,34 +2379,22 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
+ *
+ * First fix up the nr_uninterruptible count:
+ */
+- if (task_contributes_to_load(p)) {
+- if (likely(cpu_online(orig_cpu)))
+- rq->nr_uninterruptible--;
+- else
+- this_rq()->nr_uninterruptible--;
+- }
++ if (task_contributes_to_load(p))
++ rq->nr_uninterruptible--;
+ p->state = TASK_WAKING;
++ task_rq_unlock(rq, &flags);
+
+- if (p->sched_class->task_waking)
+- p->sched_class->task_waking(rq, p);
+-
+- cpu = select_task_rq(rq, p, SD_BALANCE_WAKE, wake_flags);
++ cpu = p->sched_class->select_task_rq(p, SD_BALANCE_WAKE, wake_flags);
+ if (cpu != orig_cpu)
+ set_task_cpu(p, cpu);
+- __task_rq_unlock(rq);
+
+- rq = cpu_rq(cpu);
+- spin_lock(&rq->lock);
+- update_rq_clock(rq);
++ rq = task_rq_lock(p, &flags);
++
++ if (rq != orig_rq)
++ update_rq_clock(rq);
+
+- /*
+- * We migrated the task without holding either rq->lock, however
+- * since the task is not on the task list itself, nobody else
+- * will try and migrate the task, hence the rq should match the
+- * cpu we just moved it to.
+- */
+- WARN_ON(task_cpu(p) != cpu);
+ WARN_ON(p->state != TASK_WAKING);
++ cpu = task_cpu(p);
+
+ #ifdef CONFIG_SCHEDSTATS
+ schedstat_inc(rq, ttwu_count);
+@@ -2515,8 +2447,8 @@ out_running:
+
+ p->state = TASK_RUNNING;
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_woken)
+- p->sched_class->task_woken(rq, p);
++ if (p->sched_class->task_wake_up)
++ p->sched_class->task_wake_up(rq, p);
+
+ if (unlikely(rq->idle_stamp)) {
+ u64 delta = rq->clock - rq->idle_stamp;
+@@ -2596,6 +2528,7 @@ static void __sched_fork(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
++ p->se.nr_forced2_migrations = 0;
+
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+@@ -2616,6 +2549,14 @@ static void __sched_fork(struct task_struct *p)
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+ INIT_HLIST_HEAD(&p->preempt_notifiers);
+ #endif
++
++ /*
++ * We mark the process as running here, but have not actually
++ * inserted it onto the runqueue yet. This guarantees that
++ * nobody will actually run it, and a signal or other external
++ * event cannot wake it up and insert it on the runqueue either.
++ */
++ p->state = TASK_RUNNING;
+ }
+
+ /*
+@@ -2626,12 +2567,6 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ int cpu = get_cpu();
+
+ __sched_fork(p);
+- /*
+- * We mark the process as running here. This guarantees that
+- * nobody will actually run it, and a signal or other external
+- * event cannot wake it up and insert it on the runqueue either.
+- */
+- p->state = TASK_RUNNING;
+
+ /*
+ * Revert to default priority/policy on fork if requested.
+@@ -2663,9 +2598,9 @@ void sched_fork(struct task_struct *p, int clone_flags)
+ if (!rt_prio(p->prio))
+ p->sched_class = &fair_sched_class;
+
+- if (p->sched_class->task_fork)
+- p->sched_class->task_fork(p);
+-
++#ifdef CONFIG_SMP
++ cpu = p->sched_class->select_task_rq(p, SD_BALANCE_FORK, 0);
++#endif
+ set_task_cpu(p, cpu);
+
+ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT)
+@@ -2695,38 +2630,28 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+ {
+ unsigned long flags;
+ struct rq *rq;
+- int cpu = get_cpu();
+-
+-#ifdef CONFIG_SMP
+- rq = task_rq_lock(p, &flags);
+- p->state = TASK_WAKING;
+-
+- /*
+- * Fork balancing, do it here and not earlier because:
+- * - cpus_allowed can change in the fork path
+- * - any previously selected cpu might disappear through hotplug
+- *
+- * We set TASK_WAKING so that select_task_rq() can drop rq->lock
+- * without people poking at ->cpus_allowed.
+- */
+- cpu = select_task_rq(rq, p, SD_BALANCE_FORK, 0);
+- set_task_cpu(p, cpu);
+-
+- p->state = TASK_RUNNING;
+- task_rq_unlock(rq, &flags);
+-#endif
+
+ rq = task_rq_lock(p, &flags);
++ BUG_ON(p->state != TASK_RUNNING);
+ update_rq_clock(rq);
+- activate_task(rq, p, 0);
++
++ if (!p->sched_class->task_new || !current->se.on_rq) {
++ activate_task(rq, p, 0);
++ } else {
++ /*
++ * Let the scheduling class do new task startup
++ * management (if any):
++ */
++ p->sched_class->task_new(rq, p);
++ inc_nr_running(rq);
++ }
+ trace_sched_wakeup_new(rq, p, 1);
+ check_preempt_curr(rq, p, WF_FORK);
+ #ifdef CONFIG_SMP
+- if (p->sched_class->task_woken)
+- p->sched_class->task_woken(rq, p);
++ if (p->sched_class->task_wake_up)
++ p->sched_class->task_wake_up(rq, p);
+ #endif
+ task_rq_unlock(rq, &flags);
+- put_cpu();
+ }
+
+ #ifdef CONFIG_PREEMPT_NOTIFIERS
+@@ -3113,6 +3038,15 @@ static void calc_load_account_active(struct rq *this_rq)
+ }
+
+ /*
++ * Externally visible per-cpu scheduler statistics:
++ * cpu_nr_migrations(cpu) - number of migrations into that cpu
++ */
++u64 cpu_nr_migrations(int cpu)
++{
++ return cpu_rq(cpu)->nr_migrations_in;
++}
++
++/*
+ * Update rq->cpu_load[] statistics. This function is usually called every
+ * scheduler tick (TICK_NSEC).
+ */
+@@ -3194,28 +3128,24 @@ static void double_rq_unlock(struct rq *rq1, struct rq *rq2)
+ }
+
+ /*
+- * sched_exec - execve() is a valuable balancing opportunity, because at
+- * this point the task has the smallest effective memory and cache footprint.
++ * If dest_cpu is allowed for this process, migrate the task to it.
++ * This is accomplished by forcing the cpu_allowed mask to only
++ * allow dest_cpu, which will force the cpu onto dest_cpu. Then
++ * the cpu_allowed mask is restored.
+ */
+-void sched_exec(void)
++static void sched_migrate_task(struct task_struct *p, int dest_cpu)
+ {
+- struct task_struct *p = current;
+ struct migration_req req;
+ unsigned long flags;
+ struct rq *rq;
+- int dest_cpu;
+
+ rq = task_rq_lock(p, &flags);
+- dest_cpu = p->sched_class->select_task_rq(rq, p, SD_BALANCE_EXEC, 0);
+- if (dest_cpu == smp_processor_id())
+- goto unlock;
++ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed)
++ || unlikely(!cpu_active(dest_cpu)))
++ goto out;
+
+- /*
+- * select_task_rq() can race against ->cpus_allowed
+- */
+- if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed) &&
+- likely(cpu_active(dest_cpu)) &&
+- migrate_task(p, dest_cpu, &req)) {
++ /* force the process onto the specified CPU */
++ if (migrate_task(p, dest_cpu, &req)) {
+ /* Need to wait for migration thread (might exit: take ref). */
+ struct task_struct *mt = rq->migration_thread;
+
+@@ -3227,11 +3157,24 @@ void sched_exec(void)
+
+ return;
+ }
+-unlock:
++out:
+ task_rq_unlock(rq, &flags);
+ }
+
+ /*
++ * sched_exec - execve() is a valuable balancing opportunity, because at
++ * this point the task has the smallest effective memory and cache footprint.
++ */
++void sched_exec(void)
++{
++ int new_cpu, this_cpu = get_cpu();
++ new_cpu = current->sched_class->select_task_rq(current, SD_BALANCE_EXEC, 0);
++ put_cpu();
++ if (new_cpu != this_cpu)
++ sched_migrate_task(current, new_cpu);
++}
++
++/*
+ * pull_task - move a task from a remote runqueue to the local runqueue.
+ * Both runqueues must be locked.
+ */
+@@ -6031,15 +5974,14 @@ EXPORT_SYMBOL(wait_for_completion_killable);
+ */
+ bool try_wait_for_completion(struct completion *x)
+ {
+- unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+ else
+ x->done--;
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ spin_unlock_irq(&x->wait.lock);
+ return ret;
+ }
+ EXPORT_SYMBOL(try_wait_for_completion);
+@@ -6054,13 +5996,12 @@ EXPORT_SYMBOL(try_wait_for_completion);
+ */
+ bool completion_done(struct completion *x)
+ {
+- unsigned long flags;
+ int ret = 1;
+
+- spin_lock_irqsave(&x->wait.lock, flags);
++ spin_lock_irq(&x->wait.lock);
+ if (!x->done)
+ ret = 0;
+- spin_unlock_irqrestore(&x->wait.lock, flags);
++ spin_unlock_irq(&x->wait.lock);
+ return ret;
+ }
+ EXPORT_SYMBOL(completion_done);
+@@ -6154,7 +6095,7 @@ void rt_mutex_setprio(struct task_struct *p, int prio)
+ if (running)
+ p->sched_class->set_curr_task(rq);
+ if (on_rq) {
+- enqueue_task(rq, p, 0, oldprio < prio);
++ enqueue_task(rq, p, 0);
+
+ check_class_changed(rq, p, prev_class, oldprio, running);
+ }
+@@ -6198,7 +6139,7 @@ void set_user_nice(struct task_struct *p, long nice)
+ delta = p->prio - old_prio;
+
+ if (on_rq) {
+- enqueue_task(rq, p, 0, false);
++ enqueue_task(rq, p, 0);
+ /*
+ * If the task increased its priority or is running and
+ * lowered its priority, then reschedule its CPU:
+@@ -6589,7 +6530,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ return -EINVAL;
+
+ retval = -ESRCH;
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (p) {
+ retval = security_task_getscheduler(p);
+@@ -6597,7 +6538,7 @@ SYSCALL_DEFINE1(sched_getscheduler, pid_t, pid)
+ retval = p->policy
+ | (p->sched_reset_on_fork ? SCHED_RESET_ON_FORK : 0);
+ }
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ return retval;
+ }
+
+@@ -6615,7 +6556,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ if (!param || pid < 0)
+ return -EINVAL;
+
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ retval = -ESRCH;
+ if (!p)
+@@ -6626,7 +6567,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ goto out_unlock;
+
+ lp.sched_priority = p->rt_priority;
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+
+ /*
+ * This one might sleep, we cannot do it with a spinlock held ...
+@@ -6636,7 +6577,7 @@ SYSCALL_DEFINE2(sched_getparam, pid_t, pid, struct sched_param __user *, param)
+ return retval;
+
+ out_unlock:
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ return retval;
+ }
+
+@@ -6647,18 +6588,22 @@ long sched_setaffinity(pid_t pid, const struct cpumask *in_mask)
+ int retval;
+
+ get_online_cpus();
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+
+ p = find_process_by_pid(pid);
+ if (!p) {
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ put_online_cpus();
+ return -ESRCH;
+ }
+
+- /* Prevent p going away */
++ /*
++ * It is not safe to call set_cpus_allowed with the
++ * tasklist_lock held. We will bump the task_struct's
++ * usage count and then drop tasklist_lock.
++ */
+ get_task_struct(p);
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+
+ if (!alloc_cpumask_var(&cpus_allowed, GFP_KERNEL)) {
+ retval = -ENOMEM;
+@@ -6739,12 +6684,10 @@ SYSCALL_DEFINE3(sched_setaffinity, pid_t, pid, unsigned int, len,
+ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ {
+ struct task_struct *p;
+- unsigned long flags;
+- struct rq *rq;
+ int retval;
+
+ get_online_cpus();
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+
+ retval = -ESRCH;
+ p = find_process_by_pid(pid);
+@@ -6755,12 +6698,10 @@ long sched_getaffinity(pid_t pid, struct cpumask *mask)
+ if (retval)
+ goto out_unlock;
+
+- rq = task_rq_lock(p, &flags);
+ cpumask_and(mask, &p->cpus_allowed, cpu_online_mask);
+- task_rq_unlock(rq, &flags);
+
+ out_unlock:
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ put_online_cpus();
+
+ return retval;
+@@ -6999,8 +6940,6 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ {
+ struct task_struct *p;
+ unsigned int time_slice;
+- unsigned long flags;
+- struct rq *rq;
+ int retval;
+ struct timespec t;
+
+@@ -7008,7 +6947,7 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ return -EINVAL;
+
+ retval = -ESRCH;
+- rcu_read_lock();
++ read_lock(&tasklist_lock);
+ p = find_process_by_pid(pid);
+ if (!p)
+ goto out_unlock;
+@@ -7017,17 +6956,15 @@ SYSCALL_DEFINE2(sched_rr_get_interval, pid_t, pid,
+ if (retval)
+ goto out_unlock;
+
+- rq = task_rq_lock(p, &flags);
+- time_slice = p->sched_class->get_rr_interval(rq, p);
+- task_rq_unlock(rq, &flags);
++ time_slice = p->sched_class->get_rr_interval(p);
+
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ jiffies_to_timespec(time_slice, &t);
+ retval = copy_to_user(interval, &t, sizeof(t)) ? -EFAULT : 0;
+ return retval;
+
+ out_unlock:
+- rcu_read_unlock();
++ read_unlock(&tasklist_lock);
+ return retval;
+ }
+
+@@ -7118,7 +7055,6 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu)
+ spin_lock_irqsave(&rq->lock, flags);
+
+ __sched_fork(idle);
+- idle->state = TASK_RUNNING;
+ idle->se.exec_start = sched_clock();
+
+ cpumask_copy(&idle->cpus_allowed, cpumask_of(cpu));
+@@ -7213,19 +7149,7 @@ int set_cpus_allowed_ptr(struct task_struct *p, const struct cpumask *new_mask)
+ struct rq *rq;
+ int ret = 0;
+
+- /*
+- * Serialize against TASK_WAKING so that ttwu() and wunt() can
+- * drop the rq->lock and still rely on ->cpus_allowed.
+- */
+-again:
+- while (task_is_waking(p))
+- cpu_relax();
+ rq = task_rq_lock(p, &flags);
+- if (task_is_waking(p)) {
+- task_rq_unlock(rq, &flags);
+- goto again;
+- }
+-
+ if (!cpumask_intersects(new_mask, cpu_active_mask)) {
+ ret = -EINVAL;
+ goto out;
+@@ -7254,7 +7178,7 @@ again:
+
+ get_task_struct(mt);
+ task_rq_unlock(rq, &flags);
+- wake_up_process(mt);
++ wake_up_process(rq->migration_thread);
+ put_task_struct(mt);
+ wait_for_completion(&req.done);
+ tlb_migrate_finish(p->mm);
+@@ -7281,7 +7205,7 @@ EXPORT_SYMBOL_GPL(set_cpus_allowed_ptr);
+ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ {
+ struct rq *rq_dest, *rq_src;
+- int ret = 0;
++ int ret = 0, on_rq;
+
+ if (unlikely(!cpu_active(dest_cpu)))
+ return ret;
+@@ -7293,17 +7217,19 @@ static int __migrate_task(struct task_struct *p, int src_cpu, int dest_cpu)
+ /* Already moved. */
+ if (task_cpu(p) != src_cpu)
+ goto done;
++ /* Waking up, don't get in the way of try_to_wake_up(). */
++ if (p->state == TASK_WAKING)
++ goto fail;
+ /* Affinity changed (again). */
+ if (!cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
+ goto fail;
+
+- /*
+- * If we're not on a rq, the next wake-up will ensure we're
+- * placed properly.
+- */
+- if (p->se.on_rq) {
++ on_rq = p->se.on_rq;
++ if (on_rq)
+ deactivate_task(rq_src, p, 0);
+- set_task_cpu(p, dest_cpu);
++
++ set_task_cpu(p, dest_cpu);
++ if (on_rq) {
+ activate_task(rq_dest, p, 0);
+ check_preempt_curr(rq_dest, p, 0);
+ }
+@@ -7382,29 +7308,57 @@ static int migration_thread(void *data)
+ }
+
+ #ifdef CONFIG_HOTPLUG_CPU
++
++static int __migrate_task_irq(struct task_struct *p, int src_cpu, int dest_cpu)
++{
++ int ret;
++
++ local_irq_disable();
++ ret = __migrate_task(p, src_cpu, dest_cpu);
++ local_irq_enable();
++ return ret;
++}
++
+ /*
+ * Figure out where task on dead CPU should go, use force if necessary.
+ */
+-void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
++static void move_task_off_dead_cpu(int dead_cpu, struct task_struct *p)
+ {
+- struct rq *rq = cpu_rq(dead_cpu);
+- int needs_cpu, uninitialized_var(dest_cpu);
+- unsigned long flags;
++ int dest_cpu;
++ const struct cpumask *nodemask = cpumask_of_node(cpu_to_node(dead_cpu));
+
+- local_irq_save(flags);
++again:
++ /* Look for allowed, online CPU in same node. */
++ for_each_cpu_and(dest_cpu, nodemask, cpu_active_mask)
++ if (cpumask_test_cpu(dest_cpu, &p->cpus_allowed))
++ goto move;
+
+- spin_lock(&rq->lock);
+- needs_cpu = (task_cpu(p) == dead_cpu) && (p->state != TASK_WAKING);
+- if (needs_cpu)
+- dest_cpu = select_fallback_rq(dead_cpu, p);
+- spin_unlock(&rq->lock);
+- /*
+- * It can only fail if we race with set_cpus_allowed(),
+- * in the racer should migrate the task anyway.
+- */
+- if (needs_cpu)
+- __migrate_task(p, dead_cpu, dest_cpu);
+- local_irq_restore(flags);
++ /* Any allowed, online CPU? */
++ dest_cpu = cpumask_any_and(&p->cpus_allowed, cpu_active_mask);
++ if (dest_cpu < nr_cpu_ids)
++ goto move;
++
++ /* No more Mr. Nice Guy. */
++ if (dest_cpu >= nr_cpu_ids) {
++ cpuset_cpus_allowed_locked(p, &p->cpus_allowed);
++ dest_cpu = cpumask_any_and(cpu_active_mask, &p->cpus_allowed);
++
++ /*
++ * Don't tell them about moving exiting tasks or
++ * kernel threads (both mm NULL), since they never
++ * leave kernel.
++ */
++ if (p->mm && printk_ratelimit()) {
++ printk(KERN_INFO "process %d (%s) no "
++ "longer affine to cpu%d\n",
++ task_pid_nr(p), p->comm, dead_cpu);
++ }
++ }
++
++move:
++ /* It can have affinity changed while we were choosing. */
++ if (unlikely(!__migrate_task_irq(p, dead_cpu, dest_cpu)))
++ goto again;
+ }
+
+ /*
+@@ -7798,23 +7752,14 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ cpu_rq(cpu)->migration_thread = NULL;
+ break;
+
+- case CPU_POST_DEAD:
+- /*
+- * Bring the migration thread down in CPU_POST_DEAD event,
+- * since the timers should have got migrated by now and thus
+- * we should not see a deadlock between trying to kill the
+- * migration thread and the sched_rt_period_timer.
+- */
+- rq = cpu_rq(cpu);
+- kthread_stop(rq->migration_thread);
+- put_task_struct(rq->migration_thread);
+- rq->migration_thread = NULL;
+- break;
+-
+ case CPU_DEAD:
+ case CPU_DEAD_FROZEN:
++ cpuset_lock(); /* around calls to cpuset_cpus_allowed_lock() */
+ migrate_live_tasks(cpu);
+ rq = cpu_rq(cpu);
++ kthread_stop(rq->migration_thread);
++ put_task_struct(rq->migration_thread);
++ rq->migration_thread = NULL;
+ /* Idle task back to normal (off runqueue, low prio) */
+ spin_lock_irq(&rq->lock);
+ update_rq_clock(rq);
+@@ -7823,6 +7768,7 @@ migration_call(struct notifier_block *nfb, unsigned long action, void *hcpu)
+ rq->idle->sched_class = &idle_sched_class;
+ migrate_dead_tasks(cpu);
+ spin_unlock_irq(&rq->lock);
++ cpuset_unlock();
+ migrate_nr_uninterruptible(rq);
+ BUG_ON(rq->nr_running != 0);
+ calc_global_load_remove(rq);
+@@ -10153,13 +10099,13 @@ void sched_move_task(struct task_struct *tsk)
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ if (tsk->sched_class->moved_group)
+- tsk->sched_class->moved_group(tsk, on_rq);
++ tsk->sched_class->moved_group(tsk);
+ #endif
+
+ if (unlikely(running))
+ tsk->sched_class->set_curr_task(rq);
+ if (on_rq)
+- enqueue_task(rq, tsk, 0, false);
++ enqueue_task(rq, tsk, 0);
+
+ task_rq_unlock(rq, &flags);
+ }
+@@ -10931,30 +10877,12 @@ static void cpuacct_charge(struct task_struct *tsk, u64 cputime)
+ }
+
+ /*
+- * When CONFIG_VIRT_CPU_ACCOUNTING is enabled one jiffy can be very large
+- * in cputime_t units. As a result, cpuacct_update_stats calls
+- * percpu_counter_add with values large enough to always overflow the
+- * per cpu batch limit causing bad SMP scalability.
+- *
+- * To fix this we scale percpu_counter_batch by cputime_one_jiffy so we
+- * batch the same amount of time with CONFIG_VIRT_CPU_ACCOUNTING disabled
+- * and enabled. We cap it at INT_MAX which is the largest allowed batch value.
+- */
+-#ifdef CONFIG_SMP
+-#define CPUACCT_BATCH \
+- min_t(long, percpu_counter_batch * cputime_one_jiffy, INT_MAX)
+-#else
+-#define CPUACCT_BATCH 0
+-#endif
+-
+-/*
+ * Charge the system/user time to the task's accounting group.
+ */
+ static void cpuacct_update_stats(struct task_struct *tsk,
+ enum cpuacct_stat_index idx, cputime_t val)
+ {
+ struct cpuacct *ca;
+- int batch = CPUACCT_BATCH;
+
+ if (unlikely(!cpuacct_subsys.active))
+ return;
+@@ -10963,7 +10891,7 @@ static void cpuacct_update_stats(struct task_struct *tsk,
+ ca = task_ca(tsk);
+
+ do {
+- __percpu_counter_add(&ca->cpustat[idx], val, batch);
++ percpu_counter_add(&ca->cpustat[idx], val);
+ ca = ca->parent;
+ } while (ca);
+ rcu_read_unlock();
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 6f836a8..6988cf0 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -423,6 +423,7 @@ void proc_sched_show_task(struct task_struct *p, struct seq_file *m)
+ P(se.nr_failed_migrations_running);
+ P(se.nr_failed_migrations_hot);
+ P(se.nr_forced_migrations);
++ P(se.nr_forced2_migrations);
+ P(se.nr_wakeups);
+ P(se.nr_wakeups_sync);
+ P(se.nr_wakeups_migrate);
+@@ -498,6 +499,7 @@ void proc_sched_set_task(struct task_struct *p)
+ p->se.nr_failed_migrations_running = 0;
+ p->se.nr_failed_migrations_hot = 0;
+ p->se.nr_forced_migrations = 0;
++ p->se.nr_forced2_migrations = 0;
+ p->se.nr_wakeups = 0;
+ p->se.nr_wakeups_sync = 0;
+ p->se.nr_wakeups_migrate = 0;
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index 623b876..d80812d 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -488,7 +488,6 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr,
+ curr->sum_exec_runtime += delta_exec;
+ schedstat_add(cfs_rq, exec_clock, delta_exec);
+ delta_exec_weighted = calc_delta_fair(delta_exec, curr);
+-
+ curr->vruntime += delta_exec_weighted;
+ update_min_vruntime(cfs_rq);
+ }
+@@ -744,26 +743,16 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ se->vruntime = vruntime;
+ }
+
+-#define ENQUEUE_WAKEUP 1
+-#define ENQUEUE_MIGRATE 2
+-
+ static void
+-enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int flags)
++enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+ {
+ /*
+- * Update the normalized vruntime before updating min_vruntime
+- * through callig update_curr().
+- */
+- if (!(flags & ENQUEUE_WAKEUP) || (flags & ENQUEUE_MIGRATE))
+- se->vruntime += cfs_rq->min_vruntime;
+-
+- /*
+ * Update run-time statistics of the 'current'.
+ */
+ update_curr(cfs_rq);
+ account_entity_enqueue(cfs_rq, se);
+
+- if (flags & ENQUEUE_WAKEUP) {
++ if (wakeup) {
+ place_entity(cfs_rq, se, 0);
+ enqueue_sleeper(cfs_rq, se);
+ }
+@@ -817,14 +806,6 @@ dequeue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int sleep)
+ __dequeue_entity(cfs_rq, se);
+ account_entity_dequeue(cfs_rq, se);
+ update_min_vruntime(cfs_rq);
+-
+- /*
+- * Normalize the entity after updating the min_vruntime because the
+- * update can refer to the ->curr item and we need to reflect this
+- * movement in our normalized position.
+- */
+- if (!sleep)
+- se->vruntime -= cfs_rq->min_vruntime;
+ }
+
+ /*
+@@ -1031,24 +1012,17 @@ static inline void hrtick_update(struct rq *rq)
+ * increased. Here we update the fair scheduling stats and
+ * then put the task into the rbtree:
+ */
+-static void
+-enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup, bool head)
++static void enqueue_task_fair(struct rq *rq, struct task_struct *p, int wakeup)
+ {
+ struct cfs_rq *cfs_rq;
+ struct sched_entity *se = &p->se;
+- int flags = 0;
+-
+- if (wakeup)
+- flags |= ENQUEUE_WAKEUP;
+- if (p->state == TASK_WAKING)
+- flags |= ENQUEUE_MIGRATE;
+
+ for_each_sched_entity(se) {
+ if (se->on_rq)
+ break;
+ cfs_rq = cfs_rq_of(se);
+- enqueue_entity(cfs_rq, se, flags);
+- flags = ENQUEUE_WAKEUP;
++ enqueue_entity(cfs_rq, se, wakeup);
++ wakeup = 1;
+ }
+
+ hrtick_update(rq);
+@@ -1124,14 +1098,6 @@ static void yield_task_fair(struct rq *rq)
+
+ #ifdef CONFIG_SMP
+
+-static void task_waking_fair(struct rq *rq, struct task_struct *p)
+-{
+- struct sched_entity *se = &p->se;
+- struct cfs_rq *cfs_rq = cfs_rq_of(se);
+-
+- se->vruntime -= cfs_rq->min_vruntime;
+-}
+-
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ /*
+ * effective_load() calculates the load change as seen from the root_task_group
+@@ -1250,7 +1216,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ * effect of the currently running task from the load
+ * of the current CPU:
+ */
+- rcu_read_lock();
+ if (sync) {
+ tg = task_group(current);
+ weight = current->se.load.weight;
+@@ -1276,7 +1241,6 @@ static int wake_affine(struct sched_domain *sd, struct task_struct *p, int sync)
+ balanced = !this_load ||
+ 100*(this_load + effective_load(tg, this_cpu, weight, weight)) <=
+ imbalance*(load + effective_load(tg, prev_cpu, 0, weight));
+- rcu_read_unlock();
+
+ /*
+ * If the currently running task will sleep within
+@@ -1384,56 +1348,6 @@ find_idlest_cpu(struct sched_group *group, struct task_struct *p, int this_cpu)
+ }
+
+ /*
+- * Try and locate an idle CPU in the sched_domain.
+- */
+-static int select_idle_sibling(struct task_struct *p, int target)
+-{
+- int cpu = smp_processor_id();
+- int prev_cpu = task_cpu(p);
+- struct sched_domain *sd;
+- int i;
+-
+- /*
+- * If the task is going to be woken-up on this cpu and if it is
+- * already idle, then it is the right target.
+- */
+- if (target == cpu && idle_cpu(cpu))
+- return cpu;
+-
+- /*
+- * If the task is going to be woken-up on the cpu where it previously
+- * ran and if it is currently idle, then it the right target.
+- */
+- if (target == prev_cpu && idle_cpu(prev_cpu))
+- return prev_cpu;
+-
+- /*
+- * Otherwise, iterate the domains and find an elegible idle cpu.
+- */
+- for_each_domain(target, sd) {
+- if (!(sd->flags & SD_SHARE_PKG_RESOURCES))
+- break;
+-
+- for_each_cpu_and(i, sched_domain_span(sd), &p->cpus_allowed) {
+- if (idle_cpu(i)) {
+- target = i;
+- break;
+- }
+- }
+-
+- /*
+- * Lets stop looking for an idle sibling when we reached
+- * the domain that spans the current cpu and prev_cpu.
+- */
+- if (cpumask_test_cpu(cpu, sched_domain_span(sd)) &&
+- cpumask_test_cpu(prev_cpu, sched_domain_span(sd)))
+- break;
+- }
+-
+- return target;
+-}
+-
+-/*
+ * sched_balance_self: balance the current task (running on cpu) in domains
+ * that have the 'flag' flag set. In practice, this is SD_BALANCE_FORK and
+ * SD_BALANCE_EXEC.
+@@ -1444,8 +1358,7 @@ static int select_idle_sibling(struct task_struct *p, int target)
+ *
+ * preempt must be disabled.
+ */
+-static int
+-select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_flags)
++static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flags)
+ {
+ struct sched_domain *tmp, *affine_sd = NULL, *sd = NULL;
+ int cpu = smp_processor_id();
+@@ -1462,6 +1375,7 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ new_cpu = prev_cpu;
+ }
+
++ rcu_read_lock();
+ for_each_domain(cpu, tmp) {
+ if (!(tmp->flags & SD_LOAD_BALANCE))
+ continue;
+@@ -1490,14 +1404,38 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ want_sd = 0;
+ }
+
+- /*
+- * If both cpu and prev_cpu are part of this domain,
+- * cpu is a valid SD_WAKE_AFFINE target.
+- */
+- if (want_affine && (tmp->flags & SD_WAKE_AFFINE) &&
+- cpumask_test_cpu(prev_cpu, sched_domain_span(tmp))) {
+- affine_sd = tmp;
+- want_affine = 0;
++ if (want_affine && (tmp->flags & SD_WAKE_AFFINE)) {
++ int candidate = -1, i;
++
++ if (cpumask_test_cpu(prev_cpu, sched_domain_span(tmp)))
++ candidate = cpu;
++
++ /*
++ * Check for an idle shared cache.
++ */
++ if (tmp->flags & SD_PREFER_SIBLING) {
++ if (candidate == cpu) {
++ if (!cpu_rq(prev_cpu)->cfs.nr_running)
++ candidate = prev_cpu;
++ }
++
++ if (candidate == -1 || candidate == cpu) {
++ for_each_cpu(i, sched_domain_span(tmp)) {
++ if (!cpumask_test_cpu(i, &p->cpus_allowed))
++ continue;
++ if (!cpu_rq(i)->cfs.nr_running) {
++ candidate = i;
++ break;
++ }
++ }
++ }
++ }
++
++ if (candidate >= 0) {
++ affine_sd = tmp;
++ want_affine = 0;
++ cpu = candidate;
++ }
+ }
+
+ if (!want_sd && !want_affine)
+@@ -1510,7 +1448,6 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ sd = tmp;
+ }
+
+-#ifdef CONFIG_FAIR_GROUP_SCHED
+ if (sched_feat(LB_SHARES_UPDATE)) {
+ /*
+ * Pick the largest domain to update shares over
+@@ -1521,19 +1458,13 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ cpumask_weight(sched_domain_span(sd))))
+ tmp = affine_sd;
+
+- if (tmp) {
+- spin_unlock(&rq->lock);
++ if (tmp)
+ update_shares(tmp);
+- spin_lock(&rq->lock);
+- }
+ }
+-#endif
+
+- if (affine_sd) {
+- if (cpu == prev_cpu || wake_affine(affine_sd, p, sync))
+- return select_idle_sibling(p, cpu);
+- else
+- return select_idle_sibling(p, prev_cpu);
++ if (affine_sd && wake_affine(affine_sd, p, sync)) {
++ new_cpu = cpu;
++ goto out;
+ }
+
+ while (sd) {
+@@ -1575,6 +1506,8 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ /* while loop will break here if sd == NULL */
+ }
+
++out:
++ rcu_read_unlock();
+ return new_cpu;
+ }
+ #endif /* CONFIG_SMP */
+@@ -1978,32 +1911,28 @@ static void task_tick_fair(struct rq *rq, struct task_struct *curr, int queued)
+ }
+
+ /*
+- * called on fork with the child task as argument from the parent's context
+- * - child not yet on the tasklist
+- * - preemption disabled
++ * Share the fairness runtime between parent and child, thus the
++ * total amount of pressure for CPU stays equal - new tasks
++ * get a chance to run but frequent forkers are not allowed to
++ * monopolize the CPU. Note: the parent runqueue is locked,
++ * the child is not running yet.
+ */
+-static void task_fork_fair(struct task_struct *p)
++static void task_new_fair(struct rq *rq, struct task_struct *p)
+ {
+- struct cfs_rq *cfs_rq = task_cfs_rq(current);
++ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+ struct sched_entity *se = &p->se, *curr = cfs_rq->curr;
+ int this_cpu = smp_processor_id();
+- struct rq *rq = this_rq();
+- unsigned long flags;
+-
+- spin_lock_irqsave(&rq->lock, flags);
+-
+- update_rq_clock(rq);
+
+- if (unlikely(task_cpu(p) != this_cpu))
+- __set_task_cpu(p, this_cpu);
++ sched_info_queued(p);
+
+ update_curr(cfs_rq);
+-
+ if (curr)
+ se->vruntime = curr->vruntime;
+ place_entity(cfs_rq, se, 1);
+
+- if (sysctl_sched_child_runs_first && curr && entity_before(curr, se)) {
++ /* 'curr' will be NULL if the child belongs to a different group */
++ if (sysctl_sched_child_runs_first && this_cpu == task_cpu(p) &&
++ curr && entity_before(curr, se)) {
+ /*
+ * Upon rescheduling, sched_class::put_prev_task() will place
+ * 'current' within the tree based on its new key value.
+@@ -2012,9 +1941,7 @@ static void task_fork_fair(struct task_struct *p)
+ resched_task(rq->curr);
+ }
+
+- se->vruntime -= cfs_rq->min_vruntime;
+-
+- spin_unlock_irqrestore(&rq->lock, flags);
++ enqueue_task_fair(rq, p, 0);
+ }
+
+ /*
+@@ -2067,27 +1994,30 @@ static void set_curr_task_fair(struct rq *rq)
+ }
+
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+-static void moved_group_fair(struct task_struct *p, int on_rq)
++static void moved_group_fair(struct task_struct *p)
+ {
+ struct cfs_rq *cfs_rq = task_cfs_rq(p);
+
+ update_curr(cfs_rq);
+- if (!on_rq)
+- place_entity(cfs_rq, &p->se, 1);
++ place_entity(cfs_rq, &p->se, 1);
+ }
+ #endif
+
+-unsigned int get_rr_interval_fair(struct rq *rq, struct task_struct *task)
++unsigned int get_rr_interval_fair(struct task_struct *task)
+ {
+ struct sched_entity *se = &task->se;
++ unsigned long flags;
++ struct rq *rq;
+ unsigned int rr_interval = 0;
+
+ /*
+ * Time slice is 0 for SCHED_OTHER tasks that are on an otherwise
+ * idle runqueue:
+ */
++ rq = task_rq_lock(task, &flags);
+ if (rq->cfs.load.weight)
+ rr_interval = NS_TO_JIFFIES(sched_slice(&rq->cfs, se));
++ task_rq_unlock(rq, &flags);
+
+ return rr_interval;
+ }
+@@ -2113,13 +2043,11 @@ static const struct sched_class fair_sched_class = {
+ .move_one_task = move_one_task_fair,
+ .rq_online = rq_online_fair,
+ .rq_offline = rq_offline_fair,
+-
+- .task_waking = task_waking_fair,
+ #endif
+
+ .set_curr_task = set_curr_task_fair,
+ .task_tick = task_tick_fair,
+- .task_fork = task_fork_fair,
++ .task_new = task_new_fair,
+
+ .prio_changed = prio_changed_fair,
+ .switched_to = switched_to_fair,
+diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
+index 93ad2e7..b133a28 100644
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -6,8 +6,7 @@
+ */
+
+ #ifdef CONFIG_SMP
+-static int
+-select_task_rq_idle(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
++static int select_task_rq_idle(struct task_struct *p, int sd_flag, int flags)
+ {
+ return task_cpu(p); /* IDLE tasks as never migrated */
+ }
+@@ -98,7 +97,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+ check_preempt_curr(rq, p, 0);
+ }
+
+-unsigned int get_rr_interval_idle(struct rq *rq, struct task_struct *task)
++unsigned int get_rr_interval_idle(struct task_struct *task)
+ {
+ return 0;
+ }
+diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
+index af24fab..a4d790c 100644
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -194,7 +194,7 @@ static inline struct rt_rq *group_rt_rq(struct sched_rt_entity *rt_se)
+ return rt_se->my_q;
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head);
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se);
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se);
+
+ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+@@ -204,7 +204,7 @@ static void sched_rt_rq_enqueue(struct rt_rq *rt_rq)
+
+ if (rt_rq->rt_nr_running) {
+ if (rt_se && !on_rt_rq(rt_se))
+- enqueue_rt_entity(rt_se, false);
++ enqueue_rt_entity(rt_se);
+ if (rt_rq->highest_prio.curr < curr->prio)
+ resched_task(curr);
+ }
+@@ -803,7 +803,7 @@ void dec_rt_tasks(struct sched_rt_entity *rt_se, struct rt_rq *rt_rq)
+ dec_rt_group(rt_se, rt_rq);
+ }
+
+-static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
++static void __enqueue_rt_entity(struct sched_rt_entity *rt_se)
+ {
+ struct rt_rq *rt_rq = rt_rq_of_se(rt_se);
+ struct rt_prio_array *array = &rt_rq->active;
+@@ -819,10 +819,7 @@ static void __enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
+ if (group_rq && (rt_rq_throttled(group_rq) || !group_rq->rt_nr_running))
+ return;
+
+- if (head)
+- list_add(&rt_se->run_list, queue);
+- else
+- list_add_tail(&rt_se->run_list, queue);
++ list_add_tail(&rt_se->run_list, queue);
+ __set_bit(rt_se_prio(rt_se), array->bitmap);
+
+ inc_rt_tasks(rt_se, rt_rq);
+@@ -859,11 +856,11 @@ static void dequeue_rt_stack(struct sched_rt_entity *rt_se)
+ }
+ }
+
+-static void enqueue_rt_entity(struct sched_rt_entity *rt_se, bool head)
++static void enqueue_rt_entity(struct sched_rt_entity *rt_se)
+ {
+ dequeue_rt_stack(rt_se);
+ for_each_sched_rt_entity(rt_se)
+- __enqueue_rt_entity(rt_se, head);
++ __enqueue_rt_entity(rt_se);
+ }
+
+ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+@@ -874,22 +871,21 @@ static void dequeue_rt_entity(struct sched_rt_entity *rt_se)
+ struct rt_rq *rt_rq = group_rt_rq(rt_se);
+
+ if (rt_rq && rt_rq->rt_nr_running)
+- __enqueue_rt_entity(rt_se, false);
++ __enqueue_rt_entity(rt_se);
+ }
+ }
+
+ /*
+ * Adding/removing a task to/from a priority array:
+ */
+-static void
+-enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup, bool head)
++static void enqueue_task_rt(struct rq *rq, struct task_struct *p, int wakeup)
+ {
+ struct sched_rt_entity *rt_se = &p->rt;
+
+ if (wakeup)
+ rt_se->timeout = 0;
+
+- enqueue_rt_entity(rt_se, head);
++ enqueue_rt_entity(rt_se);
+
+ if (!task_current(rq, p) && p->rt.nr_cpus_allowed > 1)
+ enqueue_pushable_task(rq, p);
+@@ -942,9 +938,10 @@ static void yield_task_rt(struct rq *rq)
+ #ifdef CONFIG_SMP
+ static int find_lowest_rq(struct task_struct *task);
+
+-static int
+-select_task_rq_rt(struct rq *rq, struct task_struct *p, int sd_flag, int flags)
++static int select_task_rq_rt(struct task_struct *p, int sd_flag, int flags)
+ {
++ struct rq *rq = task_rq(p);
++
+ if (sd_flag != SD_BALANCE_WAKE)
+ return smp_processor_id();
+
+@@ -1488,7 +1485,7 @@ static void post_schedule_rt(struct rq *rq)
+ * If we are not running and we are not going to reschedule soon, we should
+ * try to push tasks away now
+ */
+-static void task_woken_rt(struct rq *rq, struct task_struct *p)
++static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+ {
+ if (!task_running(rq, p) &&
+ !test_tsk_need_resched(rq->curr) &&
+@@ -1737,7 +1734,7 @@ static void set_curr_task_rt(struct rq *rq)
+ dequeue_pushable_task(rq, p);
+ }
+
+-unsigned int get_rr_interval_rt(struct rq *rq, struct task_struct *task)
++unsigned int get_rr_interval_rt(struct task_struct *task)
+ {
+ /*
+ * Time slice is 0 for SCHED_FIFO tasks
+@@ -1769,7 +1766,7 @@ static const struct sched_class rt_sched_class = {
+ .rq_offline = rq_offline_rt,
+ .pre_schedule = pre_schedule_rt,
+ .post_schedule = post_schedule_rt,
+- .task_woken = task_woken_rt,
++ .task_wake_up = task_wake_up_rt,
+ .switched_from = switched_from_rt,
+ #endif
+
Added: dists/sid/linux-2.6/debian/patches/debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,88 @@
+From 960bb81153ec66609799b8f1c072b9266629e765 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Mon, 20 Sep 2010 23:07:08 +0100
+Subject: [PATCH] Revert "sched: Pre-compute cpumask_weight(sched_domain_span(sd))"
+
+This reverts commit 6efd9bbce0d4b02d295f28054caa74e6edf811b7
+which is an ABI breaker.
+---
+ include/linux/sched.h | 1 -
+ kernel/sched.c | 7 ++-----
+ kernel/sched_fair.c | 8 +++++---
+ 3 files changed, 7 insertions(+), 9 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 2246de3..682d6d4 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1003,7 +1003,6 @@ struct sched_domain {
+ char *name;
+ #endif
+
+- unsigned int span_weight;
+ /*
+ * Span of all CPUs in this domain.
+ *
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 152214d..98d4048 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -3678,7 +3678,7 @@ unsigned long __weak arch_scale_freq_power(struct sched_domain *sd, int cpu)
+
+ unsigned long default_scale_smt_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = sd->span_weight;
++ unsigned long weight = cpumask_weight(sched_domain_span(sd));
+ unsigned long smt_gain = sd->smt_gain;
+
+ smt_gain /= weight;
+@@ -3711,7 +3711,7 @@ unsigned long scale_rt_power(int cpu)
+
+ static void update_cpu_power(struct sched_domain *sd, int cpu)
+ {
+- unsigned long weight = sd->span_weight;
++ unsigned long weight = cpumask_weight(sched_domain_span(sd));
+ unsigned long power = SCHED_LOAD_SCALE;
+ struct sched_group *sdg = sd->groups;
+
+@@ -8166,9 +8166,6 @@ cpu_attach_domain(struct sched_domain *sd, struct root_domain *rd, int cpu)
+ struct rq *rq = cpu_rq(cpu);
+ struct sched_domain *tmp;
+
+- for (tmp = sd; tmp; tmp = tmp->parent)
+- tmp->span_weight = cpumask_weight(sched_domain_span(tmp));
+-
+ /* Remove the sched domains which do not contribute to scheduling. */
+ for (tmp = sd; tmp; ) {
+ struct sched_domain *parent = tmp->parent;
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index 01e311e..623b876 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -1516,7 +1516,9 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+ * Pick the largest domain to update shares over
+ */
+ tmp = sd;
+- if (affine_sd && (!tmp || affine_sd->span_weight > sd->span_weight))
++ if (affine_sd && (!tmp ||
++ cpumask_weight(sched_domain_span(affine_sd)) >
++ cpumask_weight(sched_domain_span(sd))))
+ tmp = affine_sd;
+
+ if (tmp) {
+@@ -1562,10 +1564,10 @@ select_task_rq_fair(struct rq *rq, struct task_struct *p, int sd_flag, int wake_
+
+ /* Now try balancing at a lower domain level of new_cpu */
+ cpu = new_cpu;
+- weight = sd->span_weight;
++ weight = cpumask_weight(sched_domain_span(sd));
+ sd = NULL;
+ for_each_domain(cpu, tmp) {
+- if (weight <= tmp->span_weight)
++ if (weight <= cpumask_weight(sched_domain_span(tmp)))
+ break;
+ if (tmp->flags & sd_flag)
+ sd = tmp;
+--
+1.7.1
+
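Background note: the reverted commit cached cpumask_weight(sched_domain_span(sd)) in a new span_weight field of struct sched_domain, and inserting a field mid-structure shifts the offsets of every later member, which is the kind of ABI break being avoided here. A minimal userspace illustration of that effect follows; the structure and member names below are simplified stand-ins, not the kernel's:

/*
 * Illustration only -- not taken from the patch.  Inserting a member in
 * the middle of a structure changes the offsets of the members after it,
 * so code built against the old layout misreads instances that use the
 * new layout.
 */
#include <stddef.h>
#include <stdio.h>

struct domain_old {
	char *name;
	unsigned long span[1];
};

struct domain_new {
	char *name;
	unsigned int span_weight;	/* newly inserted member */
	unsigned long span[1];
};

int main(void)
{
	printf("offsetof(span), old layout: %zu\n",
	       offsetof(struct domain_old, span));
	printf("offsetof(span), new layout: %zu\n",
	       offsetof(struct domain_new, span));
	return 0;
}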
Added: dists/sid/linux-2.6/debian/patches/debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -0,0 +1,75 @@
+From 24b6233b4e4bdb7b45dc28e399595a79874e3ec0 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Mon, 20 Sep 2010 23:20:13 +0100
+Subject: [PATCH] sched: Avoid ABI change due to sched_class changes
+
+struct sched_class is private to the scheduler, but since it is
+defined in <linux/sched.h> it affects the symbol version of many
+exported symbols. Hide the changes from genksyms since it should not
+consider sched_class as part of the exported ABI.
+---
+ include/linux/sched.h | 41 +++++++++++++++++++++++++++++++++++++++++
+ 1 files changed, 41 insertions(+), 0 deletions(-)
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 682d6d4..1184379 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -1071,6 +1071,7 @@ struct sched_domain;
+ #define WF_SYNC 0x01 /* waker goes to sleep after wakup */
+ #define WF_FORK 0x02 /* child wakeup after fork */
+
++#ifndef __GENKSYMS__
+ struct sched_class {
+ const struct sched_class *next;
+
+@@ -1126,6 +1127,46 @@ struct sched_class {
+ void (*moved_group) (struct task_struct *p, int on_rq);
+ #endif
+ };
++#else /* __GENKSYMS__ */
++/*
++ * struct sched_class is private to the scheduler, but since it is
++ * defined here it affects the symbol version of many exported symbols.
++ * This is a fake definition purely to keep symbol versions stable.
++ */
++struct sched_class {
++ const struct sched_class *next;
++ void (*enqueue_task) (struct rq *, struct task_struct *, int);
++ void (*dequeue_task) (struct rq *, struct task_struct *, int);
++ void (*yield_task) (struct rq *);
++ void (*check_preempt_curr) (struct rq *, struct task_struct *, int);
++ struct task_struct * (*pick_next_task) (struct rq *);
++ void (*put_prev_task) (struct rq *, struct task_struct *);
++#ifdef CONFIG_SMP
++ int (*select_task_rq)(struct task_struct *, int, int);
++ unsigned long (*load_balance) (struct rq *, int, struct rq *,
++ unsigned long, struct sched_domain *,
++ enum cpu_idle_type, int *, int *);
++ int (*move_one_task) (struct rq *, int, struct rq *,
++ struct sched_domain *, enum cpu_idle_type);
++ void (*pre_schedule) (struct rq *, struct task_struct *);
++ void (*post_schedule) (struct rq *);
++ void (*task_wake_up) (struct rq *, struct task_struct *);
++ void (*set_cpus_allowed)(struct task_struct *, const struct cpumask *);
++ void (*rq_online)(struct rq *);
++ void (*rq_offline)(struct rq *);
++#endif
++ void (*set_curr_task) (struct rq *);
++ void (*task_tick) (struct rq *, struct task_struct *, int);
++ void (*task_new) (struct rq *, struct task_struct *);
++ void (*switched_from) (struct rq *, struct task_struct *, int);
++ void (*switched_to) (struct rq *, struct task_struct *, int);
++ void (*prio_changed) (struct rq *, struct task_struct *, int, int);
++ unsigned int (*get_rr_interval) (struct task_struct *);
++#ifdef CONFIG_FAIR_GROUP_SCHED
++ void (*moved_group) (struct task_struct *);
++#endif
++};
++#endif /* __GENKSYMS__ */
+
+ struct load_weight {
+ unsigned long weight, inv_weight;
+--
+1.7.1
+
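Background note on the guard above: the kernel build feeds genksyms source that has been preprocessed with __GENKSYMS__ defined, so genksyms only ever sees the frozen copy of the structure while the compiler sees the real one, and the exported symbol CRCs stay unchanged. A stripped-down sketch of the same pattern, with made-up structure and member names:

/*
 * Sketch of the __GENKSYMS__ guard pattern (hypothetical names, not from
 * the kernel).  Assumption: the symbol-version pass parses this header
 * with __GENKSYMS__ defined, so it sees only the frozen definition and
 * keeps reporting the same CRCs even after the real definition changes.
 */
#ifndef __GENKSYMS__
/* Real definition, seen by the compiler: free to change between builds. */
struct private_ops {
	int (*run)(void *ctx, int flags);	/* flags added in a later build */
	void (*stop)(void *ctx);
};
#else
/* Frozen definition, seen only during symbol-version generation. */
struct private_ops {
	int (*run)(void *ctx);
	void (*stop)(void *ctx);
};
#endif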
Modified: dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -1,4 +1,4 @@
-bwh: Adjust context in fs/ext4/ext4.h, fs/btrfs/super.h
+bwh: Adjust context in fs/ext4/ext4.h, fs/btrfs/super.h, kernel/sched.c
diff -NurpP --minimal linux-2.6.32.1/arch/alpha/Kconfig linux-2.6.32.1-vs2.3.0.36.27/arch/alpha/Kconfig
--- linux-2.6.32.1/arch/alpha/Kconfig 2009-12-03 20:01:49.000000000 +0100
@@ -15795,8 +15795,8 @@
rq_weight += weight;
@@ -1811,6 +1841,175 @@ static void cfs_rq_set_shares(struct cfs
-
static void calc_load_account_active(struct rq *this_rq);
+ static void update_sysctl(void);
+
+#if defined(CONFIG_RT_GROUP_SCHED) || defined(CONFIG_FAIR_GROUP_SCHED)
Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Mon Sep 20 23:25:04 2010 (r16338)
@@ -15,6 +15,8 @@
$ git diff debian-base..debian-pvops
+[bwh: Fix context in drivers/xen/events.c]
+
diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
index 9ec8558..3e30e60 100644
--- a/Documentation/kernel-parameters.txt
@@ -15123,9 +15125,9 @@
{
return cpu_evtchn_mask_p[cpu].bits;
@@ -110,6 +126,7 @@ static inline unsigned long *cpu_evtchn_mask(int cpu)
- #define VALID_EVTCHN(chn) ((chn) != 0)
static struct irq_chip xen_dynamic_chip;
+ static struct irq_chip xen_percpu_chip;
+static struct irq_chip xen_pirq_chip;
/* Constructor for packed IRQ information. */
Modified: dists/sid/linux-2.6/debian/patches/series/24
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/24 Mon Sep 20 20:58:50 2010 (r16337)
+++ dists/sid/linux-2.6/debian/patches/series/24 Mon Sep 20 23:25:04 2010 (r16338)
@@ -3,3 +3,34 @@
+ bugfix/all/SCSI-scsi_dh_emc-request-flag-cleanup.patch
+ features/arm/mach-types-update-2010-09-09.patch
+ bugfix/all/sound-use-semicolons-to-end-statements.patch
+- bugfix/x86/compat-retruncate-rax-after-ia32-syscall-entry-tracing.patch
+- bugfix/all/wireless-extensions-fix-kernel-heap-content-leak.patch
+- bugfix/all/compat-make-compat_alloc_user_space-incorporate-the-access_ok.patch
+- bugfix/x86/compat-test-rax-for-the-syscall-number-not-eax.patch
+- bugfix/all/tun-Dont-add-sysfs-attributes-to-devices-without-sysfs-dirs.patch
+- features/all/xen/pvhvm/0017-xen-pvhvm-make-it-clearer-that-XEN_UNPLUG_-define.patch
+- features/all/xen/pvhvm/0016-xen-pvhvm-rename-xen_emul_unplug-ignore-to-unnnec.patch
+- features/all/xen/pvhvm/0015-xen-pvhvm-allow-user-to-request-no-emulated-device.patch
+- features/all/xen/pvhvm/0013-Introduce-CONFIG_XEN_PVHVM-compile-option.patch
+- features/all/xen/pvhvm/0012-blkfront-do-not-create-a-PV-cdrom-device-if-xen_hvm.patch
+- features/all/xen/pvhvm/0009-x86-Call-HVMOP_pagetable_dying-on-exit_mmap.patch
+- features/all/xen/pvhvm/0008-x86-Unplug-emulated-disks-and-nics.patch
+- features/all/xen/pvhvm/0007-x86-Use-xen_vcpuop_clockevent-xen_clocksource-and.patch
+- features/all/xen/pvhvm/0005-xen-Add-suspend-resume-support-for-PV-on-HVM-guests.patch
+- features/all/xen/pvhvm/0004-xen-Xen-PCI-platform-device-driver.patch
+- features/all/xen/pvhvm/0003-x86-xen-event-channels-delivery-on-HVM.patch
+- debian/sched-fix-conflict-between-2.6.32.7-and-vserver.patch
++ bugfix/all/stable/2.6.32.22.patch
++ features/all/xen/pvhvm/0003-x86-xen-event-channels-delivery-on-HVM.patch
++ features/all/xen/pvhvm/0004-xen-Xen-PCI-platform-device-driver.patch
++ features/all/xen/pvhvm/0005-xen-Add-suspend-resume-support-for-PV-on-HVM-guests.patch
++ features/all/xen/pvhvm/0007-x86-Use-xen_vcpuop_clockevent-xen_clocksource-and.patch
++ features/all/xen/pvhvm/0008-x86-Unplug-emulated-disks-and-nics.patch
++ features/all/xen/pvhvm/0009-x86-Call-HVMOP_pagetable_dying-on-exit_mmap.patch
++ features/all/xen/pvhvm/0012-blkfront-do-not-create-a-PV-cdrom-device-if-xen_hvm.patch
++ features/all/xen/pvhvm/0013-Introduce-CONFIG_XEN_PVHVM-compile-option.patch
++ features/all/xen/pvhvm/0015-xen-pvhvm-allow-user-to-request-no-emulated-device.patch
++ features/all/xen/pvhvm/0016-xen-pvhvm-rename-xen_emul_unplug-ignore-to-unnnec.patch
++ features/all/xen/pvhvm/0017-xen-pvhvm-make-it-clearer-that-XEN_UNPLUG_-define.patch
++ debian/revert-sched-Pre-compute-cpumask_weight-sched_domain.patch
++ debian/sched-Avoid-ABI-change-due-to-sched_class-changes.patch
Copied and modified: dists/sid/linux-2.6/debian/patches/series/24-extra (from r16335, dists/sid/linux-2.6/debian/patches/series/23-extra)
==============================================================================
--- dists/sid/linux-2.6/debian/patches/series/23-extra Sun Sep 19 02:12:56 2010 (r16335, copy source)
+++ dists/sid/linux-2.6/debian/patches/series/24-extra Mon Sep 20 23:25:04 2010 (r16338)
@@ -1,8 +1,10 @@
++ debian/revert-sched-2.6.32.22-changes.patch featureset=openvz
+ features/all/openvz/openvz.patch featureset=openvz
+ features/all/openvz/revert-cgroup-lite-add-cgroup-id-for-blk-cgroups.patch featureset=openvz
+ features/all/openvz/partially-revert-CPT-Replace-legacy-net-statistics.patch featureset=openvz
+ features/all/openvz/cfq-iosched-do-not-force-idling-for-sync-workload.patch featureset=openvz
++ debian/revert-sched-2.6.32.22-changes.patch featureset=vserver
+ features/all/vserver/vs2.3.0.36.27.patch featureset=vserver
+ features/all/vserver/s390-buildfix.patch featureset=vserver
+ features/all/vserver/ia64-buildfix.patch featureset=vserver