[kernel] r11341 - in dists/trunk/linux-2.6/debian/patches: bugfix/all series

Maximilian Attems maks at alioth.debian.org
Fri May 9 12:24:11 UTC 2008


Author: maks
Date: Fri May  9 12:24:09 2008
New Revision: 11341

Log:
update to patch-2.6.26-rc1-git7

nuke the media build fix, now merged upstream
the first patch conflicted too, ah the pleasure of pruning


Added:
   dists/trunk/linux-2.6/debian/patches/bugfix/all/patch-2.6.26-rc1-git7
Removed:
   dists/trunk/linux-2.6/debian/patches/bugfix/all/drivers-media-build.patch
   dists/trunk/linux-2.6/debian/patches/bugfix/all/patch-2.6.26-rc1-git6
Modified:
   dists/trunk/linux-2.6/debian/patches/series/1~experimental.1
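
The series file hunk itself would follow the (trimmed) patch body below;
schematically it swaps the pruned entries for the new snapshot, along
these lines (an illustrative sketch, not the literal hunk):

    --- a/debian/patches/series/1~experimental.1
    +++ b/debian/patches/series/1~experimental.1
    -+ bugfix/all/patch-2.6.26-rc1-git6
    -+ bugfix/all/drivers-media-build.patch
    ++ bugfix/all/patch-2.6.26-rc1-git7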

Added: dists/trunk/linux-2.6/debian/patches/bugfix/all/patch-2.6.26-rc1-git7
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/bugfix/all/patch-2.6.26-rc1-git7	Fri May  9 12:24:09 2008
@@ -0,0 +1,18538 @@
+diff --git a/Documentation/DocBook/kgdb.tmpl b/Documentation/DocBook/kgdb.tmpl
+index 97618be..028a844 100644
+--- a/Documentation/DocBook/kgdb.tmpl
++++ b/Documentation/DocBook/kgdb.tmpl
+@@ -72,7 +72,7 @@
+     kgdb is a source level debugger for linux kernel. It is used along
+     with gdb to debug a linux kernel.  The expectation is that gdb can
+     be used to "break in" to the kernel to inspect memory, variables
+-    and look through a cal stack information similar to what an
++    and look through call stack information similar to what an
+     application developer would use gdb for.  It is possible to place
+     breakpoints in kernel code and perform some limited execution
+     stepping.
+@@ -93,8 +93,10 @@
+   <chapter id="CompilingAKernel">
+     <title>Compiling a kernel</title>
+     <para>
+-    To enable <symbol>CONFIG_KGDB</symbol>, look under the "Kernel debugging"
+-    and then select "KGDB: kernel debugging with remote gdb".
++    To enable <symbol>CONFIG_KGDB</symbol> you should first turn on
++    "Prompt for development and/or incomplete code/drivers"
++    (CONFIG_EXPERIMENTAL) in "General setup", then under
++    "Kernel debugging" select "KGDB: kernel debugging with remote gdb".
+     </para>
+     <para>
+     Next you should choose one of more I/O drivers to interconnect debugging
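
In .config terms the paragraph added above amounts to something like the
following; CONFIG_KGDB_SERIAL_CONSOLE is only an assumed example of one
of the I/O drivers mentioned next and may be named differently:

    CONFIG_EXPERIMENTAL=y
    CONFIG_KGDB=y
    CONFIG_KGDB_SERIAL_CONSOLE=y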
+diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
+index c2992bc..8b22d7d 100644
+--- a/Documentation/filesystems/Locking
++++ b/Documentation/filesystems/Locking
+@@ -92,7 +92,6 @@ prototypes:
+ 	void (*destroy_inode)(struct inode *);
+ 	void (*dirty_inode) (struct inode *);
+ 	int (*write_inode) (struct inode *, int);
+-	void (*put_inode) (struct inode *);
+ 	void (*drop_inode) (struct inode *);
+ 	void (*delete_inode) (struct inode *);
+ 	void (*put_super) (struct super_block *);
+@@ -115,7 +114,6 @@ alloc_inode:		no	no	no
+ destroy_inode:		no
+ dirty_inode:		no				(must not sleep)
+ write_inode:		no
+-put_inode:		no
+ drop_inode:		no				!!!inode_lock!!!
+ delete_inode:		no
+ put_super:		yes	yes	no
+diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
+index 81e5be6..b7522c6 100644
+--- a/Documentation/filesystems/vfs.txt
++++ b/Documentation/filesystems/vfs.txt
+@@ -205,7 +205,6 @@ struct super_operations {
+ 
+         void (*dirty_inode) (struct inode *);
+         int (*write_inode) (struct inode *, int);
+-        void (*put_inode) (struct inode *);
+         void (*drop_inode) (struct inode *);
+         void (*delete_inode) (struct inode *);
+         void (*put_super) (struct super_block *);
+@@ -246,9 +245,6 @@ or bottom half).
+ 	inode to disc.  The second parameter indicates whether the write
+ 	should be synchronous or not, not all filesystems check this flag.
+ 
+-  put_inode: called when the VFS inode is removed from the inode
+-	cache.
+-
+   drop_inode: called when the last access to the inode is dropped,
+ 	with the inode_lock spinlock held.
+ 
+diff --git a/Documentation/kbuild/kconfig-language.txt b/Documentation/kbuild/kconfig-language.txt
+index 00b950d..c412c24 100644
+--- a/Documentation/kbuild/kconfig-language.txt
++++ b/Documentation/kbuild/kconfig-language.txt
+@@ -377,27 +377,3 @@ config FOO
+ 
+ limits FOO to module (=m) or disabled (=n).
+ 
+-
+-Build limited by a third config symbol which may be =y or =m
+-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+-A common idiom that we see (and sometimes have problems with) is this:
+-
+-When option C in B (module or subsystem) uses interfaces from A (module
+-or subsystem), and both A and B are tristate (could be =y or =m if they
+-were independent of each other, but they aren't), then we need to limit
+-C such that it cannot be built statically if A is built as a loadable
+-module.  (C already depends on B, so there is no dependency issue to
+-take care of here.)
+-
+-If A is linked statically into the kernel image, C can be built
+-statically or as loadable module(s).  However, if A is built as loadable
+-module(s), then C must be restricted to loadable module(s) also.  This
+-can be expressed in kconfig language as:
+-
+-config C
+-	depends on A = y || A = B
+-
+-or for real examples, use this command in a kernel tree:
+-
+-$ find . -name Kconfig\* | xargs grep -ns "depends on.*=.*||.*=" | grep -v orig
+-
+diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt
+index a3c3544..cdd5b93 100644
+--- a/Documentation/kernel-parameters.txt
++++ b/Documentation/kernel-parameters.txt
+@@ -1094,9 +1094,6 @@ and is between 256 and 4096 characters. It is defined in the file
+ 	mac5380=	[HW,SCSI] Format:
+ 			<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+ 
+-	mac53c9x=	[HW,SCSI] Format:
+-			<num_esps>,<disconnect>,<nosync>,<can_queue>,<cmd_per_lun>,<sg_tablesize>,<hostid>,<use_tags>
+-
+ 	machvec=	[IA64] Force the use of a particular machine-vector
+ 			(machvec) in a generic kernel.
+ 			Example: machvec=hpzx1_swiotlb
+@@ -1525,6 +1522,8 @@ and is between 256 and 4096 characters. It is defined in the file
+ 				This is normally done in pci_enable_device(),
+ 				so this option is a temporary workaround
+ 				for broken drivers that don't call it.
++		skip_isa_align	[X86] do not align io start addr, so can
++				handle more pci cards
+ 		firmware	[ARM] Do not re-enumerate the bus but instead
+ 				just use the configuration from the
+ 				bootloader. This is currently used on
+diff --git a/Documentation/s390/CommonIO b/Documentation/s390/CommonIO
+index 8fbc0a8..bf0baa1 100644
+--- a/Documentation/s390/CommonIO
++++ b/Documentation/s390/CommonIO
+@@ -8,17 +8,6 @@ Command line parameters
+ 
+   Enable logging of debug information in case of ccw device timeouts.
+ 
+-
+-* cio_msg = yes | no
+-  
+-  Determines whether information on found devices and sensed device 
+-  characteristics should be shown during startup or when new devices are
+-  found, i. e. messages of the types "Detected device 0.0.4711 on subchannel
+-  0.0.0042" and "SenseID: Device 0.0.4711 reports: ...".
+-
+-  Default is off.
+-
+-
+ * cio_ignore = {all} |
+ 	       {<device> | <range of devices>} |
+ 	       {!<device> | !<range of devices>}
+diff --git a/Documentation/scheduler/sched-design.txt b/Documentation/scheduler/sched-design.txt
+deleted file mode 100644
+index 1605bf0..0000000
+--- a/Documentation/scheduler/sched-design.txt
++++ /dev/null
+@@ -1,165 +0,0 @@
+-		   Goals, Design and Implementation of the
+-		      new ultra-scalable O(1) scheduler
+-
+-
+-  This is an edited version of an email Ingo Molnar sent to
+-  lkml on 4 Jan 2002.  It describes the goals, design, and
+-  implementation of Ingo's new ultra-scalable O(1) scheduler.
+-  Last Updated: 18 April 2002.
+-
+-
+-Goal
+-====
+-
+-The main goal of the new scheduler is to keep all the good things we know
+-and love about the current Linux scheduler:
+-
+- - good interactive performance even during high load: if the user
+-   types or clicks then the system must react instantly and must execute
+-   the user tasks smoothly, even during considerable background load.
+-
+- - good scheduling/wakeup performance with 1-2 runnable processes.
+-
+- - fairness: no process should stay without any timeslice for any
+-   unreasonable amount of time. No process should get an unjustly high
+-   amount of CPU time.
+-
+- - priorities: less important tasks can be started with lower priority,
+-   more important tasks with higher priority.
+-
+- - SMP efficiency: no CPU should stay idle if there is work to do.
+-
+- - SMP affinity: processes which run on one CPU should stay affine to
+-   that CPU. Processes should not bounce between CPUs too frequently.
+-
+- - plus additional scheduler features: RT scheduling, CPU binding.
+-
+-and the goal is also to add a few new things:
+-
+- - fully O(1) scheduling. Are you tired of the recalculation loop
+-   blowing the L1 cache away every now and then? Do you think the goodness
+-   loop is taking a bit too long to finish if there are lots of runnable
+-   processes? This new scheduler takes no prisoners: wakeup(), schedule(),
+-   the timer interrupt are all O(1) algorithms. There is no recalculation
+-   loop. There is no goodness loop either.
+-
+- - 'perfect' SMP scalability. With the new scheduler there is no 'big'
+-   runqueue_lock anymore - it's all per-CPU runqueues and locks - two
+-   tasks on two separate CPUs can wake up, schedule and context-switch
+-   completely in parallel, without any interlocking. All
+-   scheduling-relevant data is structured for maximum scalability.
+-
+- - better SMP affinity. The old scheduler has a particular weakness that
+-   causes the random bouncing of tasks between CPUs if/when higher
+-   priority/interactive tasks, this was observed and reported by many
+-   people. The reason is that the timeslice recalculation loop first needs
+-   every currently running task to consume its timeslice. But when this
+-   happens on eg. an 8-way system, then this property starves an
+-   increasing number of CPUs from executing any process. Once the last
+-   task that has a timeslice left has finished using up that timeslice,
+-   the recalculation loop is triggered and other CPUs can start executing
+-   tasks again - after having idled around for a number of timer ticks.
+-   The more CPUs, the worse this effect.
+-
+-   Furthermore, this same effect causes the bouncing effect as well:
+-   whenever there is such a 'timeslice squeeze' of the global runqueue,
+-   idle processors start executing tasks which are not affine to that CPU.
+-   (because the affine tasks have finished off their timeslices already.)
+-
+-   The new scheduler solves this problem by distributing timeslices on a
+-   per-CPU basis, without having any global synchronization or
+-   recalculation.
+-
+- - batch scheduling. A significant proportion of computing-intensive tasks
+-   benefit from batch-scheduling, where timeslices are long and processes
+-   are roundrobin scheduled. The new scheduler does such batch-scheduling
+-   of the lowest priority tasks - so nice +19 jobs will get
+-   'batch-scheduled' automatically. With this scheduler, nice +19 jobs are
+-   in essence SCHED_IDLE, from an interactiveness point of view.
+-
+- - handle extreme loads more smoothly, without breakdown and scheduling
+-   storms.
+-
+- - O(1) RT scheduling. For those RT folks who are paranoid about the
+-   O(nr_running) property of the goodness loop and the recalculation loop.
+-
+- - run fork()ed children before the parent. Andrea has pointed out the
+-   advantages of this a few months ago, but patches for this feature
+-   do not work with the old scheduler as well as they should,
+-   because idle processes often steal the new child before the fork()ing
+-   CPU gets to execute it.
+-
+-
+-Design
+-======
+-
+-The core of the new scheduler contains the following mechanisms:
+-
+- - *two* priority-ordered 'priority arrays' per CPU. There is an 'active'
+-   array and an 'expired' array. The active array contains all tasks that
+-   are affine to this CPU and have timeslices left. The expired array
+-   contains all tasks which have used up their timeslices - but this array
+-   is kept sorted as well. The active and expired array is not accessed
+-   directly, it's accessed through two pointers in the per-CPU runqueue
+-   structure. If all active tasks are used up then we 'switch' the two
+-   pointers and from now on the ready-to-go (former-) expired array is the
+-   active array - and the empty active array serves as the new collector
+-   for expired tasks.
+-
+- - there is a 64-bit bitmap cache for array indices. Finding the highest
+-   priority task is thus a matter of two x86 BSFL bit-search instructions.
+-
+-the split-array solution enables us to have an arbitrary number of active
+-and expired tasks, and the recalculation of timeslices can be done
+-immediately when the timeslice expires. Because the arrays are always
+-access through the pointers in the runqueue, switching the two arrays can
+-be done very quickly.
+-
+-this is a hybride priority-list approach coupled with roundrobin
+-scheduling and the array-switch method of distributing timeslices.
+-
+- - there is a per-task 'load estimator'.
+-
+-one of the toughest things to get right is good interactive feel during
+-heavy system load. While playing with various scheduler variants i found
+-that the best interactive feel is achieved not by 'boosting' interactive
+-tasks, but by 'punishing' tasks that want to use more CPU time than there
+-is available. This method is also much easier to do in an O(1) fashion.
+-
+-to establish the actual 'load' the task contributes to the system, a
+-complex-looking but pretty accurate method is used: there is a 4-entry
+-'history' ringbuffer of the task's activities during the last 4 seconds.
+-This ringbuffer is operated without much overhead. The entries tell the
+-scheduler a pretty accurate load-history of the task: has it used up more
+-CPU time or less during the past N seconds. [the size '4' and the interval
+-of 4x 1 seconds was found by lots of experimentation - this part is
+-flexible and can be changed in both directions.]
+-
+-the penalty a task gets for generating more load than the CPU can handle
+-is a priority decrease - there is a maximum amount to this penalty
+-relative to their static priority, so even fully CPU-bound tasks will
+-observe each other's priorities, and will share the CPU accordingly.
+-
+-the SMP load-balancer can be extended/switched with additional parallel
+-computing and cache hierarchy concepts: NUMA scheduling, multi-core CPUs
+-can be supported easily by changing the load-balancer. Right now it's
+-tuned for my SMP systems.
+-
+-i skipped the prev->mm == next->mm advantage - no workload i know of shows
+-any sensitivity to this. It can be added back by sacrificing O(1)
+-schedule() [the current and one-lower priority list can be searched for a
+-that->mm == current->mm condition], but costs a fair number of cycles
+-during a number of important workloads, so i wanted to avoid this as much
+-as possible.
+-
+-- the SMP idle-task startup code was still racy and the new scheduler
+-triggered this. So i streamlined the idle-setup code a bit. We do not call
+-into schedule() before all processors have started up fully and all idle
+-threads are in place.
+-
+-- the patch also cleans up a number of aspects of sched.c - moves code
+-into other areas of the kernel where it's appropriate, and simplifies
+-certain code paths and data constructs. As a result, the new scheduler's
+-code is smaller than the old one.
+-
+-	Ingo
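
The array switch described in the text removed above boiled down to a
constant-time pointer swap in the old sched.c, roughly like this
(paraphrased from memory, not part of this patch):

    /* every task in the active array has expired: swap the arrays */
    if (unlikely(!rq->active->nr_active)) {
            struct prio_array *array = rq->active;

            rq->active = rq->expired;
            rq->expired = array;
    }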
+diff --git a/MAINTAINERS b/MAINTAINERS
+index abe2787..f5583dc 100644
+--- a/MAINTAINERS
++++ b/MAINTAINERS
+@@ -2112,12 +2112,10 @@ L:	netdev at vger.kernel.org
+ S:	Maintained
+ 
+ INTEL ETHERNET DRIVERS (e100/e1000/e1000e/igb/ixgb/ixgbe)
+-P:	Auke Kok
+-M:	auke-jan.h.kok at intel.com
+-P:	Jesse Brandeburg
+-M:	jesse.brandeburg at intel.com
+ P:	Jeff Kirsher
+ M:	jeffrey.t.kirsher at intel.com
++P:	Jesse Brandeburg
++M:	jesse.brandeburg at intel.com
+ P:	Bruce Allan
+ M:	bruce.w.allan at intel.com
+ P:	John Ronciak
+diff --git a/arch/arm/kernel/sys_arm.c b/arch/arm/kernel/sys_arm.c
+index 9bd1870..0128687 100644
+--- a/arch/arm/kernel/sys_arm.c
++++ b/arch/arm/kernel/sys_arm.c
+@@ -34,23 +34,6 @@ extern unsigned long do_mremap(unsigned long addr, unsigned long old_len,
+ 			       unsigned long new_len, unsigned long flags,
+ 			       unsigned long new_addr);
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ /* common code for old and new mmaps */
+ inline long do_mmap2(
+ 	unsigned long addr, unsigned long len,
+diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
+index 968deb5..0ecff5a 100644
+--- a/arch/arm/mach-orion5x/common.c
++++ b/arch/arm/mach-orion5x/common.c
+@@ -223,7 +223,9 @@ static struct platform_device orion5x_eth = {
+ 
+ void __init orion5x_eth_init(struct mv643xx_eth_platform_data *eth_data)
+ {
++	eth_data->shared = &orion5x_eth_shared;
+ 	orion5x_eth.dev.platform_data = eth_data;
++
+ 	platform_device_register(&orion5x_eth_shared);
+ 	platform_device_register(&orion5x_eth);
+ }
+diff --git a/arch/avr32/kernel/sys_avr32.c b/arch/avr32/kernel/sys_avr32.c
+index 8deb600..8e8911e 100644
+--- a/arch/avr32/kernel/sys_avr32.c
++++ b/arch/avr32/kernel/sys_avr32.c
+@@ -14,19 +14,6 @@
+ #include <asm/mman.h>
+ #include <asm/uaccess.h>
+ 
+-asmlinkage int sys_pipe(unsigned long __user *filedes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(filedes, fd, sizeof(fd)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ 			  unsigned long prot, unsigned long flags,
+ 			  unsigned long fd, off_t offset)
+diff --git a/arch/blackfin/kernel/sys_bfin.c b/arch/blackfin/kernel/sys_bfin.c
+index efb7b25..fce49d7 100644
+--- a/arch/blackfin/kernel/sys_bfin.c
++++ b/arch/blackfin/kernel/sys_bfin.c
+@@ -45,23 +45,6 @@
+ #include <asm/cacheflush.h>
+ #include <asm/dma.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2 * sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ /* common code for old and new mmaps */
+ static inline long
+ do_mmap2(unsigned long addr, unsigned long len,
+diff --git a/arch/cris/kernel/sys_cris.c b/arch/cris/kernel/sys_cris.c
+index 8b99841..d124066 100644
+--- a/arch/cris/kernel/sys_cris.c
++++ b/arch/cris/kernel/sys_cris.c
+@@ -40,8 +40,11 @@ asmlinkage int sys_pipe(unsigned long __user * fildes)
+         error = do_pipe(fd);
+         unlock_kernel();
+         if (!error) {
+-                if (copy_to_user(fildes, fd, 2*sizeof(int)))
++                if (copy_to_user(fildes, fd, 2*sizeof(int))) {
++			sys_close(fd[0]);
++			sys_close(fd[1]);
+                         error = -EFAULT;
++		}
+         }
+         return error;
+ }
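
These per-arch sys_pipe() copies can all go because 2.6.26-rc1 gained a
generic weak default in fs/pipe.c. That default is not visible in this
excerpt; it looks approximately like this:

    /* generic fallback; an architecture may still override it */
    asmlinkage long __weak sys_pipe(int __user *fildes)
    {
            int fd[2];
            int error;

            error = do_pipe(fd);
            if (!error) {
                    if (copy_to_user(fildes, fd, sizeof(fd)))
                            error = -EFAULT;
            }
            return error;
    }

cris (above) and m32r (below) keep private versions because of their
calling conventions, and additionally gain the sys_close() calls so the
freshly allocated descriptors are not leaked when copy_to_user() fails.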
+diff --git a/arch/frv/kernel/sys_frv.c b/arch/frv/kernel/sys_frv.c
+index 04c6b16..49b2cf2 100644
+--- a/arch/frv/kernel/sys_frv.c
++++ b/arch/frv/kernel/sys_frv.c
+@@ -28,23 +28,6 @@
+ #include <asm/setup.h>
+ #include <asm/uaccess.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-asmlinkage long sys_pipe(unsigned long __user * fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ 			  unsigned long prot, unsigned long flags,
+ 			  unsigned long fd, unsigned long pgoff)
+diff --git a/arch/h8300/kernel/sys_h8300.c b/arch/h8300/kernel/sys_h8300.c
+index 00608be..2745656 100644
+--- a/arch/h8300/kernel/sys_h8300.c
++++ b/arch/h8300/kernel/sys_h8300.c
+@@ -27,23 +27,6 @@
+ #include <asm/traps.h>
+ #include <asm/unistd.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long * fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ /* common code for old and new mmaps */
+ static inline long do_mmap2(
+ 	unsigned long addr, unsigned long len,
+diff --git a/arch/m32r/kernel/sys_m32r.c b/arch/m32r/kernel/sys_m32r.c
+index 6d7a80f..319c797 100644
+--- a/arch/m32r/kernel/sys_m32r.c
++++ b/arch/m32r/kernel/sys_m32r.c
+@@ -90,8 +90,11 @@ sys_pipe(unsigned long r0, unsigned long r1, unsigned long r2,
+ 
+ 	error = do_pipe(fd);
+ 	if (!error) {
+-		if (copy_to_user((void __user *)r0, fd, 2*sizeof(int)))
++		if (copy_to_user((void __user *)r0, fd, 2*sizeof(int))) {
++			sys_close(fd[0]);
++			sys_close(fd[1]);
+ 			error = -EFAULT;
++		}
+ 	}
+ 	return error;
+ }
+diff --git a/arch/m68k/kernel/sys_m68k.c b/arch/m68k/kernel/sys_m68k.c
+index e892f17..7f54efa 100644
+--- a/arch/m68k/kernel/sys_m68k.c
++++ b/arch/m68k/kernel/sys_m68k.c
+@@ -30,23 +30,6 @@
+ #include <asm/page.h>
+ #include <asm/unistd.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long __user * fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ /* common code for old and new mmaps */
+ static inline long do_mmap2(
+ 	unsigned long addr, unsigned long len,
+diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c
+index fd4858e..75b8340 100644
+--- a/arch/m68k/kernel/traps.c
++++ b/arch/m68k/kernel/traps.c
+@@ -468,15 +468,26 @@ static inline void access_error040(struct frame *fp)
+ 			 * (if do_page_fault didn't fix the mapping,
+                          * the writeback won't do good)
+ 			 */
++disable_wb:
+ #ifdef DEBUG
+ 			printk(".. disabling wb2\n");
+ #endif
+ 			if (fp->un.fmt7.wb2a == fp->un.fmt7.faddr)
+ 				fp->un.fmt7.wb2s &= ~WBV_040;
++			if (fp->un.fmt7.wb3a == fp->un.fmt7.faddr)
++				fp->un.fmt7.wb3s &= ~WBV_040;
+ 		}
+-	} else if (send_fault_sig(&fp->ptregs) > 0) {
+-		printk("68040 access error, ssw=%x\n", ssw);
+-		trap_c(fp);
++	} else {
++		/* In case of a bus error we either kill the process or expect
++		 * the kernel to catch the fault, which then is also responsible
++		 * for cleaning up the mess.
++		 */
++		current->thread.signo = SIGBUS;
++		current->thread.faddr = fp->un.fmt7.faddr;
++		if (send_fault_sig(&fp->ptregs) >= 0)
++			printk("68040 bus error (ssw=%x, faddr=%lx)\n", ssw,
++			       fp->un.fmt7.faddr);
++		goto disable_wb;
+ 	}
+ 
+ 	do_040writebacks(fp);
+diff --git a/arch/m68k/mac/config.c b/arch/m68k/mac/config.c
+index 735a49b..ad3e3ba 100644
+--- a/arch/m68k/mac/config.c
++++ b/arch/m68k/mac/config.c
+@@ -48,9 +48,6 @@
+ struct mac_booter_data mac_bi_data;
+ int mac_bisize = sizeof mac_bi_data;
+ 
+-struct mac_hw_present mac_hw_present;
+-EXPORT_SYMBOL(mac_hw_present);
+-
+ /* New m68k bootinfo stuff and videobase */
+ 
+ extern int m68k_num_memory;
+@@ -817,27 +814,6 @@ void __init mac_identify(void)
+ 		m68k_ramdisk.addr, m68k_ramdisk.size);
+ #endif
+ 
+-	/*
+-	 * TODO: set the various fields in macintosh_config->hw_present here!
+-	 */
+-	switch (macintosh_config->scsi_type) {
+-	case MAC_SCSI_OLD:
+-		MACHW_SET(MAC_SCSI_80);
+-		break;
+-	case MAC_SCSI_QUADRA:
+-	case MAC_SCSI_QUADRA2:
+-	case MAC_SCSI_QUADRA3:
+-		MACHW_SET(MAC_SCSI_96);
+-		if ((macintosh_config->ident == MAC_MODEL_Q900) ||
+-		    (macintosh_config->ident == MAC_MODEL_Q950))
+-			MACHW_SET(MAC_SCSI_96_2);
+-		break;
+-	default:
+-		printk(KERN_WARNING "config.c: wtf: unknown scsi, using 53c80\n");
+-		MACHW_SET(MAC_SCSI_80);
+-		break;
+-	}
+-
+ 	iop_init();
+ 	via_init();
+ 	oss_init();
+diff --git a/arch/m68knommu/kernel/sys_m68k.c b/arch/m68knommu/kernel/sys_m68k.c
+index 65f7a95..7002816 100644
+--- a/arch/m68knommu/kernel/sys_m68k.c
++++ b/arch/m68knommu/kernel/sys_m68k.c
+@@ -28,23 +28,6 @@
+ #include <asm/cacheflush.h>
+ #include <asm/unistd.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long * fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ /* common code for old and new mmaps */
+ static inline long do_mmap2(
+ 	unsigned long addr, unsigned long len,
+diff --git a/arch/mn10300/kernel/sys_mn10300.c b/arch/mn10300/kernel/sys_mn10300.c
+index 5f17a1e..bca5a84 100644
+--- a/arch/mn10300/kernel/sys_mn10300.c
++++ b/arch/mn10300/kernel/sys_mn10300.c
+@@ -29,23 +29,6 @@
+ #define MIN_MAP_ADDR	PAGE_SIZE	/* minimum fixed mmap address */
+ 
+ /*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way Unix traditionally does this, though.
+- */
+-asmlinkage long sys_pipe(unsigned long __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2 * sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+-/*
+  * memory mapping syscall
+  */
+ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c
+index 4f58921..71b3195 100644
+--- a/arch/parisc/kernel/sys_parisc.c
++++ b/arch/parisc/kernel/sys_parisc.c
+@@ -33,19 +33,6 @@
+ #include <linux/utsname.h>
+ #include <linux/personality.h>
+ 
+-int sys_pipe(int __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ static unsigned long get_unshared_area(unsigned long addr, unsigned long len)
+ {
+ 	struct vm_area_struct *vma;
+diff --git a/arch/powerpc/kernel/syscalls.c b/arch/powerpc/kernel/syscalls.c
+index e722a4e..4fe69ca 100644
+--- a/arch/powerpc/kernel/syscalls.c
++++ b/arch/powerpc/kernel/syscalls.c
+@@ -136,23 +136,6 @@ int sys_ipc(uint call, int first, unsigned long second, long third,
+ 	return ret;
+ }
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-int sys_pipe(int __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ static inline unsigned long do_mmap2(unsigned long addr, size_t len,
+ 			unsigned long prot, unsigned long flags,
+ 			unsigned long fd, unsigned long off, int shift)
+diff --git a/arch/powerpc/kvm/booke_guest.c b/arch/powerpc/kvm/booke_guest.c
+index 6d9884a..712d89a 100644
+--- a/arch/powerpc/kvm/booke_guest.c
++++ b/arch/powerpc/kvm/booke_guest.c
+@@ -49,6 +49,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
+ 	{ "inst_emu",   VCPU_STAT(emulated_inst_exits) },
+ 	{ "dec",        VCPU_STAT(dec_exits) },
+ 	{ "ext_intr",   VCPU_STAT(ext_intr_exits) },
++	{ "halt_wakeup", VCPU_STAT(halt_wakeup) },
+ 	{ NULL }
+ };
+ 
+@@ -338,6 +339,11 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
+ 		}
+ 		break;
+ 
++	case BOOKE_INTERRUPT_FP_UNAVAIL:
++		kvmppc_queue_exception(vcpu, exit_nr);
++		r = RESUME_GUEST;
++		break;
++
+ 	case BOOKE_INTERRUPT_DATA_STORAGE:
+ 		vcpu->arch.dear = vcpu->arch.fault_dear;
+ 		vcpu->arch.esr = vcpu->arch.fault_esr;
+diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
+index bad40bd..777e0f3 100644
+--- a/arch/powerpc/kvm/powerpc.c
++++ b/arch/powerpc/kvm/powerpc.c
+@@ -36,13 +36,12 @@ gfn_t unalias_gfn(struct kvm *kvm, gfn_t gfn)
+ 
+ int kvm_cpu_has_interrupt(struct kvm_vcpu *v)
+ {
+-	/* XXX implement me */
+-	return 0;
++	return !!(v->arch.pending_exceptions);
+ }
+ 
+ int kvm_arch_vcpu_runnable(struct kvm_vcpu *v)
+ {
+-	return 1;
++	return !(v->arch.msr & MSR_WE);
+ }
+ 
+ 
+@@ -214,6 +213,11 @@ static void kvmppc_decrementer_func(unsigned long data)
+ 	struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
+ 
+ 	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_DECREMENTER);
++
++	if (waitqueue_active(&vcpu->wq)) {
++		wake_up_interruptible(&vcpu->wq);
++		vcpu->stat.halt_wakeup++;
++	}
+ }
+ 
+ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+@@ -339,6 +343,8 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+ 	int r;
+ 	sigset_t sigsaved;
+ 
++	vcpu_load(vcpu);
++
+ 	if (vcpu->sigset_active)
+ 		sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved);
+ 
+@@ -363,12 +369,20 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run)
+ 	if (vcpu->sigset_active)
+ 		sigprocmask(SIG_SETMASK, &sigsaved, NULL);
+ 
++	vcpu_put(vcpu);
++
+ 	return r;
+ }
+ 
+ int kvm_vcpu_ioctl_interrupt(struct kvm_vcpu *vcpu, struct kvm_interrupt *irq)
+ {
+ 	kvmppc_queue_exception(vcpu, BOOKE_INTERRUPT_EXTERNAL);
++
++	if (waitqueue_active(&vcpu->wq)) {
++		wake_up_interruptible(&vcpu->wq);
++		vcpu->stat.halt_wakeup++;
++	}
++
+ 	return 0;
+ }
+ 
+diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
+index 4bb023f..f1d2cdc 100644
+--- a/arch/powerpc/lib/Makefile
++++ b/arch/powerpc/lib/Makefile
+@@ -23,3 +23,4 @@ obj-$(CONFIG_SMP)	+= locks.o
+ endif
+ 
+ obj-$(CONFIG_PPC_LIB_RHEAP) += rheap.o
++obj-$(CONFIG_HAS_IOMEM)	+= devres.o
+diff --git a/arch/powerpc/lib/devres.c b/arch/powerpc/lib/devres.c
+new file mode 100644
+index 0000000..292115d
+--- /dev/null
++++ b/arch/powerpc/lib/devres.c
+@@ -0,0 +1,42 @@
++/*
++ * Copyright (C) 2008 Freescale Semiconductor, Inc.
++ *
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <linux/device.h>	/* devres_*(), devm_ioremap_release() */
++#include <linux/io.h>		/* ioremap_flags() */
++#include <linux/module.h>	/* EXPORT_SYMBOL() */
++
++/**
++ * devm_ioremap_prot - Managed ioremap_flags()
++ * @dev: Generic device to remap IO address for
++ * @offset: BUS offset to map
++ * @size: Size of map
++ * @flags: Page flags
++ *
++ * Managed ioremap_prot().  Map is automatically unmapped on driver
++ * detach.
++ */
++void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
++				 size_t size, unsigned long flags)
++{
++	void __iomem **ptr, *addr;
++
++	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
++	if (!ptr)
++		return NULL;
++
++	addr = ioremap_flags(offset, size, flags);
++	if (addr) {
++		*ptr = addr;
++		devres_add(dev, ptr);
++	} else
++		devres_free(ptr);
++
++	return addr;
++}
++EXPORT_SYMBOL(devm_ioremap_prot);
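
A platform driver would use the new helper roughly as follows (a sketch
with hypothetical driver and variable names; _PAGE_NO_CACHE is the usual
powerpc page flag for uncached mappings):

    static int mydrv_probe(struct platform_device *pdev)
    {
            struct resource *res;
            void __iomem *regs;

            res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
            if (!res)
                    return -ENODEV;

            /* the mapping is torn down automatically on driver detach */
            regs = devm_ioremap_prot(&pdev->dev, res->start,
                                     res->end - res->start + 1,
                                     _PAGE_NO_CACHE);
            if (!regs)
                    return -ENOMEM;

            /* ... talk to the device through "regs" ... */
            return 0;
    }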
+diff --git a/arch/powerpc/platforms/chrp/pegasos_eth.c b/arch/powerpc/platforms/chrp/pegasos_eth.c
+index 5bcc58d..130ff72 100644
+--- a/arch/powerpc/platforms/chrp/pegasos_eth.c
++++ b/arch/powerpc/platforms/chrp/pegasos_eth.c
+@@ -58,7 +58,9 @@ static struct resource mv643xx_eth0_resources[] = {
+ 
+ 
+ static struct mv643xx_eth_platform_data eth0_pd = {
++	.shared		= &mv643xx_eth_shared_device,
+ 	.port_number	= 0,
++
+ 	.tx_sram_addr = PEGASOS2_SRAM_BASE_ETH0,
+ 	.tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
+ 	.tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
+@@ -88,7 +90,9 @@ static struct resource mv643xx_eth1_resources[] = {
+ };
+ 
+ static struct mv643xx_eth_platform_data eth1_pd = {
++	.shared		= &mv643xx_eth_shared_device,
+ 	.port_number	= 1,
++
+ 	.tx_sram_addr = PEGASOS2_SRAM_BASE_ETH1,
+ 	.tx_sram_size = PEGASOS2_SRAM_TXRING_SIZE,
+ 	.tx_queue_size = PEGASOS2_SRAM_TXRING_SIZE/16,
+diff --git a/arch/powerpc/platforms/pseries/scanlog.c b/arch/powerpc/platforms/pseries/scanlog.c
+index bec3803..417eca7 100644
+--- a/arch/powerpc/platforms/pseries/scanlog.c
++++ b/arch/powerpc/platforms/pseries/scanlog.c
+@@ -55,11 +55,6 @@ static ssize_t scanlog_read(struct file *file, char __user *buf,
+         dp = PDE(inode);
+  	data = (unsigned int *)dp->data;
+ 
+-	if (!data) {
+-		printk(KERN_ERR "scanlog: read failed no data\n");
+-		return -EIO;
+-	}
+-
+ 	if (count > RTAS_DATA_BUF_SIZE)
+ 		count = RTAS_DATA_BUF_SIZE;
+ 
+@@ -146,11 +141,6 @@ static int scanlog_open(struct inode * inode, struct file * file)
+ 	struct proc_dir_entry *dp = PDE(inode);
+ 	unsigned int *data = (unsigned int *)dp->data;
+ 
+-	if (!data) {
+-		printk(KERN_ERR "scanlog: open failed no data\n");
+-		return -EIO;
+-	}
+-
+ 	if (data[0] != 0) {
+ 		/* This imperfect test stops a second copy of the
+ 		 * data (or a reset while data is being copied)
+@@ -168,10 +158,6 @@ static int scanlog_release(struct inode * inode, struct file * file)
+ 	struct proc_dir_entry *dp = PDE(inode);
+ 	unsigned int *data = (unsigned int *)dp->data;
+ 
+-	if (!data) {
+-		printk(KERN_ERR "scanlog: release failed no data\n");
+-		return -EIO;
+-	}
+ 	data[0] = 0;
+ 
+ 	return 0;
+@@ -200,12 +186,11 @@ static int __init scanlog_init(void)
+ 	if (!data)
+ 		goto err;
+ 
+-	ent = proc_create("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
+-			  &scanlog_fops);
++	ent = proc_create_data("ppc64/rtas/scan-log-dump", S_IRUSR, NULL,
++			       &scanlog_fops, data);
+ 	if (!ent)
+ 		goto err;
+ 
+-	ent->data = data;
+ 	proc_ppc64_scan_log_dump = ent;
+ 
+ 	return 0;
+diff --git a/arch/powerpc/sysdev/mv64x60_dev.c b/arch/powerpc/sysdev/mv64x60_dev.c
+index 41af122..a132e0d 100644
+--- a/arch/powerpc/sysdev/mv64x60_dev.c
++++ b/arch/powerpc/sysdev/mv64x60_dev.c
+@@ -239,6 +239,8 @@ static int __init mv64x60_eth_device_setup(struct device_node *np, int id,
+ 
+ 	memset(&pdata, 0, sizeof(pdata));
+ 
++	pdata.shared = shared_pdev;
++
+ 	prop = of_get_property(np, "reg", NULL);
+ 	if (!prop)
+ 		return -ENODEV;
+diff --git a/arch/ppc/syslib/mv64x60.c b/arch/ppc/syslib/mv64x60.c
+index 90fe904..418f305 100644
+--- a/arch/ppc/syslib/mv64x60.c
++++ b/arch/ppc/syslib/mv64x60.c
+@@ -341,6 +341,7 @@ static struct resource mv64x60_eth0_resources[] = {
+ };
+ 
+ static struct mv643xx_eth_platform_data eth0_pd = {
++	.shared		= &mv64x60_eth_shared_device,
+ 	.port_number	= 0,
+ };
+ 
+@@ -366,6 +367,7 @@ static struct resource mv64x60_eth1_resources[] = {
+ };
+ 
+ static struct mv643xx_eth_platform_data eth1_pd = {
++	.shared		= &mv64x60_eth_shared_device,
+ 	.port_number	= 1,
+ };
+ 
+@@ -391,6 +393,7 @@ static struct resource mv64x60_eth2_resources[] = {
+ };
+ 
+ static struct mv643xx_eth_platform_data eth2_pd = {
++	.shared		= &mv64x60_eth_shared_device,
+ 	.port_number	= 2,
+ };
+ 
+diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig
+index 29a7940..1d03508 100644
+--- a/arch/s390/Kconfig
++++ b/arch/s390/Kconfig
+@@ -430,6 +430,13 @@ config CMM_IUCV
+ 	  Select this option to enable the special message interface to
+ 	  the cooperative memory management.
+ 
++config PAGE_STATES
++	bool "Unused page notification"
++	help
++	  This enables the notification of unused pages to the
++	  hypervisor. The ESSA instruction is used to switch a page
++	  between the stable state (it has content) and the unused state.
++
+ config VIRT_TIMER
+ 	bool "Virtual CPU timer support"
+ 	help
+diff --git a/arch/s390/kernel/compat_wrapper.S b/arch/s390/kernel/compat_wrapper.S
+index 743d54f..d003a6e 100644
+--- a/arch/s390/kernel/compat_wrapper.S
++++ b/arch/s390/kernel/compat_wrapper.S
+@@ -121,7 +121,7 @@ sys32_ptrace_wrapper:
+ 	lgfr	%r3,%r3			# long
+ 	llgtr	%r4,%r4			# long
+ 	llgfr	%r5,%r5			# long
+-	jg	sys_ptrace		# branch to system call
++	jg	compat_sys_ptrace	# branch to system call
+ 
+ 	.globl	sys32_alarm_wrapper
+ sys32_alarm_wrapper:
+diff --git a/arch/s390/kernel/entry.S b/arch/s390/kernel/entry.S
+index bdbb3bc..708cf9c 100644
+--- a/arch/s390/kernel/entry.S
++++ b/arch/s390/kernel/entry.S
+@@ -279,8 +279,6 @@ sysc_do_restart:
+ 	st	%r2,SP_R2(%r15)   # store return value (change R2 on stack)
+ 
+ sysc_return:
+-	tm	SP_PSW+1(%r15),0x01	# returning to user ?
+-	bno	BASED(sysc_restore)
+ 	tm	__TI_flags+3(%r9),_TIF_WORK_SVC
+ 	bnz	BASED(sysc_work)  # there is work to do (signals etc.)
+ sysc_restore:
+@@ -312,6 +310,8 @@ sysc_work_loop:
+ # One of the work bits is on. Find out which one.
+ #
+ sysc_work:
++	tm	SP_PSW+1(%r15),0x01	# returning to user ?
++	bno	BASED(sysc_restore)
+ 	tm	__TI_flags+3(%r9),_TIF_MCCK_PENDING
+ 	bo	BASED(sysc_mcck_pending)
+ 	tm	__TI_flags+3(%r9),_TIF_NEED_RESCHED
+@@ -602,12 +602,6 @@ io_no_vtime:
+ 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
+ 	basr	%r14,%r1		# branch to standard irq handler
+ io_return:
+-	tm	SP_PSW+1(%r15),0x01	# returning to user ?
+-#ifdef CONFIG_PREEMPT
+-	bno	BASED(io_preempt)	# no -> check for preemptive scheduling
+-#else
+-	bno	BASED(io_restore)	# no-> skip resched & signal
+-#endif
+ 	tm	__TI_flags+3(%r9),_TIF_WORK_INT
+ 	bnz	BASED(io_work)		# there is work to do (signals etc.)
+ io_restore:
+@@ -629,10 +623,18 @@ io_restore_trace_psw:
+ 	.long	0, io_restore_trace + 0x80000000
+ #endif
+ 
+-#ifdef CONFIG_PREEMPT
+-io_preempt:
++#
++# switch to kernel stack, then check the TIF bits
++#
++io_work:
++	tm	SP_PSW+1(%r15),0x01	# returning to user ?
++#ifndef CONFIG_PREEMPT
++	bno	BASED(io_restore)	# no-> skip resched & signal
++#else
++	bnz	BASED(io_work_user)	# yes -> do resched & signal
++	# check for preemptive scheduling
+ 	icm	%r0,15,__TI_precount(%r9)
+-	bnz	BASED(io_restore)
++	bnz	BASED(io_restore)	# preemption disabled
+ 	l	%r1,SP_R15(%r15)
+ 	s	%r1,BASED(.Lc_spsize)
+ 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
+@@ -646,10 +648,7 @@ io_resume_loop:
+ 	br	%r1			# call schedule
+ #endif
+ 
+-#
+-# switch to kernel stack, then check the TIF bits
+-#
+-io_work:
++io_work_user:
+ 	l	%r1,__LC_KERNEL_STACK
+ 	s	%r1,BASED(.Lc_spsize)
+ 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
+diff --git a/arch/s390/kernel/entry64.S b/arch/s390/kernel/entry64.S
+index 5a4a7bc..fee1017 100644
+--- a/arch/s390/kernel/entry64.S
++++ b/arch/s390/kernel/entry64.S
+@@ -271,8 +271,6 @@ sysc_noemu:
+ 	stg	%r2,SP_R2(%r15) # store return value (change R2 on stack)
+ 
+ sysc_return:
+-	tm	SP_PSW+1(%r15),0x01	# returning to user ?
+-	jno	sysc_restore
+ 	tm	__TI_flags+7(%r9),_TIF_WORK_SVC
+ 	jnz	sysc_work	# there is work to do (signals etc.)
+ sysc_restore:
+@@ -304,6 +302,8 @@ sysc_work_loop:
+ # One of the work bits is on. Find out which one.
+ #
+ sysc_work:
++	tm	SP_PSW+1(%r15),0x01	# returning to user ?
++	jno	sysc_restore
+ 	tm	__TI_flags+7(%r9),_TIF_MCCK_PENDING
+ 	jo	sysc_mcck_pending
+ 	tm	__TI_flags+7(%r9),_TIF_NEED_RESCHED
+@@ -585,12 +585,6 @@ io_no_vtime:
+ 	la	%r2,SP_PTREGS(%r15)	# address of register-save area
+ 	brasl	%r14,do_IRQ		# call standard irq handler
+ io_return:
+-	tm	SP_PSW+1(%r15),0x01	# returning to user ?
+-#ifdef CONFIG_PREEMPT
+-	jno	io_preempt		# no -> check for preemptive scheduling
+-#else
+-	jno	io_restore		# no-> skip resched & signal
+-#endif
+ 	tm	__TI_flags+7(%r9),_TIF_WORK_INT
+ 	jnz	io_work 		# there is work to do (signals etc.)
+ io_restore:
+@@ -612,10 +606,41 @@ io_restore_trace_psw:
+ 	.quad	0, io_restore_trace
+ #endif
+ 
+-#ifdef CONFIG_PREEMPT
+-io_preempt:
++#
++# There is work to do; check whether we return to user space, then
++# check whether we are in SIE and, if so, leave SIE first
++#
++io_work:
++	tm	SP_PSW+1(%r15),0x01	# returning to user ?
++#ifndef CONFIG_PREEMPT
++#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
++	jnz	io_work_user		# yes -> no need to check for SIE
++	la	%r1, BASED(sie_opcode)	# we return to kernel here
++	lg	%r2, SP_PSW+8(%r15)
++	clc	0(2,%r1), 0(%r2)	# is current instruction = SIE?
++	jne	io_restore		# no-> return to kernel
++	lg	%r1, SP_PSW+8(%r15)	# yes-> add 4 bytes to leave SIE
++	aghi	%r1, 4
++	stg	%r1, SP_PSW+8(%r15)
++	j	io_restore		# return to kernel
++#else
++	jno	io_restore		# no-> skip resched & signal
++#endif
++#else
++	jnz	io_work_user		# yes -> do resched & signal
++#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
++	la	%r1, BASED(sie_opcode)
++	lg	%r2, SP_PSW+8(%r15)
++	clc	0(2,%r1), 0(%r2)	# is current instruction = SIE?
++	jne	0f			# no -> leave PSW alone
++	lg	%r1, SP_PSW+8(%r15)	# yes-> add 4 bytes to leave SIE
++	aghi	%r1, 4
++	stg	%r1, SP_PSW+8(%r15)
++0:
++#endif
++	# check for preemptive scheduling
+ 	icm	%r0,15,__TI_precount(%r9)
+-	jnz	io_restore
++	jnz	io_restore		# preemption is disabled
+ 	# switch to kernel stack
+ 	lg	%r1,SP_R15(%r15)
+ 	aghi	%r1,-SP_SIZE
+@@ -629,10 +654,7 @@ io_resume_loop:
+ 	jg	preempt_schedule_irq
+ #endif
+ 
+-#
+-# switch to kernel stack, then check TIF bits
+-#
+-io_work:
++io_work_user:
+ 	lg	%r1,__LC_KERNEL_STACK
+ 	aghi	%r1,-SP_SIZE
+ 	mvc	SP_PTREGS(__PT_SIZE,%r1),SP_PTREGS(%r15)
+@@ -653,6 +675,11 @@ io_work_loop:
+ 	j	io_restore
+ io_work_done:
+ 
++#if defined(CONFIG_KVM) || defined(CONFIG_KVM_MODULE)
++sie_opcode:
++	.long 0xb2140000
++#endif
++
+ #
+ # _TIF_MCCK_PENDING is set, call handler
+ #
+diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c
+index 7f42701..35827b9 100644
+--- a/arch/s390/kernel/ptrace.c
++++ b/arch/s390/kernel/ptrace.c
+@@ -292,8 +292,7 @@ poke_user(struct task_struct *child, addr_t addr, addr_t data)
+ 	return 0;
+ }
+ 
+-static int
+-do_ptrace_normal(struct task_struct *child, long request, long addr, long data)
++long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+ {
+ 	ptrace_area parea; 
+ 	int copied, ret;
+@@ -529,35 +528,19 @@ poke_user_emu31(struct task_struct *child, addr_t addr, addr_t data)
+ 	return 0;
+ }
+ 
+-static int
+-do_ptrace_emu31(struct task_struct *child, long request, long addr, long data)
++long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
++			compat_ulong_t caddr, compat_ulong_t cdata)
+ {
+-	unsigned int tmp;  /* 4 bytes !! */
++	unsigned long addr = caddr;
++	unsigned long data = cdata;
+ 	ptrace_area_emu31 parea; 
+ 	int copied, ret;
+ 
+ 	switch (request) {
+-	case PTRACE_PEEKTEXT:
+-	case PTRACE_PEEKDATA:
+-		/* read word at location addr. */
+-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 0);
+-		if (copied != sizeof(tmp))
+-			return -EIO;
+-		return put_user(tmp, (unsigned int __force __user *) data);
+-
+ 	case PTRACE_PEEKUSR:
+ 		/* read the word at location addr in the USER area. */
+ 		return peek_user_emu31(child, addr, data);
+ 
+-	case PTRACE_POKETEXT:
+-	case PTRACE_POKEDATA:
+-		/* write the word at location addr. */
+-		tmp = data;
+-		copied = access_process_vm(child, addr, &tmp, sizeof(tmp), 1);
+-		if (copied != sizeof(tmp))
+-			return -EIO;
+-		return 0;
+-
+ 	case PTRACE_POKEUSR:
+ 		/* write the word at location addr in the USER area */
+ 		return poke_user_emu31(child, addr, data);
+@@ -587,82 +570,11 @@ do_ptrace_emu31(struct task_struct *child, long request, long addr, long data)
+ 			copied += sizeof(unsigned int);
+ 		}
+ 		return 0;
+-	case PTRACE_GETEVENTMSG:
+-		return put_user((__u32) child->ptrace_message,
+-				(unsigned int __force __user *) data);
+-	case PTRACE_GETSIGINFO:
+-		if (child->last_siginfo == NULL)
+-			return -EINVAL;
+-		return copy_siginfo_to_user32((compat_siginfo_t
+-					       __force __user *) data,
+-					      child->last_siginfo);
+-	case PTRACE_SETSIGINFO:
+-		if (child->last_siginfo == NULL)
+-			return -EINVAL;
+-		return copy_siginfo_from_user32(child->last_siginfo,
+-						(compat_siginfo_t
+-						 __force __user *) data);
+ 	}
+-	return ptrace_request(child, request, addr, data);
++	return compat_ptrace_request(child, request, addr, data);
+ }
+ #endif
+ 
+-long arch_ptrace(struct task_struct *child, long request, long addr, long data)
+-{
+-	switch (request) {
+-	case PTRACE_SYSCALL:
+-		/* continue and stop at next (return from) syscall */
+-	case PTRACE_CONT:
+-		/* restart after signal. */
+-		if (!valid_signal(data))
+-			return -EIO;
+-		if (request == PTRACE_SYSCALL)
+-			set_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-		else
+-			clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-		child->exit_code = data;
+-		/* make sure the single step bit is not set. */
+-		user_disable_single_step(child);
+-		wake_up_process(child);
+-		return 0;
+-
+-	case PTRACE_KILL:
+-		/*
+-		 * make the child exit.  Best I can do is send it a sigkill. 
+-		 * perhaps it should be put in the status that it wants to 
+-		 * exit.
+-		 */
+-		if (child->exit_state == EXIT_ZOMBIE) /* already dead */
+-			return 0;
+-		child->exit_code = SIGKILL;
+-		/* make sure the single step bit is not set. */
+-		user_disable_single_step(child);
+-		wake_up_process(child);
+-		return 0;
+-
+-	case PTRACE_SINGLESTEP:
+-		/* set the trap flag. */
+-		if (!valid_signal(data))
+-			return -EIO;
+-		clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
+-		child->exit_code = data;
+-		user_enable_single_step(child);
+-		/* give it a chance to run. */
+-		wake_up_process(child);
+-		return 0;
+-
+-	/* Do requests that differ for 31/64 bit */
+-	default:
+-#ifdef CONFIG_COMPAT
+-		if (test_thread_flag(TIF_31BIT))
+-			return do_ptrace_emu31(child, request, addr, data);
+-#endif
+-		return do_ptrace_normal(child, request, addr, data);
+-	}
+-	/* Not reached.  */
+-	return -EIO;
+-}
+-
+ asmlinkage void
+ syscall_trace(struct pt_regs *regs, int entryexit)
+ {
+diff --git a/arch/s390/kernel/sys_s390.c b/arch/s390/kernel/sys_s390.c
+index 988d0d6..5fdb799 100644
+--- a/arch/s390/kernel/sys_s390.c
++++ b/arch/s390/kernel/sys_s390.c
+@@ -32,23 +32,6 @@
+ #include <asm/uaccess.h>
+ #include "entry.h"
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way Unix traditionally does this, though.
+- */
+-asmlinkage long sys_pipe(unsigned long __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ /* common code for old and new mmaps */
+ static inline long do_mmap2(
+ 	unsigned long addr, unsigned long len,
+diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig
+index 1761b74..e051cad 100644
+--- a/arch/s390/kvm/Kconfig
++++ b/arch/s390/kvm/Kconfig
+@@ -22,7 +22,6 @@ config KVM
+ 	select PREEMPT_NOTIFIERS
+ 	select ANON_INODES
+ 	select S390_SWITCH_AMODE
+-	select PREEMPT
+ 	---help---
+ 	  Support hosting paravirtualized guest machines using the SIE
+ 	  virtualization capability on the mainframe. This should work
+diff --git a/arch/s390/kvm/intercept.c b/arch/s390/kvm/intercept.c
+index 349581a..47a0b64 100644
+--- a/arch/s390/kvm/intercept.c
++++ b/arch/s390/kvm/intercept.c
+@@ -105,6 +105,9 @@ static intercept_handler_t instruction_handlers[256] = {
+ static int handle_noop(struct kvm_vcpu *vcpu)
+ {
+ 	switch (vcpu->arch.sie_block->icptcode) {
++	case 0x0:
++		vcpu->stat.exit_null++;
++		break;
+ 	case 0x10:
+ 		vcpu->stat.exit_external_request++;
+ 		break;
+diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c
+index 98d1e73..0ac36a6 100644
+--- a/arch/s390/kvm/kvm-s390.c
++++ b/arch/s390/kvm/kvm-s390.c
+@@ -31,6 +31,7 @@
+ 
+ struct kvm_stats_debugfs_item debugfs_entries[] = {
+ 	{ "userspace_handled", VCPU_STAT(exit_userspace) },
++	{ "exit_null", VCPU_STAT(exit_null) },
+ 	{ "exit_validity", VCPU_STAT(exit_validity) },
+ 	{ "exit_stop_request", VCPU_STAT(exit_stop_request) },
+ 	{ "exit_external_request", VCPU_STAT(exit_external_request) },
+@@ -221,10 +222,6 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
+ 	vcpu->arch.guest_fpregs.fpc &= FPC_VALID_MASK;
+ 	restore_fp_regs(&vcpu->arch.guest_fpregs);
+ 	restore_access_regs(vcpu->arch.guest_acrs);
+-
+-	if (signal_pending(current))
+-		atomic_set_mask(CPUSTAT_STOP_INT,
+-			&vcpu->arch.sie_block->cpuflags);
+ }
+ 
+ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
+diff --git a/arch/s390/mm/Makefile b/arch/s390/mm/Makefile
+index fb988a4..2a74581 100644
+--- a/arch/s390/mm/Makefile
++++ b/arch/s390/mm/Makefile
+@@ -5,3 +5,4 @@
+ obj-y	 := init.o fault.o extmem.o mmap.o vmem.o pgtable.o
+ obj-$(CONFIG_CMM) += cmm.o
+ obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
++obj-$(CONFIG_PAGE_STATES) += page-states.o
+diff --git a/arch/s390/mm/init.c b/arch/s390/mm/init.c
+index fa31de6..29f3a63 100644
+--- a/arch/s390/mm/init.c
++++ b/arch/s390/mm/init.c
+@@ -126,6 +126,9 @@ void __init mem_init(void)
+         /* clear the zero-page */
+         memset(empty_zero_page, 0, PAGE_SIZE);
+ 
++	/* Setup guest page hinting */
++	cmma_init();
++
+ 	/* this will put all low memory onto the freelists */
+ 	totalram_pages += free_all_bootmem();
+ 
+diff --git a/arch/s390/mm/page-states.c b/arch/s390/mm/page-states.c
+new file mode 100644
+index 0000000..fc0ad73
+--- /dev/null
++++ b/arch/s390/mm/page-states.c
+@@ -0,0 +1,79 @@
++/*
++ * arch/s390/mm/page-states.c
++ *
++ * Copyright IBM Corp. 2008
++ *
++ * Guest page hinting for unused pages.
++ *
++ * Author(s): Martin Schwidefsky <schwidefsky at de.ibm.com>
++ */
++
++#include <linux/kernel.h>
++#include <linux/errno.h>
++#include <linux/types.h>
++#include <linux/mm.h>
++#include <linux/init.h>
++
++#define ESSA_SET_STABLE		1
++#define ESSA_SET_UNUSED		2
++
++static int cmma_flag;
++
++static int __init cmma(char *str)
++{
++	char *parm;
++	parm = strstrip(str);
++	if (strcmp(parm, "yes") == 0 || strcmp(parm, "on") == 0) {
++		cmma_flag = 1;
++		return 1;
++	}
++	cmma_flag = 0;
++	if (strcmp(parm, "no") == 0 || strcmp(parm, "off") == 0)
++		return 1;
++	return 0;
++}
++
++__setup("cmma=", cmma);
++
++void __init cmma_init(void)
++{
++	register unsigned long tmp asm("0") = 0;
++	register int rc asm("1") = -EOPNOTSUPP;
++
++	if (!cmma_flag)
++		return;
++	asm volatile(
++		"       .insn rrf,0xb9ab0000,%1,%1,0,0\n"
++		"0:     la      %0,0\n"
++		"1:\n"
++		EX_TABLE(0b,1b)
++		: "+&d" (rc), "+&d" (tmp));
++	if (rc)
++		cmma_flag = 0;
++}
++
++void arch_free_page(struct page *page, int order)
++{
++	int i, rc;
++
++	if (!cmma_flag)
++		return;
++	for (i = 0; i < (1 << order); i++)
++		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
++			     : "=&d" (rc)
++			     : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
++			       "i" (ESSA_SET_UNUSED));
++}
++
++void arch_alloc_page(struct page *page, int order)
++{
++	int i, rc;
++
++	if (!cmma_flag)
++		return;
++	for (i = 0; i < (1 << order); i++)
++		asm volatile(".insn rrf,0xb9ab0000,%0,%1,%2,0"
++			     : "=&d" (rc)
++			     : "a" ((page_to_pfn(page) + i) << PAGE_SHIFT),
++			       "i" (ESSA_SET_STABLE));
++}
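
To exercise the hinting above, build with the new option and pass the
boot parameter parsed by the __setup() handler; per the code above the
accepted values are "on"/"yes" and "off"/"no", and the default is off:

    CONFIG_PAGE_STATES=y        (build time)
    cmma=on                     (on the kernel command line)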
+diff --git a/arch/sh/kernel/sys_sh64.c b/arch/sh/kernel/sys_sh64.c
+index 578004d..91fb844 100644
+--- a/arch/sh/kernel/sys_sh64.c
++++ b/arch/sh/kernel/sys_sh64.c
+@@ -31,23 +31,6 @@
+ #include <asm/unistd.h>
+ 
+ /*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way Unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long * fildes)
+-{
+-        int fd[2];
+-        int error;
+-
+-        error = do_pipe(fd);
+-        if (!error) {
+-                if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-                        error = -EFAULT;
+-        }
+-        return error;
+-}
+-
+-/*
+  * Do a system call from kernel instead of calling sys_execve so we
+  * end up with proper pt_regs.
+  */
+diff --git a/arch/sparc/kernel/process.c b/arch/sparc/kernel/process.c
+index e7f3519..36431f3 100644
+--- a/arch/sparc/kernel/process.c
++++ b/arch/sparc/kernel/process.c
+@@ -419,14 +419,26 @@ asmlinkage int sparc_do_fork(unsigned long clone_flags,
+                              unsigned long stack_size)
+ {
+ 	unsigned long parent_tid_ptr, child_tid_ptr;
++	unsigned long orig_i1 = regs->u_regs[UREG_I1];
++	long ret;
+ 
+ 	parent_tid_ptr = regs->u_regs[UREG_I2];
+ 	child_tid_ptr = regs->u_regs[UREG_I4];
+ 
+-	return do_fork(clone_flags, stack_start,
+-		       regs, stack_size,
+-		       (int __user *) parent_tid_ptr,
+-		       (int __user *) child_tid_ptr);
++	ret = do_fork(clone_flags, stack_start,
++		      regs, stack_size,
++		      (int __user *) parent_tid_ptr,
++		      (int __user *) child_tid_ptr);
++
++	/* If we get an error and potentially restart the system
++	 * call, we're screwed because copy_thread() clobbered
++	 * the parent's %o1.  So detect that case and restore it
++	 * here.
++	 */
++	if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
++		regs->u_regs[UREG_I1] = orig_i1;
++
++	return ret;
+ }
+ 
+ /* Copy a Sparc thread.  The fork() return value conventions
+diff --git a/arch/sparc/kernel/signal.c b/arch/sparc/kernel/signal.c
+index 3c31229..3681579 100644
+--- a/arch/sparc/kernel/signal.c
++++ b/arch/sparc/kernel/signal.c
+@@ -245,15 +245,29 @@ static inline int invalid_frame_pointer(void __user *fp, int fplen)
+ 
+ static inline void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, unsigned long framesize)
+ {
+-	unsigned long sp;
++	unsigned long sp = regs->u_regs[UREG_FP];
+ 
+-	sp = regs->u_regs[UREG_FP];
++	/*
++	 * If we are on the alternate signal stack and would overflow it, don't.
++	 * Return an always-bogus address instead so we will die with SIGSEGV.
++	 */
++	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
++		return (void __user *) -1L;
+ 
+ 	/* This is the X/Open sanctioned signal stack switching.  */
+ 	if (sa->sa_flags & SA_ONSTACK) {
+-		if (!on_sig_stack(sp) && !((current->sas_ss_sp + current->sas_ss_size) & 7))
++		if (sas_ss_flags(sp) == 0)
+ 			sp = current->sas_ss_sp + current->sas_ss_size;
+ 	}
++
++	/* Always align the stack frame.  This handles two cases.  First,
++	 * sigaltstack need not be mindful of platform specific stack
++	 * alignment.  Second, if we took this signal because the stack
++	 * is not aligned properly, we'd like to take the signal cleanly
++	 * and report that.
++	 */
++	sp &= ~7UL;
++
+ 	return (void __user *)(sp - framesize);
+ }
+ 
+diff --git a/arch/sparc/kernel/sys_sparc.c b/arch/sparc/kernel/sys_sparc.c
+index f188b5d..e995491 100644
+--- a/arch/sparc/kernel/sys_sparc.c
++++ b/arch/sparc/kernel/sys_sparc.c
+@@ -223,8 +223,7 @@ int sparc_mmap_check(unsigned long addr, unsigned long len, unsigned long flags)
+ {
+ 	if (ARCH_SUN4C_SUN4 &&
+ 	    (len > 0x20000000 ||
+-	     ((flags & MAP_FIXED) &&
+-	      addr < 0xe0000000 && addr + len > 0x20000000)))
++	     (addr < 0xe0000000 && addr + len > 0x20000000)))
+ 		return -EINVAL;
+ 
+ 	/* See asm-sparc/uaccess.h */
+diff --git a/arch/sparc64/kernel/process.c b/arch/sparc64/kernel/process.c
+index 500ac6d..4129c04 100644
+--- a/arch/sparc64/kernel/process.c
++++ b/arch/sparc64/kernel/process.c
+@@ -503,6 +503,8 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags,
+ 			      unsigned long stack_size)
+ {
+ 	int __user *parent_tid_ptr, *child_tid_ptr;
++	unsigned long orig_i1 = regs->u_regs[UREG_I1];
++	long ret;
+ 
+ #ifdef CONFIG_COMPAT
+ 	if (test_thread_flag(TIF_32BIT)) {
+@@ -515,9 +517,19 @@ asmlinkage long sparc_do_fork(unsigned long clone_flags,
+ 		child_tid_ptr = (int __user *) regs->u_regs[UREG_I4];
+ 	}
+ 
+-	return do_fork(clone_flags, stack_start,
+-		       regs, stack_size,
+-		       parent_tid_ptr, child_tid_ptr);
++	ret = do_fork(clone_flags, stack_start,
++		      regs, stack_size,
++		      parent_tid_ptr, child_tid_ptr);
++
++	/* If we get an error and potentially restart the system
++	 * call, we're screwed because copy_thread() clobbered
++	 * the parent's %o1.  So detect that case and restore it
++	 * here.
++	 */
++	if ((unsigned long)ret >= -ERESTART_RESTARTBLOCK)
++		regs->u_regs[UREG_I1] = orig_i1;
++
++	return ret;
+ }
+ 
+ /* Copy a Sparc thread.  The fork() return value conventions
+diff --git a/arch/sparc64/kernel/signal.c b/arch/sparc64/kernel/signal.c
+index 45d6bf6..07c0443 100644
+--- a/arch/sparc64/kernel/signal.c
++++ b/arch/sparc64/kernel/signal.c
+@@ -376,16 +376,29 @@ save_fpu_state(struct pt_regs *regs, __siginfo_fpu_t __user *fpu)
+ 
+ static inline void __user *get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, unsigned long framesize)
+ {
+-	unsigned long sp;
++	unsigned long sp = regs->u_regs[UREG_FP] + STACK_BIAS;
+ 
+-	sp = regs->u_regs[UREG_FP] + STACK_BIAS;
++	/*
++	 * If we are on the alternate signal stack and would overflow it, don't.
++	 * Return an always-bogus address instead so we will die with SIGSEGV.
++	 */
++	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
++		return (void __user *) -1L;
+ 
+ 	/* This is the X/Open sanctioned signal stack switching.  */
+ 	if (ka->sa.sa_flags & SA_ONSTACK) {
+-		if (!on_sig_stack(sp) &&
+-		    !((current->sas_ss_sp + current->sas_ss_size) & 7))
++		if (sas_ss_flags(sp) == 0)
+ 			sp = current->sas_ss_sp + current->sas_ss_size;
+ 	}
++
++	/* Always align the stack frame.  This handles two cases.  First,
++	 * sigaltstack need not be mindful of platform specific stack
++	 * alignment.  Second, if we took this signal because the stack
++	 * is not aligned properly, we'd like to take the signal cleanly
++	 * and report that.
++	 */
++	sp &= ~7UL;
++
+ 	return (void __user *)(sp - framesize);
+ }
+ 
+diff --git a/arch/sparc64/kernel/signal32.c b/arch/sparc64/kernel/signal32.c
+index 9415d2c..0f6b7b1 100644
+--- a/arch/sparc64/kernel/signal32.c
++++ b/arch/sparc64/kernel/signal32.c
+@@ -406,11 +406,27 @@ static void __user *get_sigframe(struct sigaction *sa, struct pt_regs *regs, uns
+ 	regs->u_regs[UREG_FP] &= 0x00000000ffffffffUL;
+ 	sp = regs->u_regs[UREG_FP];
+ 	
++	/*
++	 * If we are on the alternate signal stack and would overflow it, don't.
++	 * Return an always-bogus address instead so we will die with SIGSEGV.
++	 */
++	if (on_sig_stack(sp) && !likely(on_sig_stack(sp - framesize)))
++		return (void __user *) -1L;
++
+ 	/* This is the X/Open sanctioned signal stack switching.  */
+ 	if (sa->sa_flags & SA_ONSTACK) {
+-		if (!on_sig_stack(sp) && !((current->sas_ss_sp + current->sas_ss_size) & 7))
++		if (sas_ss_flags(sp) == 0)
+ 			sp = current->sas_ss_sp + current->sas_ss_size;
+ 	}
++
++	/* Always align the stack frame.  This handles two cases.  First,
++	 * sigaltstack need not be mindful of platform specific stack
++	 * alignment.  Second, if we took this signal because the stack
++	 * is not aligned properly, we'd like to take the signal cleanly
++	 * and report that.
++	 */
++	sp &= ~7UL;
++
+ 	return (void __user *)(sp - framesize);
+ }
+ 
+diff --git a/arch/sparc64/kernel/smp.c b/arch/sparc64/kernel/smp.c
+index 3aba476..0d6403a 100644
+--- a/arch/sparc64/kernel/smp.c
++++ b/arch/sparc64/kernel/smp.c
+@@ -865,21 +865,14 @@ void smp_call_function_client(int irq, struct pt_regs *regs)
+ 	void *info = call_data->info;
+ 
+ 	clear_softint(1 << irq);
+-
+-	irq_enter();
+-
+-	if (!call_data->wait) {
+-		/* let initiator proceed after getting data */
+-		atomic_inc(&call_data->finished);
+-	}
+-
+-	func(info);
+-
+-	irq_exit();
+-
+ 	if (call_data->wait) {
+ 		/* let initiator proceed only after completion */
++		func(info);
+ 		atomic_inc(&call_data->finished);
++	} else {
++		/* let initiator proceed after getting data */
++		atomic_inc(&call_data->finished);
++		func(info);
+ 	}
+ }
+ 
+@@ -1041,9 +1034,7 @@ void smp_receive_signal(int cpu)
+ 
+ void smp_receive_signal_client(int irq, struct pt_regs *regs)
+ {
+-	irq_enter();
+ 	clear_softint(1 << irq);
+-	irq_exit();
+ }
+ 
+ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
+@@ -1051,8 +1042,6 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
+ 	struct mm_struct *mm;
+ 	unsigned long flags;
+ 
+-	irq_enter();
+-
+ 	clear_softint(1 << irq);
+ 
+ 	/* See if we need to allocate a new TLB context because
+@@ -1072,8 +1061,6 @@ void smp_new_mmu_context_version_client(int irq, struct pt_regs *regs)
+ 	load_secondary_context(mm);
+ 	__flush_tlb_mm(CTX_HWBITS(mm->context),
+ 		       SECONDARY_CONTEXT);
+-
+-	irq_exit();
+ }
+ 
+ void smp_new_mmu_context_version(void)
+@@ -1239,8 +1226,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
+ {
+ 	clear_softint(1 << irq);
+ 
+-	irq_enter();
+-
+ 	preempt_disable();
+ 
+ 	__asm__ __volatile__("flushw");
+@@ -1253,8 +1238,6 @@ void smp_penguin_jailcell(int irq, struct pt_regs *regs)
+ 	prom_world(0);
+ 
+ 	preempt_enable();
+-
+-	irq_exit();
+ }
+ 
+ /* /proc/profile writes can call this, don't __init it please. */
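
Note on the smp_call_function_client() reordering: the initiator
spins on call_data->finished, so in the wait case func(info) must run
before the counter is bumped.  Roughly what the waiting side does
(paraphrasing the existing smp_call_function() loop; "cpus" is the
number of targeted CPUs):

	/* with wait set, targets increment ->finished only after
	 * func(info) returns, so this doubles as a completion barrier */
	while (atomic_read(&call_data->finished) != cpus)
		cpu_relax();
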
+diff --git a/arch/sparc64/kernel/sys_sparc.c b/arch/sparc64/kernel/sys_sparc.c
+index 8d4761f..0dbc941 100644
+--- a/arch/sparc64/kernel/sys_sparc.c
++++ b/arch/sparc64/kernel/sys_sparc.c
+@@ -549,13 +549,13 @@ int sparc64_mmap_check(unsigned long addr, unsigned long len,
+ 		if (len >= STACK_TOP32)
+ 			return -EINVAL;
+ 
+-		if ((flags & MAP_FIXED) && addr > STACK_TOP32 - len)
++		if (addr > STACK_TOP32 - len)
+ 			return -EINVAL;
+ 	} else {
+ 		if (len >= VA_EXCLUDE_START)
+ 			return -EINVAL;
+ 
+-		if ((flags & MAP_FIXED) && invalid_64bit_range(addr, len))
++		if (invalid_64bit_range(addr, len))
+ 			return -EINVAL;
+ 	}
+ 
+diff --git a/arch/sparc64/kernel/sys_sparc32.c b/arch/sparc64/kernel/sys_sparc32.c
+index 161ce47..1aa4288 100644
+--- a/arch/sparc64/kernel/sys_sparc32.c
++++ b/arch/sparc64/kernel/sys_sparc32.c
+@@ -236,13 +236,6 @@ asmlinkage long sys32_getegid16(void)
+ 
+ /* 32-bit timeval and related flotsam.  */
+ 
+-static long get_tv32(struct timeval *o, struct compat_timeval __user *i)
+-{
+-	return (!access_ok(VERIFY_READ, i, sizeof(*i)) ||
+-		(__get_user(o->tv_sec, &i->tv_sec) |
+-		 __get_user(o->tv_usec, &i->tv_usec)));
+-}
+-
+ static inline long put_tv32(struct compat_timeval __user *o, struct timeval *i)
+ {
+ 	return (!access_ok(VERIFY_WRITE, o, sizeof(*o)) ||
+@@ -757,30 +750,6 @@ asmlinkage long sys32_settimeofday(struct compat_timeval __user *tv,
+ 	return do_sys_settimeofday(tv ? &kts : NULL, tz ? &ktz : NULL);
+ }
+ 
+-asmlinkage long sys32_utimes(char __user *filename,
+-			     struct compat_timeval __user *tvs)
+-{
+-	struct timespec tv[2];
+-
+-	if (tvs) {
+-		struct timeval ktvs[2];
+-		if (get_tv32(&ktvs[0], tvs) ||
+-		    get_tv32(&ktvs[1], 1+tvs))
+-			return -EFAULT;
+-
+-		if (ktvs[0].tv_usec < 0 || ktvs[0].tv_usec >= 1000000 ||
+-		    ktvs[1].tv_usec < 0 || ktvs[1].tv_usec >= 1000000)
+-			return -EINVAL;
+-
+-		tv[0].tv_sec = ktvs[0].tv_sec;
+-		tv[0].tv_nsec = 1000 * ktvs[0].tv_usec;
+-		tv[1].tv_sec = ktvs[1].tv_sec;
+-		tv[1].tv_nsec = 1000 * ktvs[1].tv_usec;
+-	}
+-
+-	return do_utimes(AT_FDCWD, filename, tvs ? tv : NULL, 0);
+-}
+-
+ /* These are here just in case some old sparc32 binary calls it. */
+ asmlinkage long sys32_pause(void)
+ {
+diff --git a/arch/sparc64/kernel/systbls.S b/arch/sparc64/kernel/systbls.S
+index a4fef2b..8b5282d 100644
+--- a/arch/sparc64/kernel/systbls.S
++++ b/arch/sparc64/kernel/systbls.S
+@@ -45,7 +45,7 @@ sys_call_table32:
+ /*120*/	.word compat_sys_readv, compat_sys_writev, sys32_settimeofday, sys32_fchown16, sys_fchmod
+ 	.word sys_nis_syscall, sys32_setreuid16, sys32_setregid16, sys_rename, sys_truncate
+ /*130*/	.word sys_ftruncate, sys_flock, compat_sys_lstat64, sys_nis_syscall, sys_nis_syscall
+-	.word sys_nis_syscall, sys32_mkdir, sys_rmdir, sys32_utimes, compat_sys_stat64
++	.word sys_nis_syscall, sys32_mkdir, sys_rmdir, compat_sys_utimes, compat_sys_stat64
+ /*140*/	.word sys32_sendfile64, sys_nis_syscall, sys32_futex, sys_gettid, compat_sys_getrlimit
+ 	.word compat_sys_setrlimit, sys_pivot_root, sys32_prctl, sys_pciconfig_read, sys_pciconfig_write
+ /*150*/	.word sys_nis_syscall, sys_inotify_init, sys_inotify_add_watch, sys_poll, sys_getdents64
+diff --git a/arch/sparc64/mm/init.c b/arch/sparc64/mm/init.c
+index 4cad0b3..ec3e2c7 100644
+--- a/arch/sparc64/mm/init.c
++++ b/arch/sparc64/mm/init.c
+@@ -771,6 +771,9 @@ static void __init find_ramdisk(unsigned long phys_base)
+ 		initrd_end = ramdisk_image + sparc_ramdisk_size;
+ 
+ 		lmb_reserve(initrd_start, initrd_end);
++
++		initrd_start += PAGE_OFFSET;
++		initrd_end += PAGE_OFFSET;
+ 	}
+ #endif
+ }
+@@ -2362,16 +2365,3 @@ void __flush_tlb_all(void)
+ 	__asm__ __volatile__("wrpr	%0, 0, %%pstate"
+ 			     : : "r" (pstate));
+ }
+-
+-#ifdef CONFIG_MEMORY_HOTPLUG
+-
+-void online_page(struct page *page)
+-{
+-	ClearPageReserved(page);
+-	init_page_count(page);
+-	__free_page(page);
+-	totalram_pages++;
+-	num_physpages++;
+-}
+-
+-#endif /* CONFIG_MEMORY_HOTPLUG */
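
Note on the PAGE_OFFSET adjustment in find_ramdisk(): two address
spaces are in play.  A sketch with hypothetical phys/phys_end names,
assuming ramdisk_image is a physical address at this point:

	lmb_reserve(phys, phys_end);	     /* allocator wants physical  */
	initrd_start = phys + PAGE_OFFSET;   /* generic initrd code later */
	initrd_end = phys_end + PAGE_OFFSET; /* dereferences virtual      */
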
+diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c
+index 10b86e1..5047490 100644
+--- a/arch/um/drivers/line.c
++++ b/arch/um/drivers/line.c
+@@ -191,9 +191,9 @@ void line_flush_chars(struct tty_struct *tty)
+ 	line_flush_buffer(tty);
+ }
+ 
+-void line_put_char(struct tty_struct *tty, unsigned char ch)
++int line_put_char(struct tty_struct *tty, unsigned char ch)
+ {
+-	line_write(tty, &ch, sizeof(ch));
++	return line_write(tty, &ch, sizeof(ch));
+ }
+ 
+ int line_write(struct tty_struct *tty, const unsigned char *buf, int len)
+diff --git a/arch/um/include/line.h b/arch/um/include/line.h
+index 1223f2c..979b73e 100644
+--- a/arch/um/include/line.h
++++ b/arch/um/include/line.h
+@@ -71,7 +71,7 @@ extern int line_setup(struct line *lines, unsigned int sizeof_lines,
+ 		      char *init, char **error_out);
+ extern int line_write(struct tty_struct *tty, const unsigned char *buf,
+ 		      int len);
+-extern void line_put_char(struct tty_struct *tty, unsigned char ch);
++extern int line_put_char(struct tty_struct *tty, unsigned char ch);
+ extern void line_set_termios(struct tty_struct *tty, struct ktermios * old);
+ extern int line_chars_in_buffer(struct tty_struct *tty);
+ extern void line_flush_buffer(struct tty_struct *tty);
+diff --git a/arch/um/kernel/syscall.c b/arch/um/kernel/syscall.c
+index 9cffc62..128ee85 100644
+--- a/arch/um/kernel/syscall.c
++++ b/arch/um/kernel/syscall.c
+@@ -73,23 +73,6 @@ long old_mmap(unsigned long addr, unsigned long len,
+  out:
+ 	return err;
+ }
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-long sys_pipe(unsigned long __user * fildes)
+-{
+-	int fd[2];
+-	long error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, sizeof(fd)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ 
+ long sys_uname(struct old_utsname __user * name)
+ {
+diff --git a/arch/v850/kernel/syscalls.c b/arch/v850/kernel/syscalls.c
+index 003db9c..1a83daf 100644
+--- a/arch/v850/kernel/syscalls.c
++++ b/arch/v850/kernel/syscalls.c
+@@ -132,23 +132,6 @@ sys_ipc (uint call, int first, int second, int third, void *ptr, long fifth)
+ 	return ret;
+ }
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way unix traditionally does this, though.
+- */
+-int sys_pipe (int *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe (fd);
+-	if (!error) {
+-		if (copy_to_user (fildes, fd, 2*sizeof (int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ static inline unsigned long
+ do_mmap2 (unsigned long addr, size_t len,
+ 	 unsigned long prot, unsigned long flags,
+diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
+index c3f8809..bbcafaa 100644
+--- a/arch/x86/Kconfig
++++ b/arch/x86/Kconfig
+@@ -18,6 +18,7 @@ config X86_64
+ ### Arch settings
+ config X86
+ 	def_bool y
++	select HAVE_UNSTABLE_SCHED_CLOCK
+ 	select HAVE_IDE
+ 	select HAVE_OPROFILE
+ 	select HAVE_KPROBES
+@@ -1661,6 +1662,7 @@ config GEODE_MFGPT_TIMER
+ 
+ config OLPC
+ 	bool "One Laptop Per Child support"
++	depends on MGEODE_LX
+ 	default n
+ 	help
+ 	  Add support for detecting the unique features of the OLPC
+diff --git a/arch/x86/boot/compressed/relocs.c b/arch/x86/boot/compressed/relocs.c
+index d01ea42..edaadea 100644
+--- a/arch/x86/boot/compressed/relocs.c
++++ b/arch/x86/boot/compressed/relocs.c
+@@ -191,7 +191,7 @@ static void read_ehdr(FILE *fp)
+ 		die("Cannot read ELF header: %s\n",
+ 			strerror(errno));
+ 	}
+-	if (memcmp(ehdr.e_ident, ELFMAG, 4) != 0) {
++	if (memcmp(ehdr.e_ident, ELFMAG, SELFMAG) != 0) {
+ 		die("No ELF magic\n");
+ 	}
+ 	if (ehdr.e_ident[EI_CLASS] != ELFCLASS32) {
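
Note: SELFMAG is the standard <elf.h> companion of ELFMAG, so the
magic string and its length now come from the same header:

	#define ELFMAG	"\177ELF"	/* e_ident[EI_MAG0..EI_MAG3] */
	#define SELFMAG	4		/* its length */
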
+diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
+index bbdacb3..5e618c3 100644
+--- a/arch/x86/kernel/Makefile
++++ b/arch/x86/kernel/Makefile
+@@ -83,9 +83,7 @@ obj-$(CONFIG_KVM_GUEST)		+= kvm.o
+ obj-$(CONFIG_KVM_CLOCK)		+= kvmclock.o
+ obj-$(CONFIG_PARAVIRT)		+= paravirt.o paravirt_patch_$(BITS).o
+ 
+-ifdef CONFIG_INPUT_PCSPKR
+-obj-y				+= pcspeaker.o
+-endif
++obj-$(CONFIG_PCSPKR_PLATFORM)	+= pcspeaker.o
+ 
+ obj-$(CONFIG_SCx200)		+= scx200.o
+ scx200-y			+= scx200_32.o
+diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile
+index 7335959..fd5ca97 100644
+--- a/arch/x86/kernel/acpi/Makefile
++++ b/arch/x86/kernel/acpi/Makefile
+@@ -10,5 +10,5 @@ endif
+ $(obj)/wakeup_rm.o:    $(obj)/realmode/wakeup.bin
+ 
+ $(obj)/realmode/wakeup.bin: FORCE
+-	$(Q)$(MAKE) $(build)=$(obj)/realmode $@
++	$(Q)$(MAKE) $(build)=$(obj)/realmode
+ 
+diff --git a/arch/x86/kernel/acpi/realmode/Makefile b/arch/x86/kernel/acpi/realmode/Makefile
+index 0929008..1c31cc0 100644
+--- a/arch/x86/kernel/acpi/realmode/Makefile
++++ b/arch/x86/kernel/acpi/realmode/Makefile
+@@ -6,7 +6,8 @@
+ # for more details.
+ #
+ 
+-targets		:= wakeup.bin wakeup.elf
++always		:= wakeup.bin
++targets		:= wakeup.elf wakeup.lds
+ 
+ wakeup-y	+= wakeup.o wakemain.o video-mode.o copy.o
+ 
+@@ -48,7 +49,7 @@ LDFLAGS_wakeup.elf	:= -T
+ 
+ CPPFLAGS_wakeup.lds += -P -C
+ 
+-$(obj)/wakeup.elf: $(src)/wakeup.lds $(WAKEUP_OBJS) FORCE
++$(obj)/wakeup.elf: $(obj)/wakeup.lds $(WAKEUP_OBJS) FORCE
+ 	$(call if_changed,ld)
+ 
+ OBJCOPYFLAGS_wakeup.bin	:= -O binary
+diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c
+index ddee040..4bc1be5 100644
+--- a/arch/x86/kernel/kvmclock.c
++++ b/arch/x86/kernel/kvmclock.c
+@@ -133,6 +133,7 @@ static int kvm_register_clock(void)
+ 	return native_write_msr_safe(MSR_KVM_SYSTEM_TIME, low, high);
+ }
+ 
++#ifdef CONFIG_X86_LOCAL_APIC
+ static void kvm_setup_secondary_clock(void)
+ {
+ 	/*
+@@ -143,6 +144,7 @@ static void kvm_setup_secondary_clock(void)
+ 	/* ok, done with our trickery, call native */
+ 	setup_secondary_APIC_clock();
+ }
++#endif
+ 
+ /*
+  * After the clock is registered, the host will keep writing to the
+@@ -177,7 +179,9 @@ void __init kvmclock_init(void)
+ 		pv_time_ops.get_wallclock = kvm_get_wallclock;
+ 		pv_time_ops.set_wallclock = kvm_set_wallclock;
+ 		pv_time_ops.sched_clock = kvm_clock_read;
++#ifdef CONFIG_X86_LOCAL_APIC
+ 		pv_apic_ops.setup_secondary_clock = kvm_setup_secondary_clock;
++#endif
+ 		machine_ops.shutdown  = kvm_shutdown;
+ #ifdef CONFIG_KEXEC
+ 		machine_ops.crash_shutdown  = kvm_crash_shutdown;
+diff --git a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c
+index 3e2c54d..404683b 100644
+--- a/arch/x86/kernel/mpparse.c
++++ b/arch/x86/kernel/mpparse.c
+@@ -794,6 +794,11 @@ void __init find_smp_config(void)
+                             ACPI-based MP Configuration
+    -------------------------------------------------------------------------- */
+ 
++/*
++ * Keep this outside and initialized to 0, for !CONFIG_ACPI builds:
++ */
++int es7000_plat;
++
+ #ifdef CONFIG_ACPI
+ 
+ #ifdef	CONFIG_X86_IO_APIC
+@@ -909,8 +914,6 @@ void __init mp_override_legacy_irq(u8 bus_irq, u8 polarity, u8 trigger, u32 gsi)
+ 	MP_intsrc_info(&intsrc);
+ }
+ 
+-int es7000_plat;
+-
+ void __init mp_config_acpi_legacy_irqs(void)
+ {
+ 	struct mpc_config_intsrc intsrc;
+diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c
+index 07c6d42..f6be7d5 100644
+--- a/arch/x86/kernel/reboot.c
++++ b/arch/x86/kernel/reboot.c
+@@ -149,7 +149,6 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = {
+ 		.matches = {
+ 			DMI_MATCH(DMI_SYS_VENDOR, "Dell Inc."),
+ 			DMI_MATCH(DMI_PRODUCT_NAME, "OptiPlex 745"),
+-			DMI_MATCH(DMI_BOARD_NAME, "0WF810"),
+ 		},
+ 	},
+ 	{       /* Handle problems with rebooting on Dell Optiplex 745's DFF*/
+diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
+index c0c68c1..cc6f5eb 100644
+--- a/arch/x86/kernel/setup.c
++++ b/arch/x86/kernel/setup.c
+@@ -95,7 +95,7 @@ void __init setup_per_cpu_areas(void)
+ 
+ 	/* Copy section for each CPU (we discard the original) */
+ 	size = PERCPU_ENOUGH_ROOM;
+-	printk(KERN_INFO "PERCPU: Allocating %lu bytes of per cpu data\n",
++	printk(KERN_INFO "PERCPU: Allocating %zd bytes of per cpu data\n",
+ 			  size);
+ 
+ 	for_each_possible_cpu(i) {
+diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
+index 84241a2..6b087ab 100644
+--- a/arch/x86/kernel/smpboot.c
++++ b/arch/x86/kernel/smpboot.c
+@@ -299,7 +299,7 @@ static void __cpuinit smp_callin(void)
+ /*
+  * Activate a secondary processor.
+  */
+-void __cpuinit start_secondary(void *unused)
++static void __cpuinit start_secondary(void *unused)
+ {
+ 	/*
+ 	 * Don't put *anything* before cpu_init(), SMP booting is too
+@@ -1306,7 +1306,7 @@ static void remove_siblinginfo(int cpu)
+ 	cpu_clear(cpu, cpu_sibling_setup_map);
+ }
+ 
+-int additional_cpus __initdata = -1;
++static int additional_cpus __initdata = -1;
+ 
+ static __init int setup_additional_cpus(char *s)
+ {
+diff --git a/arch/x86/kernel/sys_i386_32.c b/arch/x86/kernel/sys_i386_32.c
+index a86d26f..d2ab52c 100644
+--- a/arch/x86/kernel/sys_i386_32.c
++++ b/arch/x86/kernel/sys_i386_32.c
+@@ -22,23 +22,6 @@
+ #include <asm/uaccess.h>
+ #include <asm/unistd.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way Unix traditionally does this, though.
+- */
+-asmlinkage int sys_pipe(unsigned long __user * fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ asmlinkage long sys_mmap2(unsigned long addr, unsigned long len,
+ 			  unsigned long prot, unsigned long flags,
+ 			  unsigned long fd, unsigned long pgoff)
+diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c
+index bd802a5..3b360ef 100644
+--- a/arch/x86/kernel/sys_x86_64.c
++++ b/arch/x86/kernel/sys_x86_64.c
+@@ -17,23 +17,6 @@
+ #include <asm/uaccess.h>
+ #include <asm/ia32.h>
+ 
+-/*
+- * sys_pipe() is the normal C calling standard for creating
+- * a pipe. It's not the way Unix traditionally does this, though.
+- */
+-asmlinkage long sys_pipe(int __user *fildes)
+-{
+-	int fd[2];
+-	int error;
+-
+-	error = do_pipe(fd);
+-	if (!error) {
+-		if (copy_to_user(fildes, fd, 2*sizeof(int)))
+-			error = -EFAULT;
+-	}
+-	return error;
+-}
+-
+ asmlinkage long sys_mmap(unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags,
+ 	unsigned long fd, unsigned long off)
+ {
+diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c
+index 4c943ea..3324d90 100644
+--- a/arch/x86/kvm/i8254.c
++++ b/arch/x86/kvm/i8254.c
+@@ -288,6 +288,8 @@ static void pit_load_count(struct kvm *kvm, int channel, u32 val)
+ 	 * mode 1 is one shot, mode 2 is period, otherwise del timer */
+ 	switch (ps->channels[0].mode) {
+ 	case 1:
++        /* FIXME: enhance mode 4 precision */
++	case 4:
+ 		create_pit_timer(&ps->pit_timer, val, 0);
+ 		break;
+ 	case 2:
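
Note: for context, the 8254 counter modes are 0 (interrupt on
terminal count), 1 (hardware one-shot), 2 (rate generator), 3 (square
wave), 4 (software strobe) and 5 (hardware strobe).  Mode 4 fires
once after the programmed count, so it can share the one-shot path; a
sketch of the resulting dispatch, assuming the final argument of
create_pit_timer() is the "periodic" flag:

	case 1:
	case 4:		/* both effectively one-shot */
		create_pit_timer(&ps->pit_timer, val, 0);
		break;
	case 2:		/* rate generator: periodic */
		create_pit_timer(&ps->pit_timer, val, 1);
		break;
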
+diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
+index 2ad6f54..36c5406 100644
+--- a/arch/x86/kvm/mmu.c
++++ b/arch/x86/kvm/mmu.c
+@@ -79,36 +79,6 @@ static int dbg = 1;
+ 	}
+ #endif
+ 
+-#define PT64_PT_BITS 9
+-#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
+-#define PT32_PT_BITS 10
+-#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
+-
+-#define PT_WRITABLE_SHIFT 1
+-
+-#define PT_PRESENT_MASK (1ULL << 0)
+-#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
+-#define PT_USER_MASK (1ULL << 2)
+-#define PT_PWT_MASK (1ULL << 3)
+-#define PT_PCD_MASK (1ULL << 4)
+-#define PT_ACCESSED_MASK (1ULL << 5)
+-#define PT_DIRTY_MASK (1ULL << 6)
+-#define PT_PAGE_SIZE_MASK (1ULL << 7)
+-#define PT_PAT_MASK (1ULL << 7)
+-#define PT_GLOBAL_MASK (1ULL << 8)
+-#define PT64_NX_SHIFT 63
+-#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
+-
+-#define PT_PAT_SHIFT 7
+-#define PT_DIR_PAT_SHIFT 12
+-#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
+-
+-#define PT32_DIR_PSE36_SIZE 4
+-#define PT32_DIR_PSE36_SHIFT 13
+-#define PT32_DIR_PSE36_MASK \
+-	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
+-
+-
+ #define PT_FIRST_AVAIL_BITS_SHIFT 9
+ #define PT64_SECOND_AVAIL_BITS_SHIFT 52
+ 
+@@ -154,10 +124,6 @@ static int dbg = 1;
+ #define PFERR_USER_MASK (1U << 2)
+ #define PFERR_FETCH_MASK (1U << 4)
+ 
+-#define PT64_ROOT_LEVEL 4
+-#define PT32_ROOT_LEVEL 2
+-#define PT32E_ROOT_LEVEL 3
+-
+ #define PT_DIRECTORY_LEVEL 2
+ #define PT_PAGE_TABLE_LEVEL 1
+ 
+@@ -186,6 +152,12 @@ static struct kmem_cache *mmu_page_header_cache;
+ 
+ static u64 __read_mostly shadow_trap_nonpresent_pte;
+ static u64 __read_mostly shadow_notrap_nonpresent_pte;
++static u64 __read_mostly shadow_base_present_pte;
++static u64 __read_mostly shadow_nx_mask;
++static u64 __read_mostly shadow_x_mask;	/* mutual exclusive with nx_mask */
++static u64 __read_mostly shadow_user_mask;
++static u64 __read_mostly shadow_accessed_mask;
++static u64 __read_mostly shadow_dirty_mask;
+ 
+ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
+ {
+@@ -194,6 +166,23 @@ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte)
+ }
+ EXPORT_SYMBOL_GPL(kvm_mmu_set_nonpresent_ptes);
+ 
++void kvm_mmu_set_base_ptes(u64 base_pte)
++{
++	shadow_base_present_pte = base_pte;
++}
++EXPORT_SYMBOL_GPL(kvm_mmu_set_base_ptes);
++
++void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
++		u64 dirty_mask, u64 nx_mask, u64 x_mask)
++{
++	shadow_user_mask = user_mask;
++	shadow_accessed_mask = accessed_mask;
++	shadow_dirty_mask = dirty_mask;
++	shadow_nx_mask = nx_mask;
++	shadow_x_mask = x_mask;
++}
++EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes);
++
+ static int is_write_protection(struct kvm_vcpu *vcpu)
+ {
+ 	return vcpu->arch.cr0 & X86_CR0_WP;
+@@ -232,7 +221,7 @@ static int is_writeble_pte(unsigned long pte)
+ 
+ static int is_dirty_pte(unsigned long pte)
+ {
+-	return pte & PT_DIRTY_MASK;
++	return pte & shadow_dirty_mask;
+ }
+ 
+ static int is_rmap_pte(u64 pte)
+@@ -387,7 +376,6 @@ static void account_shadowed(struct kvm *kvm, gfn_t gfn)
+ 
+ 	write_count = slot_largepage_idx(gfn, gfn_to_memslot(kvm, gfn));
+ 	*write_count += 1;
+-	WARN_ON(*write_count > KVM_PAGES_PER_HPAGE);
+ }
+ 
+ static void unaccount_shadowed(struct kvm *kvm, gfn_t gfn)
+@@ -547,7 +535,7 @@ static void rmap_remove(struct kvm *kvm, u64 *spte)
+ 		return;
+ 	sp = page_header(__pa(spte));
+ 	pfn = spte_to_pfn(*spte);
+-	if (*spte & PT_ACCESSED_MASK)
++	if (*spte & shadow_accessed_mask)
+ 		kvm_set_pfn_accessed(pfn);
+ 	if (is_writeble_pte(*spte))
+ 		kvm_release_pfn_dirty(pfn);
+@@ -1073,17 +1061,17 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *shadow_pte,
+ 	 * whether the guest actually used the pte (in order to detect
+ 	 * demand paging).
+ 	 */
+-	spte = PT_PRESENT_MASK | PT_DIRTY_MASK;
++	spte = shadow_base_present_pte | shadow_dirty_mask;
+ 	if (!speculative)
+ 		pte_access |= PT_ACCESSED_MASK;
+ 	if (!dirty)
+ 		pte_access &= ~ACC_WRITE_MASK;
+-	if (!(pte_access & ACC_EXEC_MASK))
+-		spte |= PT64_NX_MASK;
+-
+-	spte |= PT_PRESENT_MASK;
++	if (pte_access & ACC_EXEC_MASK)
++		spte |= shadow_x_mask;
++	else
++		spte |= shadow_nx_mask;
+ 	if (pte_access & ACC_USER_MASK)
+-		spte |= PT_USER_MASK;
++		spte |= shadow_user_mask;
+ 	if (largepage)
+ 		spte |= PT_PAGE_SIZE_MASK;
+ 
+@@ -1188,8 +1176,9 @@ static int __direct_map(struct kvm_vcpu *vcpu, gpa_t v, int write,
+ 				return -ENOMEM;
+ 			}
+ 
+-			table[index] = __pa(new_table->spt) | PT_PRESENT_MASK
+-				| PT_WRITABLE_MASK | PT_USER_MASK;
++			table[index] = __pa(new_table->spt)
++				| PT_PRESENT_MASK | PT_WRITABLE_MASK
++				| shadow_user_mask | shadow_x_mask;
+ 		}
+ 		table_addr = table[index] & PT64_BASE_ADDR_MASK;
+ 	}
+@@ -1244,7 +1233,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
+ 	if (!VALID_PAGE(vcpu->arch.mmu.root_hpa))
+ 		return;
+ 	spin_lock(&vcpu->kvm->mmu_lock);
+-#ifdef CONFIG_X86_64
+ 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ 		hpa_t root = vcpu->arch.mmu.root_hpa;
+ 
+@@ -1256,7 +1244,6 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu)
+ 		spin_unlock(&vcpu->kvm->mmu_lock);
+ 		return;
+ 	}
+-#endif
+ 	for (i = 0; i < 4; ++i) {
+ 		hpa_t root = vcpu->arch.mmu.pae_root[i];
+ 
+@@ -1282,7 +1269,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+ 
+ 	root_gfn = vcpu->arch.cr3 >> PAGE_SHIFT;
+ 
+-#ifdef CONFIG_X86_64
+ 	if (vcpu->arch.mmu.shadow_root_level == PT64_ROOT_LEVEL) {
+ 		hpa_t root = vcpu->arch.mmu.root_hpa;
+ 
+@@ -1297,7 +1283,6 @@ static void mmu_alloc_roots(struct kvm_vcpu *vcpu)
+ 		vcpu->arch.mmu.root_hpa = root;
+ 		return;
+ 	}
+-#endif
+ 	metaphysical = !is_paging(vcpu);
+ 	if (tdp_enabled)
+ 		metaphysical = 1;
+@@ -1377,7 +1362,7 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
+ 	spin_lock(&vcpu->kvm->mmu_lock);
+ 	kvm_mmu_free_some_pages(vcpu);
+ 	r = __direct_map(vcpu, gpa, error_code & PFERR_WRITE_MASK,
+-			 largepage, gfn, pfn, TDP_ROOT_LEVEL);
++			 largepage, gfn, pfn, kvm_x86_ops->get_tdp_level());
+ 	spin_unlock(&vcpu->kvm->mmu_lock);
+ 
+ 	return r;
+@@ -1484,7 +1469,7 @@ static int init_kvm_tdp_mmu(struct kvm_vcpu *vcpu)
+ 	context->page_fault = tdp_page_fault;
+ 	context->free = nonpaging_free;
+ 	context->prefetch_page = nonpaging_prefetch_page;
+-	context->shadow_root_level = TDP_ROOT_LEVEL;
++	context->shadow_root_level = kvm_x86_ops->get_tdp_level();
+ 	context->root_hpa = INVALID_PAGE;
+ 
+ 	if (!is_paging(vcpu)) {
+@@ -1633,7 +1618,7 @@ static bool last_updated_pte_accessed(struct kvm_vcpu *vcpu)
+ {
+ 	u64 *spte = vcpu->arch.last_pte_updated;
+ 
+-	return !!(spte && (*spte & PT_ACCESSED_MASK));
++	return !!(spte && (*spte & shadow_accessed_mask));
+ }
+ 
+ static void mmu_guess_page_from_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
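
Note: the point of the new shadow_*_mask variables is that the same
MMU walker now works on either PTE encoding.  Both configurations are
wired up elsewhere in this patch -- common x86 keeps the classic
page-table bits:

	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
			PT_DIRTY_MASK, PT64_NX_MASK, 0);

while VMX substitutes the EPT encodings when EPT is in use:

	kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
		VMX_EPT_WRITABLE_MASK |
		VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
	kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
			VMX_EPT_FAKE_DIRTY_MASK, 0ull,
			VMX_EPT_EXECUTABLE_MASK);
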
+diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
+index e64e9f5..1730757 100644
+--- a/arch/x86/kvm/mmu.h
++++ b/arch/x86/kvm/mmu.h
+@@ -3,11 +3,38 @@
+ 
+ #include <linux/kvm_host.h>
+ 
+-#ifdef CONFIG_X86_64
+-#define TDP_ROOT_LEVEL PT64_ROOT_LEVEL
+-#else
+-#define TDP_ROOT_LEVEL PT32E_ROOT_LEVEL
+-#endif
++#define PT64_PT_BITS 9
++#define PT64_ENT_PER_PAGE (1 << PT64_PT_BITS)
++#define PT32_PT_BITS 10
++#define PT32_ENT_PER_PAGE (1 << PT32_PT_BITS)
++
++#define PT_WRITABLE_SHIFT 1
++
++#define PT_PRESENT_MASK (1ULL << 0)
++#define PT_WRITABLE_MASK (1ULL << PT_WRITABLE_SHIFT)
++#define PT_USER_MASK (1ULL << 2)
++#define PT_PWT_MASK (1ULL << 3)
++#define PT_PCD_MASK (1ULL << 4)
++#define PT_ACCESSED_MASK (1ULL << 5)
++#define PT_DIRTY_MASK (1ULL << 6)
++#define PT_PAGE_SIZE_MASK (1ULL << 7)
++#define PT_PAT_MASK (1ULL << 7)
++#define PT_GLOBAL_MASK (1ULL << 8)
++#define PT64_NX_SHIFT 63
++#define PT64_NX_MASK (1ULL << PT64_NX_SHIFT)
++
++#define PT_PAT_SHIFT 7
++#define PT_DIR_PAT_SHIFT 12
++#define PT_DIR_PAT_MASK (1ULL << PT_DIR_PAT_SHIFT)
++
++#define PT32_DIR_PSE36_SIZE 4
++#define PT32_DIR_PSE36_SHIFT 13
++#define PT32_DIR_PSE36_MASK \
++	(((1ULL << PT32_DIR_PSE36_SIZE) - 1) << PT32_DIR_PSE36_SHIFT)
++
++#define PT64_ROOT_LEVEL 4
++#define PT32_ROOT_LEVEL 2
++#define PT32E_ROOT_LEVEL 3
+ 
+ static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
+ {
+diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
+index 89e0be2..ab22615 100644
+--- a/arch/x86/kvm/svm.c
++++ b/arch/x86/kvm/svm.c
+@@ -1863,6 +1863,15 @@ static bool svm_cpu_has_accelerated_tpr(void)
+ 	return false;
+ }
+ 
++static int get_npt_level(void)
++{
++#ifdef CONFIG_X86_64
++	return PT64_ROOT_LEVEL;
++#else
++	return PT32E_ROOT_LEVEL;
++#endif
++}
++
+ static struct kvm_x86_ops svm_x86_ops = {
+ 	.cpu_has_kvm_support = has_svm,
+ 	.disabled_by_bios = is_disabled,
+@@ -1920,6 +1929,7 @@ static struct kvm_x86_ops svm_x86_ops = {
+ 	.inject_pending_vectors = do_interrupt_requests,
+ 
+ 	.set_tss_addr = svm_set_tss_addr,
++	.get_tdp_level = get_npt_level,
+ };
+ 
+ static int __init svm_init(void)
+diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
+index 8e5d664..bfe4db1 100644
+--- a/arch/x86/kvm/vmx.c
++++ b/arch/x86/kvm/vmx.c
+@@ -42,6 +42,9 @@ module_param(enable_vpid, bool, 0);
+ static int flexpriority_enabled = 1;
+ module_param(flexpriority_enabled, bool, 0);
+ 
++static int enable_ept = 1;
++module_param(enable_ept, bool, 0);
++
+ struct vmcs {
+ 	u32 revision_id;
+ 	u32 abort;
+@@ -84,7 +87,7 @@ static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
+ 	return container_of(vcpu, struct vcpu_vmx, vcpu);
+ }
+ 
+-static int init_rmode_tss(struct kvm *kvm);
++static int init_rmode(struct kvm *kvm);
+ 
+ static DEFINE_PER_CPU(struct vmcs *, vmxarea);
+ static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
+@@ -107,6 +110,11 @@ static struct vmcs_config {
+ 	u32 vmentry_ctrl;
+ } vmcs_config;
+ 
++struct vmx_capability {
++	u32 ept;
++	u32 vpid;
++} vmx_capability;
++
+ #define VMX_SEGMENT_FIELD(seg)					\
+ 	[VCPU_SREG_##seg] = {                                   \
+ 		.selector = GUEST_##seg##_SELECTOR,		\
+@@ -214,6 +222,32 @@ static inline bool cpu_has_vmx_virtualize_apic_accesses(void)
+ 		    SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES);
+ }
+ 
++static inline int cpu_has_vmx_invept_individual_addr(void)
++{
++	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_INDIVIDUAL_BIT));
++}
++
++static inline int cpu_has_vmx_invept_context(void)
++{
++	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_CONTEXT_BIT));
++}
++
++static inline int cpu_has_vmx_invept_global(void)
++{
++	return (!!(vmx_capability.ept & VMX_EPT_EXTENT_GLOBAL_BIT));
++}
++
++static inline int cpu_has_vmx_ept(void)
++{
++	return (vmcs_config.cpu_based_2nd_exec_ctrl &
++		SECONDARY_EXEC_ENABLE_EPT);
++}
++
++static inline int vm_need_ept(void)
++{
++	return (cpu_has_vmx_ept() && enable_ept);
++}
++
+ static inline int vm_need_virtualize_apic_accesses(struct kvm *kvm)
+ {
+ 	return ((cpu_has_vmx_virtualize_apic_accesses()) &&
+@@ -250,6 +284,18 @@ static inline void __invvpid(int ext, u16 vpid, gva_t gva)
+ 		  : : "a"(&operand), "c"(ext) : "cc", "memory");
+ }
+ 
++static inline void __invept(int ext, u64 eptp, gpa_t gpa)
++{
++	struct {
++		u64 eptp, gpa;
++	} operand = {eptp, gpa};
++
++	asm volatile (ASM_VMX_INVEPT
++			/* CF==1 or ZF==1 --> rc = -1 */
++			"; ja 1f ; ud2 ; 1:\n"
++			: : "a" (&operand), "c" (ext) : "cc", "memory");
++}
++
+ static struct kvm_msr_entry *find_msr_entry(struct vcpu_vmx *vmx, u32 msr)
+ {
+ 	int i;
+@@ -301,6 +347,33 @@ static inline void vpid_sync_vcpu_all(struct vcpu_vmx *vmx)
+ 	__invvpid(VMX_VPID_EXTENT_SINGLE_CONTEXT, vmx->vpid, 0);
+ }
+ 
++static inline void ept_sync_global(void)
++{
++	if (cpu_has_vmx_invept_global())
++		__invept(VMX_EPT_EXTENT_GLOBAL, 0, 0);
++}
++
++static inline void ept_sync_context(u64 eptp)
++{
++	if (vm_need_ept()) {
++		if (cpu_has_vmx_invept_context())
++			__invept(VMX_EPT_EXTENT_CONTEXT, eptp, 0);
++		else
++			ept_sync_global();
++	}
++}
++
++static inline void ept_sync_individual_addr(u64 eptp, gpa_t gpa)
++{
++	if (vm_need_ept()) {
++		if (cpu_has_vmx_invept_individual_addr())
++			__invept(VMX_EPT_EXTENT_INDIVIDUAL_ADDR,
++					eptp, gpa);
++		else
++			ept_sync_context(eptp);
++	}
++}
++
+ static unsigned long vmcs_readl(unsigned long field)
+ {
+ 	unsigned long value;
+@@ -388,6 +461,8 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
+ 		eb |= 1u << 1;
+ 	if (vcpu->arch.rmode.active)
+ 		eb = ~0;
++	if (vm_need_ept())
++		eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
+ 	vmcs_write32(EXCEPTION_BITMAP, eb);
+ }
+ 
+@@ -985,7 +1060,7 @@ static __init int adjust_vmx_controls(u32 ctl_min, u32 ctl_opt,
+ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
+ {
+ 	u32 vmx_msr_low, vmx_msr_high;
+-	u32 min, opt;
++	u32 min, opt, min2, opt2;
+ 	u32 _pin_based_exec_control = 0;
+ 	u32 _cpu_based_exec_control = 0;
+ 	u32 _cpu_based_2nd_exec_control = 0;
+@@ -1003,6 +1078,8 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
+ 	      CPU_BASED_CR8_LOAD_EXITING |
+ 	      CPU_BASED_CR8_STORE_EXITING |
+ #endif
++	      CPU_BASED_CR3_LOAD_EXITING |
++	      CPU_BASED_CR3_STORE_EXITING |
+ 	      CPU_BASED_USE_IO_BITMAPS |
+ 	      CPU_BASED_MOV_DR_EXITING |
+ 	      CPU_BASED_USE_TSC_OFFSETING;
+@@ -1018,11 +1095,13 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
+ 					   ~CPU_BASED_CR8_STORE_EXITING;
+ #endif
+ 	if (_cpu_based_exec_control & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) {
+-		min = 0;
+-		opt = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
++		min2 = 0;
++		opt2 = SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+ 			SECONDARY_EXEC_WBINVD_EXITING |
+-			SECONDARY_EXEC_ENABLE_VPID;
+-		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS2,
++			SECONDARY_EXEC_ENABLE_VPID |
++			SECONDARY_EXEC_ENABLE_EPT;
++		if (adjust_vmx_controls(min2, opt2,
++					MSR_IA32_VMX_PROCBASED_CTLS2,
+ 					&_cpu_based_2nd_exec_control) < 0)
+ 			return -EIO;
+ 	}
+@@ -1031,6 +1110,16 @@ static __init int setup_vmcs_config(struct vmcs_config *vmcs_conf)
+ 				SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES))
+ 		_cpu_based_exec_control &= ~CPU_BASED_TPR_SHADOW;
+ #endif
++	if (_cpu_based_2nd_exec_control & SECONDARY_EXEC_ENABLE_EPT) {
++		/* CR3 accesses don't need to cause VM Exits when EPT is enabled */
++		min &= ~(CPU_BASED_CR3_LOAD_EXITING |
++			 CPU_BASED_CR3_STORE_EXITING);
++		if (adjust_vmx_controls(min, opt, MSR_IA32_VMX_PROCBASED_CTLS,
++					&_cpu_based_exec_control) < 0)
++			return -EIO;
++		rdmsr(MSR_IA32_VMX_EPT_VPID_CAP,
++		      vmx_capability.ept, vmx_capability.vpid);
++	}
+ 
+ 	min = 0;
+ #ifdef CONFIG_X86_64
+@@ -1256,7 +1345,7 @@ static void enter_rmode(struct kvm_vcpu *vcpu)
+ 	fix_rmode_seg(VCPU_SREG_FS, &vcpu->arch.rmode.fs);
+ 
+ 	kvm_mmu_reset_context(vcpu);
+-	init_rmode_tss(vcpu->kvm);
++	init_rmode(vcpu->kvm);
+ }
+ 
+ #ifdef CONFIG_X86_64
+@@ -1304,8 +1393,64 @@ static void vmx_decache_cr4_guest_bits(struct kvm_vcpu *vcpu)
+ 	vcpu->arch.cr4 |= vmcs_readl(GUEST_CR4) & ~KVM_GUEST_CR4_MASK;
+ }
+ 
++static void ept_load_pdptrs(struct kvm_vcpu *vcpu)
++{
++	if (is_paging(vcpu) && is_pae(vcpu) && !is_long_mode(vcpu)) {
++		if (!load_pdptrs(vcpu, vcpu->arch.cr3)) {
++			printk(KERN_ERR "EPT: Failed to load pdptrs!\n");
++			return;
++		}
++		vmcs_write64(GUEST_PDPTR0, vcpu->arch.pdptrs[0]);
++		vmcs_write64(GUEST_PDPTR1, vcpu->arch.pdptrs[1]);
++		vmcs_write64(GUEST_PDPTR2, vcpu->arch.pdptrs[2]);
++		vmcs_write64(GUEST_PDPTR3, vcpu->arch.pdptrs[3]);
++	}
++}
++
++static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4);
++
++static void ept_update_paging_mode_cr0(unsigned long *hw_cr0,
++					unsigned long cr0,
++					struct kvm_vcpu *vcpu)
++{
++	if (!(cr0 & X86_CR0_PG)) {
++		/* From paging/starting to nonpaging */
++		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
++			     vmcs_config.cpu_based_exec_ctrl |
++			     (CPU_BASED_CR3_LOAD_EXITING |
++			      CPU_BASED_CR3_STORE_EXITING));
++		vcpu->arch.cr0 = cr0;
++		vmx_set_cr4(vcpu, vcpu->arch.cr4);
++		*hw_cr0 |= X86_CR0_PE | X86_CR0_PG;
++		*hw_cr0 &= ~X86_CR0_WP;
++	} else if (!is_paging(vcpu)) {
++		/* From nonpaging to paging */
++		vmcs_write32(CPU_BASED_VM_EXEC_CONTROL,
++			     vmcs_config.cpu_based_exec_ctrl &
++			     ~(CPU_BASED_CR3_LOAD_EXITING |
++			       CPU_BASED_CR3_STORE_EXITING));
++		vcpu->arch.cr0 = cr0;
++		vmx_set_cr4(vcpu, vcpu->arch.cr4);
++		if (!(vcpu->arch.cr0 & X86_CR0_WP))
++			*hw_cr0 &= ~X86_CR0_WP;
++	}
++}
++
++static void ept_update_paging_mode_cr4(unsigned long *hw_cr4,
++					struct kvm_vcpu *vcpu)
++{
++	if (!is_paging(vcpu)) {
++		*hw_cr4 &= ~X86_CR4_PAE;
++		*hw_cr4 |= X86_CR4_PSE;
++	} else if (!(vcpu->arch.cr4 & X86_CR4_PAE))
++		*hw_cr4 &= ~X86_CR4_PAE;
++}
++
+ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ {
++	unsigned long hw_cr0 = (cr0 & ~KVM_GUEST_CR0_MASK) |
++				KVM_VM_CR0_ALWAYS_ON;
++
+ 	vmx_fpu_deactivate(vcpu);
+ 
+ 	if (vcpu->arch.rmode.active && (cr0 & X86_CR0_PE))
+@@ -1323,29 +1468,61 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
+ 	}
+ #endif
+ 
++	if (vm_need_ept())
++		ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
++
+ 	vmcs_writel(CR0_READ_SHADOW, cr0);
+-	vmcs_writel(GUEST_CR0,
+-		    (cr0 & ~KVM_GUEST_CR0_MASK) | KVM_VM_CR0_ALWAYS_ON);
++	vmcs_writel(GUEST_CR0, hw_cr0);
+ 	vcpu->arch.cr0 = cr0;
+ 
+ 	if (!(cr0 & X86_CR0_TS) || !(cr0 & X86_CR0_PE))
+ 		vmx_fpu_activate(vcpu);
+ }
+ 
++static u64 construct_eptp(unsigned long root_hpa)
++{
++	u64 eptp;
++
++	/* TODO: write the value read from the MSR */
++	eptp = VMX_EPT_DEFAULT_MT |
++		VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT;
++	eptp |= (root_hpa & PAGE_MASK);
++
++	return eptp;
++}
++
+ static void vmx_set_cr3(struct kvm_vcpu *vcpu, unsigned long cr3)
+ {
++	unsigned long guest_cr3;
++	u64 eptp;
++
++	guest_cr3 = cr3;
++	if (vm_need_ept()) {
++		eptp = construct_eptp(cr3);
++		vmcs_write64(EPT_POINTER, eptp);
++		ept_sync_context(eptp);
++		ept_load_pdptrs(vcpu);
++		guest_cr3 = is_paging(vcpu) ? vcpu->arch.cr3 :
++			VMX_EPT_IDENTITY_PAGETABLE_ADDR;
++	}
++
+ 	vmx_flush_tlb(vcpu);
+-	vmcs_writel(GUEST_CR3, cr3);
++	vmcs_writel(GUEST_CR3, guest_cr3);
+ 	if (vcpu->arch.cr0 & X86_CR0_PE)
+ 		vmx_fpu_deactivate(vcpu);
+ }
+ 
+ static void vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
+ {
+-	vmcs_writel(CR4_READ_SHADOW, cr4);
+-	vmcs_writel(GUEST_CR4, cr4 | (vcpu->arch.rmode.active ?
+-		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON));
++	unsigned long hw_cr4 = cr4 | (vcpu->arch.rmode.active ?
++		    KVM_RMODE_VM_CR4_ALWAYS_ON : KVM_PMODE_VM_CR4_ALWAYS_ON);
++
+ 	vcpu->arch.cr4 = cr4;
++	if (vm_need_ept())
++		ept_update_paging_mode_cr4(&hw_cr4, vcpu);
++
++	vmcs_writel(CR4_READ_SHADOW, cr4);
++	vmcs_writel(GUEST_CR4, hw_cr4);
+ }
+ 
+ static void vmx_set_efer(struct kvm_vcpu *vcpu, u64 efer)
+@@ -1530,6 +1707,41 @@ out:
+ 	return ret;
+ }
+ 
++static int init_rmode_identity_map(struct kvm *kvm)
++{
++	int i, r, ret;
++	pfn_t identity_map_pfn;
++	u32 tmp;
++
++	if (!vm_need_ept())
++		return 1;
++	if (unlikely(!kvm->arch.ept_identity_pagetable)) {
++		printk(KERN_ERR "EPT: identity-mapping pagetable "
++			"hasn't been allocated!\n");
++		return 0;
++	}
++	if (likely(kvm->arch.ept_identity_pagetable_done))
++		return 1;
++	ret = 0;
++	identity_map_pfn = VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT;
++	r = kvm_clear_guest_page(kvm, identity_map_pfn, 0, PAGE_SIZE);
++	if (r < 0)
++		goto out;
++	/* Set up identity-mapping pagetable for EPT in real mode */
++	for (i = 0; i < PT32_ENT_PER_PAGE; i++) {
++		tmp = (i << 22) + (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |
++			_PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE);
++		r = kvm_write_guest_page(kvm, identity_map_pfn,
++				&tmp, i * sizeof(tmp), sizeof(tmp));
++		if (r < 0)
++			goto out;
++	}
++	kvm->arch.ept_identity_pagetable_done = true;
++	ret = 1;
++out:
++	return ret;
++}
++
+ static void seg_setup(int seg)
+ {
+ 	struct kvm_vmx_segment_field *sf = &kvm_vmx_segment_fields[seg];
+@@ -1564,6 +1776,31 @@ out:
+ 	return r;
+ }
+ 
++static int alloc_identity_pagetable(struct kvm *kvm)
++{
++	struct kvm_userspace_memory_region kvm_userspace_mem;
++	int r = 0;
++
++	down_write(&kvm->slots_lock);
++	if (kvm->arch.ept_identity_pagetable)
++		goto out;
++	kvm_userspace_mem.slot = IDENTITY_PAGETABLE_PRIVATE_MEMSLOT;
++	kvm_userspace_mem.flags = 0;
++	kvm_userspace_mem.guest_phys_addr = VMX_EPT_IDENTITY_PAGETABLE_ADDR;
++	kvm_userspace_mem.memory_size = PAGE_SIZE;
++	r = __kvm_set_memory_region(kvm, &kvm_userspace_mem, 0);
++	if (r)
++		goto out;
++
++	down_read(&current->mm->mmap_sem);
++	kvm->arch.ept_identity_pagetable = gfn_to_page(kvm,
++			VMX_EPT_IDENTITY_PAGETABLE_ADDR >> PAGE_SHIFT);
++	up_read(&current->mm->mmap_sem);
++out:
++	up_write(&kvm->slots_lock);
++	return r;
++}
++
+ static void allocate_vpid(struct vcpu_vmx *vmx)
+ {
+ 	int vpid;
+@@ -1638,6 +1875,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ 				CPU_BASED_CR8_LOAD_EXITING;
+ #endif
+ 	}
++	if (!vm_need_ept())
++		exec_control |= CPU_BASED_CR3_STORE_EXITING |
++				CPU_BASED_CR3_LOAD_EXITING;
+ 	vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, exec_control);
+ 
+ 	if (cpu_has_secondary_exec_ctrls()) {
+@@ -1647,6 +1887,8 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ 				~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ 		if (vmx->vpid == 0)
+ 			exec_control &= ~SECONDARY_EXEC_ENABLE_VPID;
++		if (!vm_need_ept())
++			exec_control &= ~SECONDARY_EXEC_ENABLE_EPT;
+ 		vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control);
+ 	}
+ 
+@@ -1722,6 +1964,15 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
+ 	return 0;
+ }
+ 
++static int init_rmode(struct kvm *kvm)
++{
++	if (!init_rmode_tss(kvm))
++		return 0;
++	if (!init_rmode_identity_map(kvm))
++		return 0;
++	return 1;
++}
++
+ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+ {
+ 	struct vcpu_vmx *vmx = to_vmx(vcpu);
+@@ -1729,7 +1980,7 @@ static int vmx_vcpu_reset(struct kvm_vcpu *vcpu)
+ 	int ret;
+ 
+ 	down_read(&vcpu->kvm->slots_lock);
+-	if (!init_rmode_tss(vmx->vcpu.kvm)) {
++	if (!init_rmode(vmx->vcpu.kvm)) {
+ 		ret = -ENOMEM;
+ 		goto out;
+ 	}
+@@ -1994,6 +2245,9 @@ static int handle_exception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+ 	if (intr_info & INTR_INFO_DELIVER_CODE_MASK)
+ 		error_code = vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
+ 	if (is_page_fault(intr_info)) {
++		/* EPT won't cause page fault directly */
++		if (vm_need_ept())
++			BUG();
+ 		cr2 = vmcs_readl(EXIT_QUALIFICATION);
+ 		KVMTRACE_3D(PAGE_FAULT, vcpu, error_code, (u32)cr2,
+ 			    (u32)((u64)cr2 >> 32), handler);
+@@ -2323,6 +2577,64 @@ static int handle_task_switch(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
+ 	return kvm_task_switch(vcpu, tss_selector, reason);
+ }
+ 
++static int handle_ept_violation(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
++{
++	u64 exit_qualification;
++	enum emulation_result er;
++	gpa_t gpa;
++	unsigned long hva;
++	int gla_validity;
++	int r;
++
++	exit_qualification = vmcs_read64(EXIT_QUALIFICATION);
++
++	if (exit_qualification & (1 << 6)) {
++		printk(KERN_ERR "EPT: GPA exceeds GAW!\n");
++		return -ENOTSUPP;
++	}
++
++	gla_validity = (exit_qualification >> 7) & 0x3;
++	if (gla_validity != 0x3 && gla_validity != 0x1 && gla_validity != 0) {
++		printk(KERN_ERR "EPT: Handling EPT violation failed!\n");
++		printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
++			(long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
++			(long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
++		printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
++			(long unsigned int)exit_qualification);
++		kvm_run->exit_reason = KVM_EXIT_UNKNOWN;
++		kvm_run->hw.hardware_exit_reason = 0;
++		return -ENOTSUPP;
++	}
++
++	gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS);
++	hva = gfn_to_hva(vcpu->kvm, gpa >> PAGE_SHIFT);
++	if (!kvm_is_error_hva(hva)) {
++		r = kvm_mmu_page_fault(vcpu, gpa & PAGE_MASK, 0);
++		if (r < 0) {
++			printk(KERN_ERR "EPT: Not enough memory!\n");
++			return -ENOMEM;
++		}
++		return 1;
++	} else {
++		/* must be MMIO */
++		er = emulate_instruction(vcpu, kvm_run, 0, 0, 0);
++
++		if (er == EMULATE_FAIL) {
++			printk(KERN_ERR
++			 "EPT: Failed to handle EPT violation vmexit! er is %d\n",
++			 er);
++			printk(KERN_ERR "EPT: GPA: 0x%lx, GVA: 0x%lx\n",
++			 (long unsigned int)vmcs_read64(GUEST_PHYSICAL_ADDRESS),
++			 (long unsigned int)vmcs_read64(GUEST_LINEAR_ADDRESS));
++			printk(KERN_ERR "EPT: Exit qualification is 0x%lx\n",
++				(long unsigned int)exit_qualification);
++			return -ENOTSUPP;
++		} else if (er == EMULATE_DO_MMIO)
++			return 0;
++	}
++	return 1;
++}
++
+ /*
+  * The exit handlers return 1 if the exit was handled fully and guest execution
+  * may resume.  Otherwise they set the kvm_run parameter to indicate what needs
+@@ -2346,6 +2658,7 @@ static int (*kvm_vmx_exit_handlers[])(struct kvm_vcpu *vcpu,
+ 	[EXIT_REASON_APIC_ACCESS]             = handle_apic_access,
+ 	[EXIT_REASON_WBINVD]                  = handle_wbinvd,
+ 	[EXIT_REASON_TASK_SWITCH]             = handle_task_switch,
++	[EXIT_REASON_EPT_VIOLATION]	      = handle_ept_violation,
+ };
+ 
+ static const int kvm_vmx_max_exit_handlers =
+@@ -2364,6 +2677,13 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+ 	KVMTRACE_3D(VMEXIT, vcpu, exit_reason, (u32)vmcs_readl(GUEST_RIP),
+ 		    (u32)((u64)vmcs_readl(GUEST_RIP) >> 32), entryexit);
+ 
++	/* CR3 accesses don't cause a VMExit in paging mode, so we need
++	 * to sync with the guest's real CR3. */
++	if (vm_need_ept() && is_paging(vcpu)) {
++		vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
++		ept_load_pdptrs(vcpu);
++	}
++
+ 	if (unlikely(vmx->fail)) {
+ 		kvm_run->exit_reason = KVM_EXIT_FAIL_ENTRY;
+ 		kvm_run->fail_entry.hardware_entry_failure_reason
+@@ -2372,7 +2692,8 @@ static int kvm_handle_exit(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu)
+ 	}
+ 
+ 	if ((vectoring_info & VECTORING_INFO_VALID_MASK) &&
+-				exit_reason != EXIT_REASON_EXCEPTION_NMI)
++			(exit_reason != EXIT_REASON_EXCEPTION_NMI &&
++			exit_reason != EXIT_REASON_EPT_VIOLATION))
+ 		printk(KERN_WARNING "%s: unexpected, valid vectoring info and "
+ 		       "exit reason is 0x%x\n", __func__, exit_reason);
+ 	if (exit_reason < kvm_vmx_max_exit_handlers
+@@ -2674,6 +2995,15 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ 		return ERR_PTR(-ENOMEM);
+ 
+ 	allocate_vpid(vmx);
++	if (id == 0 && vm_need_ept()) {
++		kvm_mmu_set_base_ptes(VMX_EPT_READABLE_MASK |
++			VMX_EPT_WRITABLE_MASK |
++			VMX_EPT_DEFAULT_MT << VMX_EPT_MT_EPTE_SHIFT);
++		kvm_mmu_set_mask_ptes(0ull, VMX_EPT_FAKE_ACCESSED_MASK,
++				VMX_EPT_FAKE_DIRTY_MASK, 0ull,
++				VMX_EPT_EXECUTABLE_MASK);
++		kvm_enable_tdp();
++	}
+ 
+ 	err = kvm_vcpu_init(&vmx->vcpu, kvm, id);
+ 	if (err)
+@@ -2706,6 +3036,10 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, unsigned int id)
+ 		if (alloc_apic_access_page(kvm) != 0)
+ 			goto free_vmcs;
+ 
++	if (vm_need_ept())
++		if (alloc_identity_pagetable(kvm) != 0)
++			goto free_vmcs;
++
+ 	return &vmx->vcpu;
+ 
+ free_vmcs:
+@@ -2735,6 +3069,11 @@ static void __init vmx_check_processor_compat(void *rtn)
+ 	}
+ }
+ 
++static int get_ept_level(void)
++{
++	return VMX_EPT_DEFAULT_GAW + 1;
++}
++
+ static struct kvm_x86_ops vmx_x86_ops = {
+ 	.cpu_has_kvm_support = cpu_has_kvm_support,
+ 	.disabled_by_bios = vmx_disabled_by_bios,
+@@ -2791,6 +3130,7 @@ static struct kvm_x86_ops vmx_x86_ops = {
+ 	.inject_pending_vectors = do_interrupt_requests,
+ 
+ 	.set_tss_addr = vmx_set_tss_addr,
++	.get_tdp_level = get_ept_level,
+ };
+ 
+ static int __init vmx_init(void)
+@@ -2843,9 +3183,14 @@ static int __init vmx_init(void)
+ 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_ESP);
+ 	vmx_disable_intercept_for_msr(vmx_msr_bitmap, MSR_IA32_SYSENTER_EIP);
+ 
++	if (cpu_has_vmx_ept())
++		bypass_guest_pf = 0;
++
+ 	if (bypass_guest_pf)
+ 		kvm_mmu_set_nonpresent_ptes(~0xffeull, 0ull);
+ 
++	ept_sync_global();
++
+ 	return 0;
+ 
+ out2:
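
Note: a worked example for the identity-map loop in
init_rmode_identity_map().  Each of the 1024 entries is a 4 MB PSE
page-directory entry mapping its own slot; with the usual i386 flag
values, entry i == 1 comes out as:

	tmp = (1 << 22) | _PAGE_PRESENT | _PAGE_RW | _PAGE_USER
		| _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_PSE;
	/* = 0x00400000 | 0xe7 = 0x004000e7: the 4 MB page at 4 MB,
	 * present, writable, user, accessed, dirty */
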
+diff --git a/arch/x86/kvm/vmx.h b/arch/x86/kvm/vmx.h
+index 5dff460..79d94c6 100644
+--- a/arch/x86/kvm/vmx.h
++++ b/arch/x86/kvm/vmx.h
+@@ -35,6 +35,8 @@
+ #define CPU_BASED_MWAIT_EXITING                 0x00000400
+ #define CPU_BASED_RDPMC_EXITING                 0x00000800
+ #define CPU_BASED_RDTSC_EXITING                 0x00001000
++#define CPU_BASED_CR3_LOAD_EXITING		0x00008000
++#define CPU_BASED_CR3_STORE_EXITING		0x00010000
+ #define CPU_BASED_CR8_LOAD_EXITING              0x00080000
+ #define CPU_BASED_CR8_STORE_EXITING             0x00100000
+ #define CPU_BASED_TPR_SHADOW                    0x00200000
+@@ -49,6 +51,7 @@
+  * Definitions of Secondary Processor-Based VM-Execution Controls.
+  */
+ #define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
++#define SECONDARY_EXEC_ENABLE_EPT               0x00000002
+ #define SECONDARY_EXEC_ENABLE_VPID              0x00000020
+ #define SECONDARY_EXEC_WBINVD_EXITING		0x00000040
+ 
+@@ -100,10 +103,22 @@ enum vmcs_field {
+ 	VIRTUAL_APIC_PAGE_ADDR_HIGH     = 0x00002013,
+ 	APIC_ACCESS_ADDR		= 0x00002014,
+ 	APIC_ACCESS_ADDR_HIGH		= 0x00002015,
++	EPT_POINTER                     = 0x0000201a,
++	EPT_POINTER_HIGH                = 0x0000201b,
++	GUEST_PHYSICAL_ADDRESS          = 0x00002400,
++	GUEST_PHYSICAL_ADDRESS_HIGH     = 0x00002401,
+ 	VMCS_LINK_POINTER               = 0x00002800,
+ 	VMCS_LINK_POINTER_HIGH          = 0x00002801,
+ 	GUEST_IA32_DEBUGCTL             = 0x00002802,
+ 	GUEST_IA32_DEBUGCTL_HIGH        = 0x00002803,
++	GUEST_PDPTR0                    = 0x0000280a,
++	GUEST_PDPTR0_HIGH               = 0x0000280b,
++	GUEST_PDPTR1                    = 0x0000280c,
++	GUEST_PDPTR1_HIGH               = 0x0000280d,
++	GUEST_PDPTR2                    = 0x0000280e,
++	GUEST_PDPTR2_HIGH               = 0x0000280f,
++	GUEST_PDPTR3                    = 0x00002810,
++	GUEST_PDPTR3_HIGH               = 0x00002811,
+ 	PIN_BASED_VM_EXEC_CONTROL       = 0x00004000,
+ 	CPU_BASED_VM_EXEC_CONTROL       = 0x00004002,
+ 	EXCEPTION_BITMAP                = 0x00004004,
+@@ -226,6 +241,8 @@ enum vmcs_field {
+ #define EXIT_REASON_MWAIT_INSTRUCTION   36
+ #define EXIT_REASON_TPR_BELOW_THRESHOLD 43
+ #define EXIT_REASON_APIC_ACCESS         44
++#define EXIT_REASON_EPT_VIOLATION       48
++#define EXIT_REASON_EPT_MISCONFIG       49
+ #define EXIT_REASON_WBINVD		54
+ 
+ /*
+@@ -316,15 +333,36 @@ enum vmcs_field {
+ #define MSR_IA32_VMX_CR4_FIXED1                 0x489
+ #define MSR_IA32_VMX_VMCS_ENUM                  0x48a
+ #define MSR_IA32_VMX_PROCBASED_CTLS2            0x48b
++#define MSR_IA32_VMX_EPT_VPID_CAP               0x48c
+ 
+ #define MSR_IA32_FEATURE_CONTROL                0x3a
+ #define MSR_IA32_FEATURE_CONTROL_LOCKED         0x1
+ #define MSR_IA32_FEATURE_CONTROL_VMXON_ENABLED  0x4
+ 
+ #define APIC_ACCESS_PAGE_PRIVATE_MEMSLOT	9
++#define IDENTITY_PAGETABLE_PRIVATE_MEMSLOT	10
+ 
+ #define VMX_NR_VPIDS				(1 << 16)
+ #define VMX_VPID_EXTENT_SINGLE_CONTEXT		1
+ #define VMX_VPID_EXTENT_ALL_CONTEXT		2
+ 
++#define VMX_EPT_EXTENT_INDIVIDUAL_ADDR		0
++#define VMX_EPT_EXTENT_CONTEXT			1
++#define VMX_EPT_EXTENT_GLOBAL			2
++#define VMX_EPT_EXTENT_INDIVIDUAL_BIT		(1ull << 24)
++#define VMX_EPT_EXTENT_CONTEXT_BIT		(1ull << 25)
++#define VMX_EPT_EXTENT_GLOBAL_BIT		(1ull << 26)
++#define VMX_EPT_DEFAULT_GAW			3
++#define VMX_EPT_MAX_GAW				0x4
++#define VMX_EPT_MT_EPTE_SHIFT			3
++#define VMX_EPT_GAW_EPTP_SHIFT			3
++#define VMX_EPT_DEFAULT_MT			0x6ull
++#define VMX_EPT_READABLE_MASK			0x1ull
++#define VMX_EPT_WRITABLE_MASK			0x2ull
++#define VMX_EPT_EXECUTABLE_MASK			0x4ull
++#define VMX_EPT_FAKE_ACCESSED_MASK		(1ull << 62)
++#define VMX_EPT_FAKE_DIRTY_MASK			(1ull << 63)
++
++#define VMX_EPT_IDENTITY_PAGETABLE_ADDR		0xfffbc000ul
++
+ #endif
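
Note: plugging the defaults above into construct_eptp() from vmx.c
gives the low bits carried by every EPT pointer:

	/* VMX_EPT_DEFAULT_MT                            = 0x06 (write-back)
	 * VMX_EPT_DEFAULT_GAW << VMX_EPT_GAW_EPTP_SHIFT = 3 << 3 = 0x18
	 * eptp = (root_hpa & PAGE_MASK) | 0x1e
	 */
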
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 0ce5563..21338bd 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2417,6 +2417,9 @@ int kvm_arch_init(void *opaque)
+ 
+ 	kvm_x86_ops = ops;
+ 	kvm_mmu_set_nonpresent_ptes(0ull, 0ull);
++	kvm_mmu_set_base_ptes(PT_PRESENT_MASK);
++	kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
++			PT_DIRTY_MASK, PT64_NX_MASK, 0);
+ 	return 0;
+ 
+ out:
+@@ -3019,6 +3022,8 @@ int kvm_arch_vcpu_ioctl_set_regs(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+ 
+ 	kvm_x86_ops->decache_regs(vcpu);
+ 
++	vcpu->arch.exception.pending = false;
++
+ 	vcpu_put(vcpu);
+ 
+ 	return 0;
+@@ -3481,7 +3486,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
+ 	}
+ 
+ 	if (reason == TASK_SWITCH_IRET || reason == TASK_SWITCH_JMP) {
+-		cseg_desc.type &= ~(1 << 8); //clear the B flag
++		cseg_desc.type &= ~(1 << 1); //clear the B flag
+ 		save_guest_segment_descriptor(vcpu, tr_seg.selector,
+ 					      &cseg_desc);
+ 	}
+@@ -3507,7 +3512,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
+ 	}
+ 
+ 	if (reason != TASK_SWITCH_IRET) {
+-		nseg_desc.type |= (1 << 8);
++		nseg_desc.type |= (1 << 1);
+ 		save_guest_segment_descriptor(vcpu, tss_selector,
+ 					      &nseg_desc);
+ 	}
+@@ -3698,10 +3703,19 @@ void fx_init(struct kvm_vcpu *vcpu)
+ {
+ 	unsigned after_mxcsr_mask;
+ 
++	/*
++	 * Touch the FPU for the first time in non-atomic context: if
++	 * this is the first FPU instruction, the exception handler
++	 * will fire before the instruction returns, and it will have
++	 * to allocate RAM with GFP_KERNEL.
++	 */
++	if (!used_math())
++		fx_save(&vcpu->arch.host_fx_image);
++
+ 	/* Initialize guest FPU by resetting ours and saving into guest's */
+ 	preempt_disable();
+ 	fx_save(&vcpu->arch.host_fx_image);
+-	fpu_init();
++	fx_finit();
+ 	fx_save(&vcpu->arch.guest_fx_image);
+ 	fx_restore(&vcpu->arch.host_fx_image);
+ 	preempt_enable();
+@@ -3906,6 +3920,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
+ 	kvm_free_physmem(kvm);
+ 	if (kvm->arch.apic_access_page)
+ 		put_page(kvm->arch.apic_access_page);
++	if (kvm->arch.ept_identity_pagetable)
++		put_page(kvm->arch.ept_identity_pagetable);
+ 	kfree(kvm);
+ }
+ 
+diff --git a/arch/x86/kvm/x86_emulate.c b/arch/x86/kvm/x86_emulate.c
+index 2ca0838..f2a696d 100644
+--- a/arch/x86/kvm/x86_emulate.c
++++ b/arch/x86/kvm/x86_emulate.c
+@@ -1761,6 +1761,7 @@ twobyte_insn:
+ 		case 6: /* lmsw */
+ 			realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
+ 				      &ctxt->eflags);
++			c->dst.type = OP_NONE;
+ 			break;
+ 		case 7: /* invlpg*/
+ 			emulate_invlpg(ctxt->vcpu, memop);
+diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c
+index 1837885..914ccf9 100644
+--- a/arch/x86/mm/discontig_32.c
++++ b/arch/x86/mm/discontig_32.c
+@@ -476,29 +476,3 @@ int memory_add_physaddr_to_nid(u64 addr)
+ 
+ EXPORT_SYMBOL_GPL(memory_add_physaddr_to_nid);
+ #endif
+-
+-#ifndef CONFIG_HAVE_ARCH_PARSE_SRAT
+-/*
+- * XXX FIXME: Make SLIT table parsing available to 32-bit NUMA
+- *
+- * These stub functions are needed to compile 32-bit NUMA when SRAT is
+- * not set. There are functions in srat_64.c for parsing this table
+- * and it may be possible to make them common functions.
+- */
+-void acpi_numa_slit_init (struct acpi_table_slit *slit)
+-{
+-	printk(KERN_INFO "ACPI: No support for parsing SLIT table\n");
+-}
+-
+-void acpi_numa_processor_affinity_init (struct acpi_srat_cpu_affinity *pa)
+-{
+-}
+-
+-void acpi_numa_memory_affinity_init (struct acpi_srat_mem_affinity *ma)
+-{
+-}
+-
+-void acpi_numa_arch_fixup(void)
+-{
+-}
+-#endif /* CONFIG_HAVE_ARCH_PARSE_SRAT */
+diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c
+index 9ee007b..369cf06 100644
+--- a/arch/x86/mm/pgtable_32.c
++++ b/arch/x86/mm/pgtable_32.c
+@@ -172,10 +172,3 @@ void reserve_top_address(unsigned long reserve)
+ 	__FIXADDR_TOP = -reserve - PAGE_SIZE;
+ 	__VMALLOC_RESERVE += reserve;
+ }
+-
+-int pmd_bad(pmd_t pmd)
+-{
+-	WARN_ON_ONCE(pmd_bad_v1(pmd) != pmd_bad_v2(pmd));
+-
+-	return pmd_bad_v1(pmd);
+-}
+diff --git a/arch/x86/pci/Makefile_32 b/arch/x86/pci/Makefile_32
+index 7fa5198..89ec35d 100644
+--- a/arch/x86/pci/Makefile_32
++++ b/arch/x86/pci/Makefile_32
+@@ -6,11 +6,19 @@ obj-$(CONFIG_PCI_DIRECT)	+= direct.o
+ obj-$(CONFIG_PCI_OLPC)		+= olpc.o
+ 
+ pci-y				:= fixup.o
++
++# Do not change the ordering here. There is a nasty init function
++# ordering dependency which breaks when you move acpi.o below
++# legacy/irq.o
+ pci-$(CONFIG_ACPI)		+= acpi.o
+ pci-y				+= legacy.o irq.o
+ 
+-pci-$(CONFIG_X86_VISWS)		+= visws.o fixup.o
+-pci-$(CONFIG_X86_NUMAQ)		+= numa.o irq.o
++# Careful: VISWS and NUMAQ overrule the pci-y above. The colons are
++# therefore correct. This needs a proper fix by disentangling the code.
++pci-$(CONFIG_X86_VISWS)		:= visws.o fixup.o
++pci-$(CONFIG_X86_NUMAQ)		:= numa.o irq.o
++
++# Necessary for NUMAQ as well
+ pci-$(CONFIG_NUMA)		+= mp_bus_to_node.o
+ 
+ obj-y				+= $(pci-y) common.o early.o
+diff --git a/arch/x86/pci/acpi.c b/arch/x86/pci/acpi.c
+index 1a9c0c6..d95de2f 100644
+--- a/arch/x86/pci/acpi.c
++++ b/arch/x86/pci/acpi.c
+@@ -6,45 +6,6 @@
+ #include <asm/numa.h>
+ #include "pci.h"
+ 
+-static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
+-{
+-	pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
+-	printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
+-	return 0;
+-}
+-
+-static struct dmi_system_id acpi_pciprobe_dmi_table[] __devinitdata = {
+-/*
+- * Systems where PCI IO resource ISA alignment can be skipped
+- * when the ISA enable bit in the bridge control is not set
+- */
+-	{
+-		.callback = can_skip_ioresource_align,
+-		.ident = "IBM System x3800",
+-		.matches = {
+-			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+-			DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
+-		},
+-	},
+-	{
+-		.callback = can_skip_ioresource_align,
+-		.ident = "IBM System x3850",
+-		.matches = {
+-			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+-			DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
+-		},
+-	},
+-	{
+-		.callback = can_skip_ioresource_align,
+-		.ident = "IBM System x3950",
+-		.matches = {
+-			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
+-			DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
+-		},
+-	},
+-	{}
+-};
+-
+ struct pci_root_info {
+ 	char *name;
+ 	unsigned int res_num;
+@@ -196,8 +157,6 @@ struct pci_bus * __devinit pci_acpi_scan_root(struct acpi_device *device, int do
+ 	int pxm;
+ #endif
+ 
+-	dmi_check_system(acpi_pciprobe_dmi_table);
+-
+ 	if (domain && !pci_domains_supported) {
+ 		printk(KERN_WARNING "PCI: Multiple domains not supported "
+ 		       "(dom %d, bus %d)\n", domain, busnum);
+diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c
+index 2a4d751..8545c8a 100644
+--- a/arch/x86/pci/common.c
++++ b/arch/x86/pci/common.c
+@@ -77,17 +77,48 @@ int pcibios_scanned;
+  */
+ DEFINE_SPINLOCK(pci_config_lock);
+ 
+-static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
++static int __devinit can_skip_ioresource_align(const struct dmi_system_id *d)
+ {
+-	struct resource *rom_r = &dev->resource[PCI_ROM_RESOURCE];
+-
+-	if (rom_r->parent)
+-		return;
+-	if (rom_r->start)
+-		/* we deal with BIOS assigned ROM later */
+-		return;
+-	if (!(pci_probe & PCI_ASSIGN_ROMS))
+-		rom_r->start = rom_r->end = rom_r->flags = 0;
++	pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
++	printk(KERN_INFO "PCI: %s detected, can skip ISA alignment\n", d->ident);
++	return 0;
++}
++
++static struct dmi_system_id can_skip_pciprobe_dmi_table[] __devinitdata = {
++/*
++ * Systems where PCI IO resource ISA alignment can be skipped
++ * when the ISA enable bit in the bridge control is not set
++ */
++	{
++		.callback = can_skip_ioresource_align,
++		.ident = "IBM System x3800",
++		.matches = {
++			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
++			DMI_MATCH(DMI_PRODUCT_NAME, "x3800"),
++		},
++	},
++	{
++		.callback = can_skip_ioresource_align,
++		.ident = "IBM System x3850",
++		.matches = {
++			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
++			DMI_MATCH(DMI_PRODUCT_NAME, "x3850"),
++		},
++	},
++	{
++		.callback = can_skip_ioresource_align,
++		.ident = "IBM System x3950",
++		.matches = {
++			DMI_MATCH(DMI_SYS_VENDOR, "IBM"),
++			DMI_MATCH(DMI_PRODUCT_NAME, "x3950"),
++		},
++	},
++	{}
++};
++
++void __init dmi_check_skip_isa_align(void)
++{
++	dmi_check_system(can_skip_pciprobe_dmi_table);
+ }
+ 
+ /*
+@@ -97,11 +128,7 @@ static void __devinit pcibios_fixup_device_resources(struct pci_dev *dev)
+ 
+ void __devinit  pcibios_fixup_bus(struct pci_bus *b)
+ {
+-	struct pci_dev *dev;
+-
+ 	pci_read_bridge_bases(b);
+-	list_for_each_entry(dev, &b->devices, bus_list)
+-		pcibios_fixup_device_resources(dev);
+ }
+ 
+ /*
+@@ -318,13 +345,16 @@ static struct dmi_system_id __devinitdata pciprobe_dmi_table[] = {
+ 	{}
+ };
+ 
++void __init dmi_check_pciprobe(void)
++{
++	dmi_check_system(pciprobe_dmi_table);
++}
++
+ struct pci_bus * __devinit pcibios_scan_root(int busnum)
+ {
+ 	struct pci_bus *bus = NULL;
+ 	struct pci_sysdata *sd;
+ 
+-	dmi_check_system(pciprobe_dmi_table);
+-
+ 	while ((bus = pci_find_next_bus(bus)) != NULL) {
+ 		if (bus->number == busnum) {
+ 			/* Already scanned */
+@@ -462,6 +492,9 @@ char * __devinit  pcibios_setup(char *str)
+ 	} else if (!strcmp(str, "routeirq")) {
+ 		pci_routeirq = 1;
+ 		return NULL;
++	} else if (!strcmp(str, "skip_isa_align")) {
++		pci_probe |= PCI_CAN_SKIP_ISA_ALIGN;
++		return NULL;
+ 	}
+ 	return str;
+ }
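The new "skip_isa_align" keyword provides a manual override for the DMI-based detection: pcibios_setup() is handed the comma-separated suffixes of the pci= kernel parameter, so, assuming the standard pci= plumbing in this tree, the behaviour the IBM DMI matches enable automatically can presumably also be forced at boot with:

    pci=skip_isa_align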
+@@ -489,7 +522,7 @@ void pcibios_disable_device (struct pci_dev *dev)
+ 		pcibios_disable_irq(dev);
+ }
+ 
+-struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
++struct pci_bus * __devinit pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
+ {
+ 	struct pci_bus *bus = NULL;
+ 	struct pci_sysdata *sd;
+@@ -512,7 +545,7 @@ struct pci_bus *pci_scan_bus_on_node(int busno, struct pci_ops *ops, int node)
+ 	return bus;
+ }
+ 
+-struct pci_bus *pci_scan_bus_with_sysdata(int busno)
++struct pci_bus * __devinit pci_scan_bus_with_sysdata(int busno)
+ {
+ 	return pci_scan_bus_on_node(busno, &pci_root_ops, -1);
+ }
+diff --git a/arch/x86/pci/fixup.c b/arch/x86/pci/fixup.c
+index b60b2ab..ff3a6a3 100644
+--- a/arch/x86/pci/fixup.c
++++ b/arch/x86/pci/fixup.c
+@@ -502,7 +502,7 @@ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_SIEMENS, 0x0015,
+  */
+ static void fam10h_pci_cfg_space_size(struct pci_dev *dev)
+ {
+-	dev->cfg_size = pci_cfg_space_size_ext(dev, 0);
++	dev->cfg_size = pci_cfg_space_size_ext(dev);
+ }
+ 
+ DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_AMD, 0x1200, fam10h_pci_cfg_space_size);
+diff --git a/arch/x86/pci/init.c b/arch/x86/pci/init.c
+index dd30c60..e70b9c5 100644
+--- a/arch/x86/pci/init.c
++++ b/arch/x86/pci/init.c
+@@ -33,6 +33,10 @@ static __init int pci_access_init(void)
+ 		printk(KERN_ERR
+ 		"PCI: Fatal: No config space access function found\n");
+ 
++	dmi_check_pciprobe();
++
++	dmi_check_skip_isa_align();
++
+ 	return 0;
+ }
+ arch_initcall(pci_access_init);
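Hoisting the two DMI scans into pci_access_init() means they run exactly once, at arch_initcall time, before anything that consults pci_probe. For orientation, the initcall levels of this kernel generation order roughly like so (a sketch of the include/linux/init.h macros, quoted from memory):

    #define arch_initcall(fn)       __define_initcall("3", fn, 3)
    #define subsys_initcall(fn)     __define_initcall("4", fn, 4)
    #define device_initcall(fn)     __define_initcall("6", fn, 6)
    /* lower level runs earlier: arch (3) precedes the subsys (4) and
     * device (6) initcalls that perform the actual PCI bus scans */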
+diff --git a/arch/x86/pci/pci.h b/arch/x86/pci/pci.h
+index c58805a..f3972b1 100644
+--- a/arch/x86/pci/pci.h
++++ b/arch/x86/pci/pci.h
+@@ -38,6 +38,9 @@ enum pci_bf_sort_state {
+ 	pci_dmi_bf,
+ };
+ 
++extern void __init dmi_check_pciprobe(void);
++extern void __init dmi_check_skip_isa_align(void);
++
+ /* pci-i386.c */
+ 
+ extern unsigned int pcibios_max_latency;
+diff --git a/arch/x86/vdso/vdso32-setup.c b/arch/x86/vdso/vdso32-setup.c
+index 4dceeb1..cf058fe 100644
+--- a/arch/x86/vdso/vdso32-setup.c
++++ b/arch/x86/vdso/vdso32-setup.c
+@@ -162,7 +162,7 @@ static __init void relocate_vdso(Elf32_Ehdr *ehdr)
+ 	Elf32_Shdr *shdr;
+ 	int i;
+ 
+-	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, 4) != 0 ||
++	BUG_ON(memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0 ||
+ 	       !elf_check_arch_ia32(ehdr) ||
+ 	       ehdr->e_type != ET_DYN);
+ 
+diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c
+index 4db42bf..6952768 100644
+--- a/arch/x86/video/fbdev.c
++++ b/arch/x86/video/fbdev.c
+@@ -1,5 +1,4 @@
+ /*
+- *
+  * Copyright (C) 2007 Antonino Daplas <adaplas at gmail.com>
+  *
+  * This file is subject to the terms and conditions of the GNU General Public
+@@ -29,3 +28,4 @@ int fb_is_primary_device(struct fb_info *info)
+ 	return retval;
+ }
+ EXPORT_SYMBOL(fb_is_primary_device);
++MODULE_LICENSE("GPL");
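The MODULE_LICENSE() addition above is not cosmetic: when this file is built as a module, a missing license string taints the kernel at load time and blocks access to GPL-only symbols. For reference, the usual minimal module boilerplate (a generic sketch, not part of this patch):

    #include <linux/module.h>

    static int __init example_init(void)
    {
            return 0;                       /* registration would go here */
    }

    static void __exit example_exit(void)
    {
            /* teardown would go here */
    }

    module_init(example_init);
    module_exit(example_exit);
    MODULE_LICENSE("GPL");                  /* omit this and the kernel taints */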
+diff --git a/block/blk-core.c b/block/blk-core.c
+index b754a4a..2987fe4 100644
+--- a/block/blk-core.c
++++ b/block/blk-core.c
+@@ -54,15 +54,16 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_done);
+ 
+ static void drive_stat_acct(struct request *rq, int new_io)
+ {
++	struct hd_struct *part;
+ 	int rw = rq_data_dir(rq);
+ 
+ 	if (!blk_fs_request(rq) || !rq->rq_disk)
+ 		return;
+ 
+-	if (!new_io) {
+-		__all_stat_inc(rq->rq_disk, merges[rw], rq->sector);
+-	} else {
+-		struct hd_struct *part = get_part(rq->rq_disk, rq->sector);
++	part = get_part(rq->rq_disk, rq->sector);
++	if (!new_io)
++		__all_stat_inc(rq->rq_disk, part, merges[rw], rq->sector);
++	else {
+ 		disk_round_stats(rq->rq_disk);
+ 		rq->rq_disk->in_flight++;
+ 		if (part) {
+@@ -253,9 +254,11 @@ EXPORT_SYMBOL(__generic_unplug_device);
+  **/
+ void generic_unplug_device(struct request_queue *q)
+ {
+-	spin_lock_irq(q->queue_lock);
+-	__generic_unplug_device(q);
+-	spin_unlock_irq(q->queue_lock);
++	if (blk_queue_plugged(q)) {
++		spin_lock_irq(q->queue_lock);
++		__generic_unplug_device(q);
++		spin_unlock_irq(q->queue_lock);
++	}
+ }
+ EXPORT_SYMBOL(generic_unplug_device);
+ 
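The generic_unplug_device() rework is a fast-path optimization: most callers find the queue already unplugged, and taking queue_lock just to learn that is wasted work. Assuming blk_queue_plugged() is the usual queue-flag test of this era, the unlocked check is a single bit test, and the race it admits is harmless, since a concurrently plugged queue gets unplugged by the next caller or by the unplug timer:

    /* sketch of the flag test assumed above */
    #define blk_queue_plugged(q) \
            test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags)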
+@@ -1536,10 +1539,11 @@ static int __end_that_request_first(struct request *req, int error,
+ 	}
+ 
+ 	if (blk_fs_request(req) && req->rq_disk) {
++		struct hd_struct *part = get_part(req->rq_disk, req->sector);
+ 		const int rw = rq_data_dir(req);
+ 
+-		all_stat_add(req->rq_disk, sectors[rw],
+-			     nr_bytes >> 9, req->sector);
++		all_stat_add(req->rq_disk, part, sectors[rw],
++				nr_bytes >> 9, req->sector);
+ 	}
+ 
+ 	total_bytes = bio_nbytes = 0;
+@@ -1725,8 +1729,8 @@ static void end_that_request_last(struct request *req, int error)
+ 		const int rw = rq_data_dir(req);
+ 		struct hd_struct *part = get_part(disk, req->sector);
+ 
+-		__all_stat_inc(disk, ios[rw], req->sector);
+-		__all_stat_add(disk, ticks[rw], duration, req->sector);
++		__all_stat_inc(disk, part, ios[rw], req->sector);
++		__all_stat_add(disk, part, ticks[rw], duration, req->sector);
+ 		disk_round_stats(disk);
+ 		disk->in_flight--;
+ 		if (part) {
+diff --git a/block/blk-ioc.c b/block/blk-ioc.c
+index e34df7c..012f065 100644
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -41,8 +41,8 @@ int put_io_context(struct io_context *ioc)
+ 		rcu_read_lock();
+ 		if (ioc->aic && ioc->aic->dtor)
+ 			ioc->aic->dtor(ioc->aic);
+-		rcu_read_unlock();
+ 		cfq_dtor(ioc);
++		rcu_read_unlock();
+ 
+ 		kmem_cache_free(iocontext_cachep, ioc);
+ 		return 1;
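The reordering in put_io_context() is a correctness fix rather than a cleanup: cfq_dtor() walks the ioc's cic list under RCU, so it has to run inside the read-side critical section. The traversal has the usual shape (a sketch with illustrative names, using the four-argument hlist_for_each_entry_rcu of this kernel generation):

    static void walk_cics(struct io_context *ioc,
                          void (*fn)(struct cfq_io_context *))
    {
            struct cfq_io_context *cic;
            struct hlist_node *n;

            rcu_read_lock();
            hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
                    fn(cic);        /* entries are freed only after a grace period */
            rcu_read_unlock();      /* unlocking any earlier permits use-after-free */
    }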
+diff --git a/block/blk-merge.c b/block/blk-merge.c
+index 73b2356..651136a 100644
+--- a/block/blk-merge.c
++++ b/block/blk-merge.c
+@@ -149,9 +149,9 @@ static int blk_phys_contig_segment(struct request_queue *q, struct bio *bio,
+ static int blk_hw_contig_segment(struct request_queue *q, struct bio *bio,
+ 				 struct bio *nxt)
+ {
+-	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
++	if (!bio_flagged(bio, BIO_SEG_VALID))
+ 		blk_recount_segments(q, bio);
+-	if (unlikely(!bio_flagged(nxt, BIO_SEG_VALID)))
++	if (!bio_flagged(nxt, BIO_SEG_VALID))
+ 		blk_recount_segments(q, nxt);
+ 	if (!BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(nxt)) ||
+ 	    BIOVEC_VIRT_OVERSIZE(bio->bi_hw_back_size + nxt->bi_hw_front_size))
+@@ -312,9 +312,9 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req,
+ 			q->last_merge = NULL;
+ 		return 0;
+ 	}
+-	if (unlikely(!bio_flagged(req->biotail, BIO_SEG_VALID)))
++	if (!bio_flagged(req->biotail, BIO_SEG_VALID))
+ 		blk_recount_segments(q, req->biotail);
+-	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
++	if (!bio_flagged(bio, BIO_SEG_VALID))
+ 		blk_recount_segments(q, bio);
+ 	len = req->biotail->bi_hw_back_size + bio->bi_hw_front_size;
+ 	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(req->biotail), __BVEC_START(bio))
+@@ -352,9 +352,9 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req,
+ 		return 0;
+ 	}
+ 	len = bio->bi_hw_back_size + req->bio->bi_hw_front_size;
+-	if (unlikely(!bio_flagged(bio, BIO_SEG_VALID)))
++	if (!bio_flagged(bio, BIO_SEG_VALID))
+ 		blk_recount_segments(q, bio);
+-	if (unlikely(!bio_flagged(req->bio, BIO_SEG_VALID)))
++	if (!bio_flagged(req->bio, BIO_SEG_VALID))
+ 		blk_recount_segments(q, req->bio);
+ 	if (BIOVEC_VIRT_MERGEABLE(__BVEC_END(bio), __BVEC_START(req->bio)) &&
+ 	    !BIOVEC_VIRT_OVERSIZE(len)) {
+diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
+index e85c401..304ec73 100644
+--- a/block/blk-sysfs.c
++++ b/block/blk-sysfs.c
+@@ -146,11 +146,13 @@ static ssize_t queue_nomerges_store(struct request_queue *q, const char *page,
+ 	unsigned long nm;
+ 	ssize_t ret = queue_var_store(&nm, page, count);
+ 
++	spin_lock_irq(q->queue_lock);
+ 	if (nm)
+-	       set_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
++		queue_flag_set(QUEUE_FLAG_NOMERGES, q);
+ 	else
+-	       clear_bit(QUEUE_FLAG_NOMERGES, &q->queue_flags);
++		queue_flag_clear(QUEUE_FLAG_NOMERGES, q);
+ 
++	spin_unlock_irq(q->queue_lock);
+ 	return ret;
+ }
+ 
+diff --git a/block/blk-tag.c b/block/blk-tag.c
+index de64e04..32667be 100644
+--- a/block/blk-tag.c
++++ b/block/blk-tag.c
+@@ -70,7 +70,7 @@ void __blk_queue_free_tags(struct request_queue *q)
+ 	__blk_free_tags(bqt);
+ 
+ 	q->queue_tags = NULL;
+-	queue_flag_clear(QUEUE_FLAG_QUEUED, q);
++	queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
+ }
+ 
+ /**
+@@ -98,7 +98,7 @@ EXPORT_SYMBOL(blk_free_tags);
+  **/
+ void blk_queue_free_tags(struct request_queue *q)
+ {
+-	queue_flag_clear(QUEUE_FLAG_QUEUED, q);
++	queue_flag_clear_unlocked(QUEUE_FLAG_QUEUED, q);
+ }
+ EXPORT_SYMBOL(blk_queue_free_tags);
+ 
+@@ -171,6 +171,9 @@ EXPORT_SYMBOL(blk_init_tags);
+  * @q:  the request queue for the device
+  * @depth:  the maximum queue depth supported
+  * @tags: the tag to use
++ *
++ * Queue lock must be held here if the function is called to resize an
++ * existing map.
+  **/
+ int blk_queue_init_tags(struct request_queue *q, int depth,
+ 			struct blk_queue_tag *tags)
+@@ -197,7 +200,7 @@ int blk_queue_init_tags(struct request_queue *q, int depth,
+ 	 * assign it, all done
+ 	 */
+ 	q->queue_tags = tags;
+-	queue_flag_set(QUEUE_FLAG_QUEUED, q);
++	queue_flag_set_unlocked(QUEUE_FLAG_QUEUED, q);
+ 	INIT_LIST_HEAD(&q->tag_busy_list);
+ 	return 0;
+ fail:
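The queue_flag_*_unlocked() conversions here match the locking comment added to blk_queue_init_tags() above: the plain queue_flag_set()/queue_flag_clear() helpers assert that queue_lock is held, while these tag setup/teardown paths legitimately run without it. A sketch of the split, assuming blkdev.h helpers along these lines:

    static inline void queue_flag_set_unlocked(unsigned int flag,
                                               struct request_queue *q)
    {
            __set_bit(flag, &q->queue_flags);
    }

    static inline void queue_flag_set(unsigned int flag,
                                      struct request_queue *q)
    {
            WARN_ON_ONCE(!queue_is_locked(q));      /* caller holds queue_lock */
            __set_bit(flag, &q->queue_flags);
    }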
+diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c
+index f4e1006..b399c62 100644
+--- a/block/cfq-iosched.c
++++ b/block/cfq-iosched.c
+@@ -1142,6 +1142,17 @@ static void cfq_put_queue(struct cfq_queue *cfqq)
+ 	kmem_cache_free(cfq_pool, cfqq);
+ }
+ 
++static void
++__call_for_each_cic(struct io_context *ioc,
++		    void (*func)(struct io_context *, struct cfq_io_context *))
++{
++	struct cfq_io_context *cic;
++	struct hlist_node *n;
++
++	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
++		func(ioc, cic);
++}
++
+ /*
+  * Call func for each cic attached to this ioc.
+  */
+@@ -1149,12 +1160,8 @@ static void
+ call_for_each_cic(struct io_context *ioc,
+ 		  void (*func)(struct io_context *, struct cfq_io_context *))
+ {
+-	struct cfq_io_context *cic;
+-	struct hlist_node *n;
+-
+ 	rcu_read_lock();
+-	hlist_for_each_entry_rcu(cic, n, &ioc->cic_list, cic_list)
+-		func(ioc, cic);
++	__call_for_each_cic(ioc, func);
+ 	rcu_read_unlock();
+ }
+ 
+@@ -1198,7 +1205,7 @@ static void cfq_free_io_context(struct io_context *ioc)
+ 	 * should be ok to iterate over the known list, we will see all cic's
+ 	 * since no new ones are added.
+ 	 */
+-	call_for_each_cic(ioc, cic_free_func);
++	__call_for_each_cic(ioc, cic_free_func);
+ }
+ 
+ static void cfq_exit_cfqq(struct cfq_data *cfqd, struct cfq_queue *cfqq)
+@@ -1296,10 +1303,10 @@ static void cfq_init_prio_data(struct cfq_queue *cfqq, struct io_context *ioc)
+ 		printk(KERN_ERR "cfq: bad prio %x\n", ioprio_class);
+ 	case IOPRIO_CLASS_NONE:
+ 		/*
+-		 * no prio set, place us in the middle of the BE classes
++		 * no prio set, inherit CPU scheduling settings
+ 		 */
+ 		cfqq->ioprio = task_nice_ioprio(tsk);
+-		cfqq->ioprio_class = IOPRIO_CLASS_BE;
++		cfqq->ioprio_class = task_nice_ioclass(tsk);
+ 		break;
+ 	case IOPRIO_CLASS_RT:
+ 		cfqq->ioprio = task_ioprio(ioc);
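With this hunk a cfq queue whose task sets no explicit I/O priority inherits its class from the CPU scheduling policy instead of always landing in best-effort. task_nice_ioclass() itself is not part of this patch; presumably it maps policies along these lines (an assumed sketch, not the verified helper body):

    static inline int task_nice_ioclass(struct task_struct *task)
    {
            if (task->policy == SCHED_IDLE)
                    return IOPRIO_CLASS_IDLE;       /* idle CPU class -> idle I/O */
            else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR)
                    return IOPRIO_CLASS_RT;         /* realtime CPU -> realtime I/O */
            else
                    return IOPRIO_CLASS_BE;         /* everything else: best-effort */
    }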
+diff --git a/drivers/accessibility/Kconfig b/drivers/accessibility/Kconfig
+index 1264c4b..ef3b65b 100644
+--- a/drivers/accessibility/Kconfig
++++ b/drivers/accessibility/Kconfig
+@@ -1,7 +1,17 @@
+ menuconfig ACCESSIBILITY
+ 	bool "Accessibility support"
+ 	---help---
+-	  Enable a submenu where accessibility items may be enabled.
++	  Accessibility covers special kinds of hardware devices and
++	  software adapters which help people with disabilities (e.g.
++	  blindness) to use computers.
++
++	  That includes braille devices, speech synthesis, keyboard
++	  remapping, etc.
++
++	  Say Y here to get to see options for accessibility.
++	  This option alone does not add any kernel code.
++
++	  If you say N, all options in this submenu will be skipped and disabled.
+ 
+ 	  If unsure, say N.
+ 
+diff --git a/drivers/ata/Kconfig b/drivers/ata/Kconfig
+index 1c11df9..9bf2986 100644
+--- a/drivers/ata/Kconfig
++++ b/drivers/ata/Kconfig
+@@ -205,8 +205,8 @@ config SATA_VITESSE
+ 	  If unsure, say N.
+ 
+ config SATA_INIC162X
+-	tristate "Initio 162x SATA support (HIGHLY EXPERIMENTAL)"
+-	depends on PCI && EXPERIMENTAL
++	tristate "Initio 162x SATA support"
++	depends on PCI
+ 	help
+ 	  This option enables support for Initio 162x Serial ATA.
+ 
+@@ -697,6 +697,15 @@ config PATA_SCC
+ 
+ 	  If unsure, say N.
+ 
++config PATA_SCH
++	tristate "Intel SCH PATA support"
++	depends on PCI
++	help
++	  This option enables support for Intel SCH PATA on the Intel
++	  SCH (US15W, US15L, UL11L) series host controllers.
++
++	  If unsure, say N.
++
+ config PATA_BF54X
+ 	tristate "Blackfin 54x ATAPI support"
+ 	depends on BF542 || BF548 || BF549
+diff --git a/drivers/ata/Makefile b/drivers/ata/Makefile
+index b693d82..674965f 100644
+--- a/drivers/ata/Makefile
++++ b/drivers/ata/Makefile
+@@ -67,6 +67,7 @@ obj-$(CONFIG_PATA_SIS)		+= pata_sis.o
+ obj-$(CONFIG_PATA_TRIFLEX)	+= pata_triflex.o
+ obj-$(CONFIG_PATA_IXP4XX_CF)	+= pata_ixp4xx_cf.o
+ obj-$(CONFIG_PATA_SCC)		+= pata_scc.o
++obj-$(CONFIG_PATA_SCH)		+= pata_sch.o
+ obj-$(CONFIG_PATA_BF54X)	+= pata_bf54x.o
+ obj-$(CONFIG_PATA_PLATFORM)	+= pata_platform.o
+ obj-$(CONFIG_PATA_OF_PLATFORM)	+= pata_of_platform.o
+diff --git a/drivers/ata/ahci.c b/drivers/ata/ahci.c
+index 8cace9a..97f83fb 100644
+--- a/drivers/ata/ahci.c
++++ b/drivers/ata/ahci.c
+@@ -1267,9 +1267,7 @@ static int ahci_check_ready(struct ata_link *link)
+ 	void __iomem *port_mmio = ahci_port_base(link->ap);
+ 	u8 status = readl(port_mmio + PORT_TFDATA) & 0xFF;
+ 
+-	if (!(status & ATA_BUSY))
+-		return 1;
+-	return 0;
++	return ata_check_ready(status);
+ }
+ 
+ static int ahci_softreset(struct ata_link *link, unsigned int *class,
+diff --git a/drivers/ata/ata_generic.c b/drivers/ata/ata_generic.c
+index 47aeccd..75a406f 100644
+--- a/drivers/ata/ata_generic.c
++++ b/drivers/ata/ata_generic.c
+@@ -152,6 +152,12 @@ static int ata_generic_init_one(struct pci_dev *dev, const struct pci_device_id
+ 	if (dev->vendor == PCI_VENDOR_ID_AL)
+ 		ata_pci_bmdma_clear_simplex(dev);
+ 
++	if (dev->vendor == PCI_VENDOR_ID_ATI) {
++		int rc = pcim_enable_device(dev);
++		if (rc < 0)
++			return rc;
++		pcim_pin_device(dev);
++	}
+ 	return ata_pci_sff_init_one(dev, ppi, &generic_sht, NULL);
+ }
+ 
+diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
+index ea2c764..a9027b8 100644
+--- a/drivers/ata/ata_piix.c
++++ b/drivers/ata/ata_piix.c
+@@ -1348,6 +1348,8 @@ static void __devinit piix_init_sidpr(struct ata_host *host)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(host->dev);
+ 	struct piix_host_priv *hpriv = host->private_data;
++	struct ata_device *dev0 = &host->ports[0]->link.device[0];
++	u32 scontrol;
+ 	int i;
+ 
+ 	/* check for availability */
+@@ -1366,6 +1368,29 @@ static void __devinit piix_init_sidpr(struct ata_host *host)
+ 		return;
+ 
+ 	hpriv->sidpr = pcim_iomap_table(pdev)[PIIX_SIDPR_BAR];
++
++	/* SCR access via SIDPR doesn't work on some configurations.
++	 * Give it a test drive by inhibiting power save modes which
++	 * we'll do anyway.
++	 */
++	scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
++
++	/* if IPM is already 3, SCR access is probably working.  Don't
++	 * un-inhibit power save modes as BIOS might have inhibited
++	 * them for a reason.
++	 */
++	if ((scontrol & 0xf00) != 0x300) {
++		scontrol |= 0x300;
++		piix_sidpr_write(dev0, SCR_CONTROL, scontrol);
++		scontrol = piix_sidpr_read(dev0, SCR_CONTROL);
++
++		if ((scontrol & 0xf00) != 0x300) {
++			dev_printk(KERN_INFO, host->dev, "SCR access via "
++				   "SIDPR is available but doesn't work\n");
++			return;
++		}
++	}
++
+ 	host->ports[0]->ops = &piix_sidpr_sata_ops;
+ 	host->ports[1]->ops = &piix_sidpr_sata_ops;
+ }
+diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c
+index 3bc4885..927b692 100644
+--- a/drivers/ata/libata-core.c
++++ b/drivers/ata/libata-core.c
+@@ -6292,6 +6292,7 @@ EXPORT_SYMBOL_GPL(ata_eh_freeze_port);
+ EXPORT_SYMBOL_GPL(ata_eh_thaw_port);
+ EXPORT_SYMBOL_GPL(ata_eh_qc_complete);
+ EXPORT_SYMBOL_GPL(ata_eh_qc_retry);
++EXPORT_SYMBOL_GPL(ata_eh_analyze_ncq_error);
+ EXPORT_SYMBOL_GPL(ata_do_eh);
+ EXPORT_SYMBOL_GPL(ata_std_error_handler);
+ 
+diff --git a/drivers/ata/libata-eh.c b/drivers/ata/libata-eh.c
+index 61dcd00..62e0331 100644
+--- a/drivers/ata/libata-eh.c
++++ b/drivers/ata/libata-eh.c
+@@ -1357,7 +1357,7 @@ static void ata_eh_analyze_serror(struct ata_link *link)
+  *	LOCKING:
+  *	Kernel thread context (may sleep).
+  */
+-static void ata_eh_analyze_ncq_error(struct ata_link *link)
++void ata_eh_analyze_ncq_error(struct ata_link *link)
+ {
+ 	struct ata_port *ap = link->ap;
+ 	struct ata_eh_context *ehc = &link->eh_context;
+diff --git a/drivers/ata/libata-sff.c b/drivers/ata/libata-sff.c
+index 2ec65a8..3c2d228 100644
+--- a/drivers/ata/libata-sff.c
++++ b/drivers/ata/libata-sff.c
+@@ -314,11 +314,7 @@ static int ata_sff_check_ready(struct ata_link *link)
+ {
+ 	u8 status = link->ap->ops->sff_check_status(link->ap);
+ 
+-	if (!(status & ATA_BUSY))
+-		return 1;
+-	if (status == 0xff)
+-		return -ENODEV;
+-	return 0;
++	return ata_check_ready(status);
+ }
+ 
+ /**
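Both conversions above (ahci and libata-sff) funnel their open-coded status tests through the new ata_check_ready() helper; its behaviour can be read straight off the deleted lines (a reconstruction from those hunks, not quoted from libata.h):

    static inline int ata_check_ready(u8 status)
    {
            if (!(status & ATA_BUSY))
                    return 1;               /* device ready */
            if (status == 0xff)
                    return -ENODEV;         /* 0xff: nothing on the bus */
            return 0;                       /* still busy, keep polling */
    }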
+diff --git a/drivers/ata/pata_acpi.c b/drivers/ata/pata_acpi.c
+index c5f91e6..fbe6057 100644
+--- a/drivers/ata/pata_acpi.c
++++ b/drivers/ata/pata_acpi.c
+@@ -259,6 +259,12 @@ static int pacpi_init_one (struct pci_dev *pdev, const struct pci_device_id *id)
+ 		.port_ops	= &pacpi_ops,
+ 	};
+ 	const struct ata_port_info *ppi[] = { &info, NULL };
++	if (pdev->vendor == PCI_VENDOR_ID_ATI) {
++		int rc = pcim_enable_device(pdev);
++		if (rc < 0)
++			return rc;
++		pcim_pin_device(pdev);
++	}
+ 	return ata_pci_sff_init_one(pdev, ppi, &pacpi_sht, NULL);
+ }
+ 
+diff --git a/drivers/ata/pata_sch.c b/drivers/ata/pata_sch.c
+new file mode 100644
+index 0000000..c8cc027
+--- /dev/null
++++ b/drivers/ata/pata_sch.c
+@@ -0,0 +1,206 @@
++/*
++ *  pata_sch.c - Intel SCH PATA controllers
++ *
++ *  Copyright (c) 2008 Alek Du <alek.du at intel.com>
++ *
++ *  This program is free software; you can redistribute it and/or modify
++ *  it under the terms of the GNU General Public License 2 as published
++ *  by the Free Software Foundation.
++ *
++ *  This program is distributed in the hope that it will be useful,
++ *  but WITHOUT ANY WARRANTY; without even the implied warranty of
++ *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ *  GNU General Public License for more details.
++ *
++ *  You should have received a copy of the GNU General Public License
++ *  along with this program; see the file COPYING.  If not, write to
++ *  the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
++ *
++ */
++
++/*
++ *  Supports:
++ *    Intel SCH (AF82US15W, AF82US15L, AF82UL11L) chipsets -- see spec at:
++ *    http://download.intel.com/design/chipsets/embedded/datashts/319537.pdf
++ */
++
++#include <linux/kernel.h>
++#include <linux/module.h>
++#include <linux/pci.h>
++#include <linux/init.h>
++#include <linux/blkdev.h>
++#include <linux/delay.h>
++#include <linux/device.h>
++#include <scsi/scsi_host.h>
++#include <linux/libata.h>
++#include <linux/dmi.h>
++
++#define DRV_NAME	"pata_sch"
++#define DRV_VERSION	"0.2"
++
++/* see SCH datasheet page 351 */
++enum {
++	D0TIM	= 0x80,		/* Device 0 Timing Register */
++	D1TIM	= 0x84,		/* Device 1 Timing Register */
++	PM	= 0x07,		/* PIO Mode Bit Mask */
++	MDM	= (0x03 << 8),	/* Multi-word DMA Mode Bit Mask */
++	UDM	= (0x07 << 16), /* Ultra DMA Mode Bit Mask */
++	PPE	= (1 << 30),	/* Prefetch/Post Enable */
++	USD	= (1 << 31),	/* Use Synchronous DMA */
++};
++
++static int sch_init_one(struct pci_dev *pdev,
++			 const struct pci_device_id *ent);
++static void sch_set_piomode(struct ata_port *ap, struct ata_device *adev);
++static void sch_set_dmamode(struct ata_port *ap, struct ata_device *adev);
++
++static const struct pci_device_id sch_pci_tbl[] = {
++	/* Intel SCH PATA Controller */
++	{ PCI_VDEVICE(INTEL, PCI_DEVICE_ID_INTEL_SCH_IDE), 0 },
++	{ }	/* terminate list */
++};
++
++static struct pci_driver sch_pci_driver = {
++	.name			= DRV_NAME,
++	.id_table		= sch_pci_tbl,
++	.probe			= sch_init_one,
++	.remove			= ata_pci_remove_one,
++#ifdef CONFIG_PM
++	.suspend		= ata_pci_device_suspend,
++	.resume			= ata_pci_device_resume,
++#endif
++};
++
++static struct scsi_host_template sch_sht = {
++	ATA_BMDMA_SHT(DRV_NAME),
++};
++
++static struct ata_port_operations sch_pata_ops = {
++	.inherits		= &ata_bmdma_port_ops,
++	.cable_detect		= ata_cable_unknown,
++	.set_piomode		= sch_set_piomode,
++	.set_dmamode		= sch_set_dmamode,
++};
++
++static struct ata_port_info sch_port_info = {
++	.flags		= 0,
++	.pio_mask	= ATA_PIO4,   /* pio0-4 */
++	.mwdma_mask	= ATA_MWDMA2, /* mwdma0-2 */
++	.udma_mask	= ATA_UDMA5,  /* udma0-5 */
++	.port_ops	= &sch_pata_ops,
++};
++
++MODULE_AUTHOR("Alek Du <alek.du at intel.com>");
++MODULE_DESCRIPTION("SCSI low-level driver for Intel SCH PATA controllers");
++MODULE_LICENSE("GPL");
++MODULE_DEVICE_TABLE(pci, sch_pci_tbl);
++MODULE_VERSION(DRV_VERSION);
++
++/**
++ *	sch_set_piomode - Initialize host controller PATA PIO timings
++ *	@ap: Port whose timings we are configuring
++ *	@adev: ATA device
++ *
++ *	Set PIO mode for device, in host controller PCI config space.
++ *
++ *	LOCKING:
++ *	None (inherited from caller).
++ */
++
++static void sch_set_piomode(struct ata_port *ap, struct ata_device *adev)
++{
++	unsigned int pio	= adev->pio_mode - XFER_PIO_0;
++	struct pci_dev *dev	= to_pci_dev(ap->host->dev);
++	unsigned int port	= adev->devno ? D1TIM : D0TIM;
++	unsigned int data;
++
++	pci_read_config_dword(dev, port, &data);
++	/* see SCH datasheet page 351 */
++	/* set PIO mode */
++	data &= ~(PM | PPE);
++	data |= pio;
++	/* enable PPE for block device */
++	if (adev->class == ATA_DEV_ATA)
++		data |= PPE;
++	pci_write_config_dword(dev, port, data);
++}
++
++/**
++ *	sch_set_dmamode - Initialize host controller PATA DMA timings
++ *	@ap: Port whose timings we are configuring
++ *	@adev: ATA device
++ *
++ *	Set MW/UDMA mode for device, in host controller PCI config space.
++ *
++ *	LOCKING:
++ *	None (inherited from caller).
++ */
++
++static void sch_set_dmamode(struct ata_port *ap, struct ata_device *adev)
++{
++	unsigned int dma_mode	= adev->dma_mode;
++	struct pci_dev *dev	= to_pci_dev(ap->host->dev);
++	unsigned int port	= adev->devno ? D1TIM : D0TIM;
++	unsigned int data;
++
++	pci_read_config_dword(dev, port, &data);
++	/* see SCH datasheet page 351 */
++	if (dma_mode >= XFER_UDMA_0) {
++		/* enable Synchronous DMA mode */
++		data |= USD;
++		data &= ~UDM;
++		data |= (dma_mode - XFER_UDMA_0) << 16;
++	} else { /* must be MWDMA mode, since we masked SWDMA already */
++		data &= ~(USD | MDM);
++		data |= (dma_mode - XFER_MW_DMA_0) << 8;
++	}
++	pci_write_config_dword(dev, port, data);
++}
++
++/**
++ *	sch_init_one - Register SCH ATA PCI device with kernel services
++ *	@pdev: PCI device to register
++ *	@ent: Entry in sch_pci_tbl matching with @pdev
++ *
++ *	LOCKING:
++ *	Inherited from PCI layer (may sleep).
++ *
++ *	RETURNS:
++ *	Zero on success, or -ERRNO value.
++ */
++
++static int __devinit sch_init_one(struct pci_dev *pdev,
++				   const struct pci_device_id *ent)
++{
++	static int printed_version;
++	const struct ata_port_info *ppi[] = { &sch_port_info, NULL };
++	struct ata_host *host;
++	int rc;
++
++	if (!printed_version++)
++		dev_printk(KERN_DEBUG, &pdev->dev,
++			   "version " DRV_VERSION "\n");
++
++	/* enable device and prepare host */
++	rc = pcim_enable_device(pdev);
++	if (rc)
++		return rc;
++	rc = ata_pci_sff_prepare_host(pdev, ppi, &host);
++	if (rc)
++		return rc;
++	pci_set_master(pdev);
++	return ata_pci_sff_activate_host(host, ata_sff_interrupt, &sch_sht);
++}
++
++static int __init sch_init(void)
++{
++	return pci_register_driver(&sch_pci_driver);
++}
++
++static void __exit sch_exit(void)
++{
++	pci_unregister_driver(&sch_pci_driver);
++}
++
++module_init(sch_init);
++module_exit(sch_exit);
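Tying the pieces of pata_sch.c together: each device gets one 32-bit timing dword (D0TIM/D1TIM), and mode changes are plain read-modify-write cycles against the masks in the enum. Selecting, say, UDMA3 on device 1 would come down to roughly this (hypothetical example following sch_set_dmamode, not extra driver code):

    u32 data;

    pci_read_config_dword(pdev, D1TIM, &data);
    data |= USD;            /* engage synchronous (ultra) DMA */
    data &= ~UDM;           /* clear the old UDMA mode field */
    data |= 3 << 16;        /* UDMA mode 3 into the UDM bits */
    pci_write_config_dword(pdev, D1TIM, data);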
+diff --git a/drivers/ata/sata_inic162x.c b/drivers/ata/sata_inic162x.c
+index d27bb9a..3ead02f 100644
+--- a/drivers/ata/sata_inic162x.c
++++ b/drivers/ata/sata_inic162x.c
+@@ -10,13 +10,33 @@
+  * right.  Documentation is available at initio's website but it only
+  * documents registers (not programming model).
+  *
+- * - ATA disks work.
+- * - Hotplug works.
+- * - ATAPI read works but burning doesn't.  This thing is really
+- *   peculiar about ATAPI and I couldn't figure out how ATAPI PIO and
+- *   ATAPI DMA WRITE should be programmed.  If you've got a clue, be
+- *   my guest.
+- * - Both STR and STD work.
++ * This driver has an interesting history.  The first version was
++ * written from the documentation and a 2.4 IDE driver posted by a
++ * Taiwanese company, which didn't use any IDMA features and couldn't
++ * handle LBA48.  The resulting driver couldn't handle LBA48 devices
++ * either, making it pretty useless.
++ *
++ * After a while, initio picked the driver up, renamed it to
++ * sata_initio162x, updated it to use IDMA for ATA DMA commands and
++ * posted it on their website.  It only used ATA_PROT_DMA for IDMA and
++ * attaching both devices and issuing IDMA and !IDMA commands
++ * simultaneously broke it due to PIRQ masking interaction but it did
++ * show how to use the IDMA (ADMA + some initio specific twists)
++ * engine.
++ *
++ * Then, I picked up their changes again and here's the usable driver
++ * which uses IDMA for everything.  Everything works now including
++ * LBA48, CD/DVD burning, suspend/resume and hotplug.  There are some
++ * issues tho.  Result Tf is not resported properly, NCQ isn't
++ * supported yet and CD/DVD writing works with DMA assisted PIO
++ * protocol (which, for native SATA devices, shouldn't cause any
++ * noticeable difference).
++ *
++ * Anyways, so, here's finally a working driver for inic162x.  Enjoy!
++ *
++ * initio: If you guys wanna improve the driver regarding result TF
++ * access and other stuff, please feel free to contact me.  I'll be
++ * happy to assist.
+  */
+ 
+ #include <linux/kernel.h>
+@@ -28,13 +48,19 @@
+ #include <scsi/scsi_device.h>
+ 
+ #define DRV_NAME	"sata_inic162x"
+-#define DRV_VERSION	"0.3"
++#define DRV_VERSION	"0.4"
+ 
+ enum {
+-	MMIO_BAR		= 5,
++	MMIO_BAR_PCI		= 5,
++	MMIO_BAR_CARDBUS	= 1,
+ 
+ 	NR_PORTS		= 2,
+ 
++	IDMA_CPB_TBL_SIZE	= 4 * 32,
++
++	INIC_DMA_BOUNDARY	= 0xffffff,
++
++	HOST_ACTRL		= 0x08,
+ 	HOST_CTL		= 0x7c,
+ 	HOST_STAT		= 0x7e,
+ 	HOST_IRQ_STAT		= 0xbc,
+@@ -43,22 +69,37 @@ enum {
+ 	PORT_SIZE		= 0x40,
+ 
+ 	/* registers for ATA TF operation */
+-	PORT_TF			= 0x00,
+-	PORT_ALT_STAT		= 0x08,
++	PORT_TF_DATA		= 0x00,
++	PORT_TF_FEATURE		= 0x01,
++	PORT_TF_NSECT		= 0x02,
++	PORT_TF_LBAL		= 0x03,
++	PORT_TF_LBAM		= 0x04,
++	PORT_TF_LBAH		= 0x05,
++	PORT_TF_DEVICE		= 0x06,
++	PORT_TF_COMMAND		= 0x07,
++	PORT_TF_ALT_STAT	= 0x08,
+ 	PORT_IRQ_STAT		= 0x09,
+ 	PORT_IRQ_MASK		= 0x0a,
+ 	PORT_PRD_CTL		= 0x0b,
+ 	PORT_PRD_ADDR		= 0x0c,
+ 	PORT_PRD_XFERLEN	= 0x10,
++	PORT_CPB_CPBLAR		= 0x18,
++	PORT_CPB_PTQFIFO	= 0x1c,
+ 
+ 	/* IDMA register */
+ 	PORT_IDMA_CTL		= 0x14,
++	PORT_IDMA_STAT		= 0x16,
++
++	PORT_RPQ_FIFO		= 0x1e,
++	PORT_RPQ_CNT		= 0x1f,
+ 
+ 	PORT_SCR		= 0x20,
+ 
+ 	/* HOST_CTL bits */
+ 	HCTL_IRQOFF		= (1 << 8),  /* global IRQ off */
+-	HCTL_PWRDWN		= (1 << 13), /* power down PHYs */
++	HCTL_FTHD0		= (1 << 10), /* fifo threshold 0 */
++	HCTL_FTHD1		= (1 << 11), /* fifo threshold 1 */
++	HCTL_PWRDWN		= (1 << 12), /* power down PHYs */
+ 	HCTL_SOFTRST		= (1 << 13), /* global reset (no phy reset) */
+ 	HCTL_RPGSEL		= (1 << 15), /* register page select */
+ 
+@@ -81,9 +122,7 @@ enum {
+ 	PIRQ_PENDING		= (1 << 7),  /* port IRQ pending (STAT only) */
+ 
+ 	PIRQ_ERR		= PIRQ_OFFLINE | PIRQ_ONLINE | PIRQ_FATAL,
+-
+-	PIRQ_MASK_DMA_READ	= PIRQ_REPLY | PIRQ_ATA,
+-	PIRQ_MASK_OTHER		= PIRQ_REPLY | PIRQ_COMPLETE,
++	PIRQ_MASK_DEFAULT	= PIRQ_REPLY | PIRQ_ATA,
+ 	PIRQ_MASK_FREEZE	= 0xff,
+ 
+ 	/* PORT_PRD_CTL bits */
+@@ -96,20 +135,104 @@ enum {
+ 	IDMA_CTL_RST_IDMA	= (1 << 5),  /* reset IDMA machinery */
+ 	IDMA_CTL_GO		= (1 << 7),  /* IDMA mode go */
+ 	IDMA_CTL_ATA_NIEN	= (1 << 8),  /* ATA IRQ disable */
++
++	/* PORT_IDMA_STAT bits */
++	IDMA_STAT_PERR		= (1 << 0),  /* PCI ERROR MODE */
++	IDMA_STAT_CPBERR	= (1 << 1),  /* ADMA CPB error */
++	IDMA_STAT_LGCY		= (1 << 3),  /* ADMA legacy */
++	IDMA_STAT_UIRQ		= (1 << 4),  /* ADMA unsolicited irq */
++	IDMA_STAT_STPD		= (1 << 5),  /* ADMA stopped */
++	IDMA_STAT_PSD		= (1 << 6),  /* ADMA pause */
++	IDMA_STAT_DONE		= (1 << 7),  /* ADMA done */
++
++	IDMA_STAT_ERR		= IDMA_STAT_PERR | IDMA_STAT_CPBERR,
++
++	/* CPB Control Flags */
++	CPB_CTL_VALID		= (1 << 0),  /* CPB valid */
++	CPB_CTL_QUEUED		= (1 << 1),  /* queued command */
++	CPB_CTL_DATA		= (1 << 2),  /* data, rsvd in datasheet */
++	CPB_CTL_IEN		= (1 << 3),  /* PCI interrupt enable */
++	CPB_CTL_DEVDIR		= (1 << 4),  /* device direction control */
++
++	/* CPB Response Flags */
++	CPB_RESP_DONE		= (1 << 0),  /* ATA command complete */
++	CPB_RESP_REL		= (1 << 1),  /* ATA release */
++	CPB_RESP_IGNORED	= (1 << 2),  /* CPB ignored */
++	CPB_RESP_ATA_ERR	= (1 << 3),  /* ATA command error */
++	CPB_RESP_SPURIOUS	= (1 << 4),  /* ATA spurious interrupt error */
++	CPB_RESP_UNDERFLOW	= (1 << 5),  /* APRD deficiency length error */
++	CPB_RESP_OVERFLOW	= (1 << 6),  /* APRD excess length error */
++	CPB_RESP_CPB_ERR	= (1 << 7),  /* CPB error flag */
++
++	/* PRD Control Flags */
++	PRD_DRAIN		= (1 << 1),  /* ignore data excess */
++	PRD_CDB			= (1 << 2),  /* atapi packet command pointer */
++	PRD_DIRECT_INTR		= (1 << 3),  /* direct interrupt */
++	PRD_DMA			= (1 << 4),  /* data transfer method */
++	PRD_WRITE		= (1 << 5),  /* data dir, rsvd in datasheet */
++	PRD_IOM			= (1 << 6),  /* io/memory transfer */
++	PRD_END			= (1 << 7),  /* APRD chain end */
+ };
+ 
++/* Command Parameter Block */
++struct inic_cpb {
++	u8		resp_flags;	/* Response Flags */
++	u8		error;		/* ATA Error */
++	u8		status;		/* ATA Status */
++	u8		ctl_flags;	/* Control Flags */
++	__le32		len;		/* Total Transfer Length */
++	__le32		prd;		/* First PRD pointer */
++	u8		rsvd[4];
++	/* 16 bytes */
++	u8		feature;	/* ATA Feature */
++	u8		hob_feature;	/* ATA Ex. Feature */
++	u8		device;		/* ATA Device/Head */
++	u8		mirctl;		/* Mirror Control */
++	u8		nsect;		/* ATA Sector Count */
++	u8		hob_nsect;	/* ATA Ex. Sector Count */
++	u8		lbal;		/* ATA Sector Number */
++	u8		hob_lbal;	/* ATA Ex. Sector Number */
++	u8		lbam;		/* ATA Cylinder Low */
++	u8		hob_lbam;	/* ATA Ex. Cylinder Low */
++	u8		lbah;		/* ATA Cylinder High */
++	u8		hob_lbah;	/* ATA Ex. Cylinder High */
++	u8		command;	/* ATA Command */
++	u8		ctl;		/* ATA Control */
++	u8		slave_error;	/* Slave ATA Error */
++	u8		slave_status;	/* Slave ATA Status */
++	/* 32 bytes */
++} __packed;
++
++/* Physical Region Descriptor */
++struct inic_prd {
++	__le32		mad;		/* Physical Memory Address */
++	__le16		len;		/* Transfer Length */
++	u8		rsvd;
++	u8		flags;		/* Control Flags */
++} __packed;
++
++struct inic_pkt {
++	struct inic_cpb	cpb;
++	struct inic_prd	prd[LIBATA_MAX_PRD + 1];	/* + 1 for cdb */
++	u8		cdb[ATAPI_CDB_LEN];
++} __packed;
++
+ struct inic_host_priv {
+-	u16	cached_hctl;
++	void __iomem	*mmio_base;
++	u16		cached_hctl;
+ };
+ 
+ struct inic_port_priv {
+-	u8	dfl_prdctl;
+-	u8	cached_prdctl;
+-	u8	cached_pirq_mask;
++	struct inic_pkt	*pkt;
++	dma_addr_t	pkt_dma;
++	u32		*cpb_tbl;
++	dma_addr_t	cpb_tbl_dma;
+ };
+ 
+ static struct scsi_host_template inic_sht = {
+-	ATA_BMDMA_SHT(DRV_NAME),
++	ATA_BASE_SHT(DRV_NAME),
++	.sg_tablesize	= LIBATA_MAX_PRD,	/* maybe it can be larger? */
++	.dma_boundary	= INIC_DMA_BOUNDARY,
+ };
+ 
+ static const int scr_map[] = {
+@@ -120,54 +243,34 @@ static const int scr_map[] = {
+ 
+ static void __iomem *inic_port_base(struct ata_port *ap)
+ {
+-	return ap->host->iomap[MMIO_BAR] + ap->port_no * PORT_SIZE;
+-}
+-
+-static void __inic_set_pirq_mask(struct ata_port *ap, u8 mask)
+-{
+-	void __iomem *port_base = inic_port_base(ap);
+-	struct inic_port_priv *pp = ap->private_data;
++	struct inic_host_priv *hpriv = ap->host->private_data;
+ 
+-	writeb(mask, port_base + PORT_IRQ_MASK);
+-	pp->cached_pirq_mask = mask;
+-}
+-
+-static void inic_set_pirq_mask(struct ata_port *ap, u8 mask)
+-{
+-	struct inic_port_priv *pp = ap->private_data;
+-
+-	if (pp->cached_pirq_mask != mask)
+-		__inic_set_pirq_mask(ap, mask);
++	return hpriv->mmio_base + ap->port_no * PORT_SIZE;
+ }
+ 
+ static void inic_reset_port(void __iomem *port_base)
+ {
+ 	void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
+-	u16 ctl;
+ 
+-	ctl = readw(idma_ctl);
+-	ctl &= ~(IDMA_CTL_RST_IDMA | IDMA_CTL_ATA_NIEN | IDMA_CTL_GO);
++	/* stop IDMA engine */
++	readw(idma_ctl); /* flush */
++	msleep(1);
+ 
+ 	/* mask IRQ and assert reset */
+-	writew(ctl | IDMA_CTL_RST_IDMA | IDMA_CTL_ATA_NIEN, idma_ctl);
++	writew(IDMA_CTL_RST_IDMA, idma_ctl);
+ 	readw(idma_ctl); /* flush */
+-
+-	/* give it some time */
+ 	msleep(1);
+ 
+ 	/* release reset */
+-	writew(ctl | IDMA_CTL_ATA_NIEN, idma_ctl);
++	writew(0, idma_ctl);
+ 
+ 	/* clear irq */
+ 	writeb(0xff, port_base + PORT_IRQ_STAT);
+-
+-	/* reenable ATA IRQ, turn off IDMA mode */
+-	writew(ctl, idma_ctl);
+ }
+ 
+ static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
+ {
+-	void __iomem *scr_addr = ap->ioaddr.scr_addr;
++	void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
+ 	void __iomem *addr;
+ 
+ 	if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
+@@ -184,120 +287,126 @@ static int inic_scr_read(struct ata_port *ap, unsigned sc_reg, u32 *val)
+ 
+ static int inic_scr_write(struct ata_port *ap, unsigned sc_reg, u32 val)
+ {
+-	void __iomem *scr_addr = ap->ioaddr.scr_addr;
+-	void __iomem *addr;
++	void __iomem *scr_addr = inic_port_base(ap) + PORT_SCR;
+ 
+ 	if (unlikely(sc_reg >= ARRAY_SIZE(scr_map)))
+ 		return -EINVAL;
+ 
+-	addr = scr_addr + scr_map[sc_reg] * 4;
+ 	writel(val, scr_addr + scr_map[sc_reg] * 4);
+ 	return 0;
+ }
+ 
+-/*
+- * In TF mode, inic162x is very similar to SFF device.  TF registers
+- * function the same.  DMA engine behaves similary using the same PRD
+- * format as BMDMA but different command register, interrupt and event
+- * notification methods are used.  The following inic_bmdma_*()
+- * functions do the impedance matching.
+- */
+-static void inic_bmdma_setup(struct ata_queued_cmd *qc)
++static void inic_stop_idma(struct ata_port *ap)
+ {
+-	struct ata_port *ap = qc->ap;
+-	struct inic_port_priv *pp = ap->private_data;
+ 	void __iomem *port_base = inic_port_base(ap);
+-	int rw = qc->tf.flags & ATA_TFLAG_WRITE;
+-
+-	/* make sure device sees PRD table writes */
+-	wmb();
+-
+-	/* load transfer length */
+-	writel(qc->nbytes, port_base + PORT_PRD_XFERLEN);
+-
+-	/* turn on DMA and specify data direction */
+-	pp->cached_prdctl = pp->dfl_prdctl | PRD_CTL_DMAEN;
+-	if (!rw)
+-		pp->cached_prdctl |= PRD_CTL_WR;
+-	writeb(pp->cached_prdctl, port_base + PORT_PRD_CTL);
+ 
+-	/* issue r/w command */
+-	ap->ops->sff_exec_command(ap, &qc->tf);
++	readb(port_base + PORT_RPQ_FIFO);
++	readb(port_base + PORT_RPQ_CNT);
++	writew(0, port_base + PORT_IDMA_CTL);
+ }
+ 
+-static void inic_bmdma_start(struct ata_queued_cmd *qc)
++static void inic_host_err_intr(struct ata_port *ap, u8 irq_stat, u16 idma_stat)
+ {
+-	struct ata_port *ap = qc->ap;
++	struct ata_eh_info *ehi = &ap->link.eh_info;
+ 	struct inic_port_priv *pp = ap->private_data;
+-	void __iomem *port_base = inic_port_base(ap);
++	struct inic_cpb *cpb = &pp->pkt->cpb;
++	bool freeze = false;
+ 
+-	/* start host DMA transaction */
+-	pp->cached_prdctl |= PRD_CTL_START;
+-	writeb(pp->cached_prdctl, port_base + PORT_PRD_CTL);
+-}
++	ata_ehi_clear_desc(ehi);
++	ata_ehi_push_desc(ehi, "irq_stat=0x%x idma_stat=0x%x",
++			  irq_stat, idma_stat);
+ 
+-static void inic_bmdma_stop(struct ata_queued_cmd *qc)
+-{
+-	struct ata_port *ap = qc->ap;
+-	struct inic_port_priv *pp = ap->private_data;
+-	void __iomem *port_base = inic_port_base(ap);
++	inic_stop_idma(ap);
+ 
+-	/* stop DMA engine */
+-	writeb(pp->dfl_prdctl, port_base + PORT_PRD_CTL);
+-}
++	if (irq_stat & (PIRQ_OFFLINE | PIRQ_ONLINE)) {
++		ata_ehi_push_desc(ehi, "hotplug");
++		ata_ehi_hotplugged(ehi);
++		freeze = true;
++	}
+ 
+-static u8 inic_bmdma_status(struct ata_port *ap)
+-{
+-	/* event is already verified by the interrupt handler */
+-	return ATA_DMA_INTR;
++	if (idma_stat & IDMA_STAT_PERR) {
++		ata_ehi_push_desc(ehi, "PCI error");
++		freeze = true;
++	}
++
++	if (idma_stat & IDMA_STAT_CPBERR) {
++		ata_ehi_push_desc(ehi, "CPB error");
++
++		if (cpb->resp_flags & CPB_RESP_IGNORED) {
++			__ata_ehi_push_desc(ehi, " ignored");
++			ehi->err_mask |= AC_ERR_INVALID;
++			freeze = true;
++		}
++
++		if (cpb->resp_flags & CPB_RESP_ATA_ERR)
++			ehi->err_mask |= AC_ERR_DEV;
++
++		if (cpb->resp_flags & CPB_RESP_SPURIOUS) {
++			__ata_ehi_push_desc(ehi, " spurious-intr");
++			ehi->err_mask |= AC_ERR_HSM;
++			freeze = true;
++		}
++
++		if (cpb->resp_flags &
++		    (CPB_RESP_UNDERFLOW | CPB_RESP_OVERFLOW)) {
++			__ata_ehi_push_desc(ehi, " data-over/underflow");
++			ehi->err_mask |= AC_ERR_HSM;
++			freeze = true;
++		}
++	}
++
++	if (freeze)
++		ata_port_freeze(ap);
++	else
++		ata_port_abort(ap);
+ }
+ 
+ static void inic_host_intr(struct ata_port *ap)
+ {
+ 	void __iomem *port_base = inic_port_base(ap);
+-	struct ata_eh_info *ehi = &ap->link.eh_info;
++	struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
+ 	u8 irq_stat;
++	u16 idma_stat;
+ 
+-	/* fetch and clear irq */
++	/* read and clear IRQ status */
+ 	irq_stat = readb(port_base + PORT_IRQ_STAT);
+ 	writeb(irq_stat, port_base + PORT_IRQ_STAT);
++	idma_stat = readw(port_base + PORT_IDMA_STAT);
+ 
+-	if (likely(!(irq_stat & PIRQ_ERR))) {
+-		struct ata_queued_cmd *qc =
+-			ata_qc_from_tag(ap, ap->link.active_tag);
++	if (unlikely((irq_stat & PIRQ_ERR) || (idma_stat & IDMA_STAT_ERR)))
++		inic_host_err_intr(ap, irq_stat, idma_stat);
+ 
+-		if (unlikely(!qc || (qc->tf.flags & ATA_TFLAG_POLLING))) {
+-			ap->ops->sff_check_status(ap); /* clear ATA interrupt */
+-			return;
+-		}
++	if (unlikely(!qc))
++		goto spurious;
+ 
+-		if (likely(ata_sff_host_intr(ap, qc)))
+-			return;
++	if (likely(idma_stat & IDMA_STAT_DONE)) {
++		inic_stop_idma(ap);
+ 
+-		ap->ops->sff_check_status(ap); /* clear ATA interrupt */
+-		ata_port_printk(ap, KERN_WARNING, "unhandled "
+-				"interrupt, irq_stat=%x\n", irq_stat);
++		/* Depending on circumstances, device error
++		 * isn't reported by IDMA, check it explicitly.
++		 */
++		if (unlikely(readb(port_base + PORT_TF_COMMAND) &
++			     (ATA_DF | ATA_ERR)))
++			qc->err_mask |= AC_ERR_DEV;
++
++		ata_qc_complete(qc);
+ 		return;
+ 	}
+ 
+-	/* error */
+-	ata_ehi_push_desc(ehi, "irq_stat=0x%x", irq_stat);
+-
+-	if (irq_stat & (PIRQ_OFFLINE | PIRQ_ONLINE)) {
+-		ata_ehi_hotplugged(ehi);
+-		ata_port_freeze(ap);
+-	} else
+-		ata_port_abort(ap);
++ spurious:
++	ata_port_printk(ap, KERN_WARNING, "unhandled interrupt: "
++			"cmd=0x%x irq_stat=0x%x idma_stat=0x%x\n",
++			qc ? qc->tf.command : 0xff, irq_stat, idma_stat);
+ }
+ 
+ static irqreturn_t inic_interrupt(int irq, void *dev_instance)
+ {
+ 	struct ata_host *host = dev_instance;
+-	void __iomem *mmio_base = host->iomap[MMIO_BAR];
++	struct inic_host_priv *hpriv = host->private_data;
+ 	u16 host_irq_stat;
+ 	int i, handled = 0;
+ 
+-	host_irq_stat = readw(mmio_base + HOST_IRQ_STAT);
++	host_irq_stat = readw(hpriv->mmio_base + HOST_IRQ_STAT);
+ 
+ 	if (unlikely(!(host_irq_stat & HIRQ_GLOBAL)))
+ 		goto out;
+@@ -327,60 +436,173 @@ static irqreturn_t inic_interrupt(int irq, void *dev_instance)
+ 	return IRQ_RETVAL(handled);
+ }
+ 
++static int inic_check_atapi_dma(struct ata_queued_cmd *qc)
++{
++	/* For some reason ATAPI_PROT_DMA doesn't work for some
++	 * commands including writes and other misc ops.  Use PIO
++	 * protocol instead, which BTW is driven by the DMA engine
++	 * anyway, so it shouldn't make much difference for native
++	 * SATA devices.
++	 */
++	if (atapi_cmd_type(qc->cdb[0]) == READ)
++		return 0;
++	return 1;
++}
++
++static void inic_fill_sg(struct inic_prd *prd, struct ata_queued_cmd *qc)
++{
++	struct scatterlist *sg;
++	unsigned int si;
++	u8 flags = 0;
++
++	if (qc->tf.flags & ATA_TFLAG_WRITE)
++		flags |= PRD_WRITE;
++
++	if (ata_is_dma(qc->tf.protocol))
++		flags |= PRD_DMA;
++
++	for_each_sg(qc->sg, sg, qc->n_elem, si) {
++		prd->mad = cpu_to_le32(sg_dma_address(sg));
++		prd->len = cpu_to_le16(sg_dma_len(sg));
++		prd->flags = flags;
++		prd++;
++	}
++
++	WARN_ON(!si);
++	prd[-1].flags |= PRD_END;
++}
++
++static void inic_qc_prep(struct ata_queued_cmd *qc)
++{
++	struct inic_port_priv *pp = qc->ap->private_data;
++	struct inic_pkt *pkt = pp->pkt;
++	struct inic_cpb *cpb = &pkt->cpb;
++	struct inic_prd *prd = pkt->prd;
++	bool is_atapi = ata_is_atapi(qc->tf.protocol);
++	bool is_data = ata_is_data(qc->tf.protocol);
++	unsigned int cdb_len = 0;
++
++	VPRINTK("ENTER\n");
++
++	if (is_atapi)
++		cdb_len = qc->dev->cdb_len;
++
++	/* prepare packet, based on initio driver */
++	memset(pkt, 0, sizeof(struct inic_pkt));
++
++	cpb->ctl_flags = CPB_CTL_VALID | CPB_CTL_IEN;
++	if (is_atapi || is_data)
++		cpb->ctl_flags |= CPB_CTL_DATA;
++
++	cpb->len = cpu_to_le32(qc->nbytes + cdb_len);
++	cpb->prd = cpu_to_le32(pp->pkt_dma + offsetof(struct inic_pkt, prd));
++
++	cpb->device = qc->tf.device;
++	cpb->feature = qc->tf.feature;
++	cpb->nsect = qc->tf.nsect;
++	cpb->lbal = qc->tf.lbal;
++	cpb->lbam = qc->tf.lbam;
++	cpb->lbah = qc->tf.lbah;
++
++	if (qc->tf.flags & ATA_TFLAG_LBA48) {
++		cpb->hob_feature = qc->tf.hob_feature;
++		cpb->hob_nsect = qc->tf.hob_nsect;
++		cpb->hob_lbal = qc->tf.hob_lbal;
++		cpb->hob_lbam = qc->tf.hob_lbam;
++		cpb->hob_lbah = qc->tf.hob_lbah;
++	}
++
++	cpb->command = qc->tf.command;
++	/* don't load ctl - unclear why; it's like that in the initio driver */
++
++	/* setup PRD for CDB */
++	if (is_atapi) {
++		memcpy(pkt->cdb, qc->cdb, ATAPI_CDB_LEN);
++		prd->mad = cpu_to_le32(pp->pkt_dma +
++				       offsetof(struct inic_pkt, cdb));
++		prd->len = cpu_to_le16(cdb_len);
++		prd->flags = PRD_CDB | PRD_WRITE;
++		if (!is_data)
++			prd->flags |= PRD_END;
++		prd++;
++	}
++
++	/* setup sg table */
++	if (is_data)
++		inic_fill_sg(prd, qc);
++
++	pp->cpb_tbl[0] = pp->pkt_dma;
++}
++
+ static unsigned int inic_qc_issue(struct ata_queued_cmd *qc)
+ {
+ 	struct ata_port *ap = qc->ap;
++	void __iomem *port_base = inic_port_base(ap);
+ 
+-	/* ATA IRQ doesn't wait for DMA transfer completion and vice
+-	 * versa.  Mask IRQ selectively to detect command completion.
+-	 * Without it, ATA DMA read command can cause data corruption.
+-	 *
+-	 * Something similar might be needed for ATAPI writes.  I
+-	 * tried a lot of combinations but couldn't find the solution.
+-	 */
+-	if (qc->tf.protocol == ATA_PROT_DMA &&
+-	    !(qc->tf.flags & ATA_TFLAG_WRITE))
+-		inic_set_pirq_mask(ap, PIRQ_MASK_DMA_READ);
+-	else
+-		inic_set_pirq_mask(ap, PIRQ_MASK_OTHER);
++	/* fire up the ADMA engine */
++	writew(HCTL_FTHD0, port_base + HOST_CTL);
++	writew(IDMA_CTL_GO, port_base + PORT_IDMA_CTL);
++	writeb(0, port_base + PORT_CPB_PTQFIFO);
++
++	return 0;
++}
++
++static void inic_tf_read(struct ata_port *ap, struct ata_taskfile *tf)
++{
++	void __iomem *port_base = inic_port_base(ap);
++
++	tf->feature	= readb(port_base + PORT_TF_FEATURE);
++	tf->nsect	= readb(port_base + PORT_TF_NSECT);
++	tf->lbal	= readb(port_base + PORT_TF_LBAL);
++	tf->lbam	= readb(port_base + PORT_TF_LBAM);
++	tf->lbah	= readb(port_base + PORT_TF_LBAH);
++	tf->device	= readb(port_base + PORT_TF_DEVICE);
++	tf->command	= readb(port_base + PORT_TF_COMMAND);
++}
+ 
+-	/* Issuing a command to yet uninitialized port locks up the
+-	 * controller.  Most of the time, this happens for the first
+-	 * command after reset which are ATA and ATAPI IDENTIFYs.
+-	 * Fast fail if stat is 0x7f or 0xff for those commands.
++static bool inic_qc_fill_rtf(struct ata_queued_cmd *qc)
++{
++	struct ata_taskfile *rtf = &qc->result_tf;
++	struct ata_taskfile tf;
++
++	/* FIXME: Except for status and error, result TF access
++	 * doesn't work.  I tried reading from BAR0/2, CPB and BAR5.
++	 * None works regardless of which command interface is used.
++	 * For now return true iff status indicates device error.
++	 * This means that we're reporting a bogus sector for RW
++	 * failures.  Eeekk....
+ 	 */
+-	if (unlikely(qc->tf.command == ATA_CMD_ID_ATA ||
+-		     qc->tf.command == ATA_CMD_ID_ATAPI)) {
+-		u8 stat = ap->ops->sff_check_status(ap);
+-		if (stat == 0x7f || stat == 0xff)
+-			return AC_ERR_HSM;
+-	}
++	inic_tf_read(qc->ap, &tf);
+ 
+-	return ata_sff_qc_issue(qc);
++	if (!(tf.command & ATA_ERR))
++		return false;
++
++	rtf->command = tf.command;
++	rtf->feature = tf.feature;
++	return true;
+ }
+ 
+ static void inic_freeze(struct ata_port *ap)
+ {
+ 	void __iomem *port_base = inic_port_base(ap);
+ 
+-	__inic_set_pirq_mask(ap, PIRQ_MASK_FREEZE);
+-
+-	ap->ops->sff_check_status(ap);
++	writeb(PIRQ_MASK_FREEZE, port_base + PORT_IRQ_MASK);
+ 	writeb(0xff, port_base + PORT_IRQ_STAT);
+-
+-	readb(port_base + PORT_IRQ_STAT); /* flush */
+ }
+ 
+ static void inic_thaw(struct ata_port *ap)
+ {
+ 	void __iomem *port_base = inic_port_base(ap);
+ 
+-	ap->ops->sff_check_status(ap);
+ 	writeb(0xff, port_base + PORT_IRQ_STAT);
++	writeb(PIRQ_MASK_DEFAULT, port_base + PORT_IRQ_MASK);
++}
+ 
+-	__inic_set_pirq_mask(ap, PIRQ_MASK_OTHER);
++static int inic_check_ready(struct ata_link *link)
++{
++	void __iomem *port_base = inic_port_base(link->ap);
+ 
+-	readb(port_base + PORT_IRQ_STAT); /* flush */
++	return ata_check_ready(readb(port_base + PORT_TF_COMMAND));
+ }
+ 
+ /*
+@@ -394,17 +616,15 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
+ 	void __iomem *port_base = inic_port_base(ap);
+ 	void __iomem *idma_ctl = port_base + PORT_IDMA_CTL;
+ 	const unsigned long *timing = sata_ehc_deb_timing(&link->eh_context);
+-	u16 val;
+ 	int rc;
+ 
+ 	/* hammer it into sane state */
+ 	inic_reset_port(port_base);
+ 
+-	val = readw(idma_ctl);
+-	writew(val | IDMA_CTL_RST_ATA, idma_ctl);
++	writew(IDMA_CTL_RST_ATA, idma_ctl);
+ 	readw(idma_ctl);	/* flush */
+ 	msleep(1);
+-	writew(val & ~IDMA_CTL_RST_ATA, idma_ctl);
++	writew(0, idma_ctl);
+ 
+ 	rc = sata_link_resume(link, timing, deadline);
+ 	if (rc) {
+@@ -418,7 +638,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
+ 		struct ata_taskfile tf;
+ 
+ 		/* wait for link to become ready */
+-		rc = ata_sff_wait_after_reset(link, 1, deadline);
++		rc = ata_wait_after_reset(link, deadline, inic_check_ready);
+ 		/* link occupied, -ENODEV too is an error */
+ 		if (rc) {
+ 			ata_link_printk(link, KERN_WARNING, "device not ready "
+@@ -426,7 +646,7 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
+ 			return rc;
+ 		}
+ 
+-		ata_sff_tf_read(ap, &tf);
++		inic_tf_read(ap, &tf);
+ 		*class = ata_dev_classify(&tf);
+ 	}
+ 
+@@ -436,18 +656,8 @@ static int inic_hardreset(struct ata_link *link, unsigned int *class,
+ static void inic_error_handler(struct ata_port *ap)
+ {
+ 	void __iomem *port_base = inic_port_base(ap);
+-	struct inic_port_priv *pp = ap->private_data;
+-	unsigned long flags;
+ 
+-	/* reset PIO HSM and stop DMA engine */
+ 	inic_reset_port(port_base);
+-
+-	spin_lock_irqsave(ap->lock, flags);
+-	ap->hsm_task_state = HSM_ST_IDLE;
+-	writeb(pp->dfl_prdctl, port_base + PORT_PRD_CTL);
+-	spin_unlock_irqrestore(ap->lock, flags);
+-
+-	/* PIO and DMA engines have been stopped, perform recovery */
+ 	ata_std_error_handler(ap);
+ }
+ 
+@@ -458,26 +668,18 @@ static void inic_post_internal_cmd(struct ata_queued_cmd *qc)
+ 		inic_reset_port(inic_port_base(qc->ap));
+ }
+ 
+-static void inic_dev_config(struct ata_device *dev)
+-{
+-	/* inic can only handle upto LBA28 max sectors */
+-	if (dev->max_sectors > ATA_MAX_SECTORS)
+-		dev->max_sectors = ATA_MAX_SECTORS;
+-
+-	if (dev->n_sectors >= 1 << 28) {
+-		ata_dev_printk(dev, KERN_ERR,
+-	"ERROR: This driver doesn't support LBA48 yet and may cause\n"
+-	"                data corruption on such devices.  Disabling.\n");
+-		ata_dev_disable(dev);
+-	}
+-}
+-
+ static void init_port(struct ata_port *ap)
+ {
+ 	void __iomem *port_base = inic_port_base(ap);
++	struct inic_port_priv *pp = ap->private_data;
+ 
+-	/* Setup PRD address */
++	/* clear packet and CPB table */
++	memset(pp->pkt, 0, sizeof(struct inic_pkt));
++	memset(pp->cpb_tbl, 0, IDMA_CPB_TBL_SIZE);
++
++	/* setup PRD and CPB lookup table addresses */
+ 	writel(ap->prd_dma, port_base + PORT_PRD_ADDR);
++	writel(pp->cpb_tbl_dma, port_base + PORT_CPB_CPBLAR);
+ }
+ 
+ static int inic_port_resume(struct ata_port *ap)
+@@ -488,28 +690,30 @@ static int inic_port_resume(struct ata_port *ap)
+ 
+ static int inic_port_start(struct ata_port *ap)
+ {
+-	void __iomem *port_base = inic_port_base(ap);
++	struct device *dev = ap->host->dev;
+ 	struct inic_port_priv *pp;
+-	u8 tmp;
+ 	int rc;
+ 
+ 	/* alloc and initialize private data */
+-	pp = devm_kzalloc(ap->host->dev, sizeof(*pp), GFP_KERNEL);
++	pp = devm_kzalloc(dev, sizeof(*pp), GFP_KERNEL);
+ 	if (!pp)
+ 		return -ENOMEM;
+ 	ap->private_data = pp;
+ 
+-	/* default PRD_CTL value, DMAEN, WR and START off */
+-	tmp = readb(port_base + PORT_PRD_CTL);
+-	tmp &= ~(PRD_CTL_DMAEN | PRD_CTL_WR | PRD_CTL_START);
+-	pp->dfl_prdctl = tmp;
+-
+ 	/* Alloc resources */
+ 	rc = ata_port_start(ap);
+-	if (rc) {
+-		kfree(pp);
++	if (rc)
+ 		return rc;
+-	}
++
++	pp->pkt = dmam_alloc_coherent(dev, sizeof(struct inic_pkt),
++				      &pp->pkt_dma, GFP_KERNEL);
++	if (!pp->pkt)
++		return -ENOMEM;
++
++	pp->cpb_tbl = dmam_alloc_coherent(dev, IDMA_CPB_TBL_SIZE,
++					  &pp->cpb_tbl_dma, GFP_KERNEL);
++	if (!pp->cpb_tbl)
++		return -ENOMEM;
+ 
+ 	init_port(ap);
+ 
+@@ -517,21 +721,18 @@ static int inic_port_start(struct ata_port *ap)
+ }
+ 
+ static struct ata_port_operations inic_port_ops = {
+-	.inherits		= &ata_sff_port_ops,
++	.inherits		= &sata_port_ops,
+ 
+-	.bmdma_setup		= inic_bmdma_setup,
+-	.bmdma_start		= inic_bmdma_start,
+-	.bmdma_stop		= inic_bmdma_stop,
+-	.bmdma_status		= inic_bmdma_status,
++	.check_atapi_dma	= inic_check_atapi_dma,
++	.qc_prep		= inic_qc_prep,
+ 	.qc_issue		= inic_qc_issue,
++	.qc_fill_rtf		= inic_qc_fill_rtf,
+ 
+ 	.freeze			= inic_freeze,
+ 	.thaw			= inic_thaw,
+-	.softreset		= ATA_OP_NULL,	/* softreset is broken */
+ 	.hardreset		= inic_hardreset,
+ 	.error_handler		= inic_error_handler,
+ 	.post_internal_cmd	= inic_post_internal_cmd,
+-	.dev_config		= inic_dev_config,
+ 
+ 	.scr_read		= inic_scr_read,
+ 	.scr_write		= inic_scr_write,
+@@ -541,12 +742,6 @@ static struct ata_port_operations inic_port_ops = {
+ };
+ 
+ static struct ata_port_info inic_port_info = {
+-	/* For some reason, ATAPI_PROT_PIO is broken on this
+-	 * controller, and no, PIO_POLLING does't fix it.  It somehow
+-	 * manages to report the wrong ireason and ignoring ireason
+-	 * results in machine lock up.  Tell libata to always prefer
+-	 * DMA.
+-	 */
+ 	.flags			= ATA_FLAG_SATA | ATA_FLAG_PIO_DMA,
+ 	.pio_mask		= 0x1f,	/* pio0-4 */
+ 	.mwdma_mask		= 0x07, /* mwdma0-2 */
+@@ -599,7 +794,6 @@ static int inic_pci_device_resume(struct pci_dev *pdev)
+ {
+ 	struct ata_host *host = dev_get_drvdata(&pdev->dev);
+ 	struct inic_host_priv *hpriv = host->private_data;
+-	void __iomem *mmio_base = host->iomap[MMIO_BAR];
+ 	int rc;
+ 
+ 	rc = ata_pci_device_do_resume(pdev);
+@@ -607,7 +801,7 @@ static int inic_pci_device_resume(struct pci_dev *pdev)
+ 		return rc;
+ 
+ 	if (pdev->dev.power.power_state.event == PM_EVENT_SUSPEND) {
+-		rc = init_controller(mmio_base, hpriv->cached_hctl);
++		rc = init_controller(hpriv->mmio_base, hpriv->cached_hctl);
+ 		if (rc)
+ 			return rc;
+ 	}
+@@ -625,6 +819,7 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ 	struct ata_host *host;
+ 	struct inic_host_priv *hpriv;
+ 	void __iomem * const *iomap;
++	int mmio_bar;
+ 	int i, rc;
+ 
+ 	if (!printed_version++)
+@@ -638,38 +833,31 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ 
+ 	host->private_data = hpriv;
+ 
+-	/* acquire resources and fill host */
++	/* Acquire resources and fill host.  Note that PCI and cardbus
++	 * use different BARs.
++	 */
+ 	rc = pcim_enable_device(pdev);
+ 	if (rc)
+ 		return rc;
+ 
+-	rc = pcim_iomap_regions(pdev, 0x3f, DRV_NAME);
++	if (pci_resource_flags(pdev, MMIO_BAR_PCI) & IORESOURCE_MEM)
++		mmio_bar = MMIO_BAR_PCI;
++	else
++		mmio_bar = MMIO_BAR_CARDBUS;
++
++	rc = pcim_iomap_regions(pdev, 1 << mmio_bar, DRV_NAME);
+ 	if (rc)
+ 		return rc;
+ 	host->iomap = iomap = pcim_iomap_table(pdev);
++	hpriv->mmio_base = iomap[mmio_bar];
++	hpriv->cached_hctl = readw(hpriv->mmio_base + HOST_CTL);
+ 
+ 	for (i = 0; i < NR_PORTS; i++) {
+ 		struct ata_port *ap = host->ports[i];
+-		struct ata_ioports *port = &ap->ioaddr;
+-		unsigned int offset = i * PORT_SIZE;
+-
+-		port->cmd_addr = iomap[2 * i];
+-		port->altstatus_addr =
+-		port->ctl_addr = (void __iomem *)
+-			((unsigned long)iomap[2 * i + 1] | ATA_PCI_CTL_OFS);
+-		port->scr_addr = iomap[MMIO_BAR] + offset + PORT_SCR;
+-
+-		ata_sff_std_ports(port);
+-
+-		ata_port_pbar_desc(ap, MMIO_BAR, -1, "mmio");
+-		ata_port_pbar_desc(ap, MMIO_BAR, offset, "port");
+-		ata_port_desc(ap, "cmd 0x%llx ctl 0x%llx",
+-		  (unsigned long long)pci_resource_start(pdev, 2 * i),
+-		  (unsigned long long)pci_resource_start(pdev, (2 * i + 1)) |
+-				      ATA_PCI_CTL_OFS);
+-	}
+ 
+-	hpriv->cached_hctl = readw(iomap[MMIO_BAR] + HOST_CTL);
++		ata_port_pbar_desc(ap, mmio_bar, -1, "mmio");
++		ata_port_pbar_desc(ap, mmio_bar, i * PORT_SIZE, "port");
++	}
+ 
+ 	/* Set dma_mask.  This device doesn't support 64-bit addressing. */
+ 	rc = pci_set_dma_mask(pdev, DMA_32BIT_MASK);
+@@ -698,7 +886,7 @@ static int inic_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
+ 		return rc;
+ 	}
+ 
+-	rc = init_controller(iomap[MMIO_BAR], hpriv->cached_hctl);
++	rc = init_controller(hpriv->mmio_base, hpriv->cached_hctl);
+ 	if (rc) {
+ 		dev_printk(KERN_ERR, &pdev->dev,
+ 			   "failed to initialize controller\n");
+diff --git a/drivers/ata/sata_mv.c b/drivers/ata/sata_mv.c
+index 842b1a1..bb73b22 100644
+--- a/drivers/ata/sata_mv.c
++++ b/drivers/ata/sata_mv.c
+@@ -65,6 +65,7 @@
+ #include <linux/platform_device.h>
+ #include <linux/ata_platform.h>
+ #include <linux/mbus.h>
++#include <linux/bitops.h>
+ #include <scsi/scsi_host.h>
+ #include <scsi/scsi_cmnd.h>
+ #include <scsi/scsi_device.h>
+@@ -91,9 +92,9 @@ enum {
+ 	MV_IRQ_COAL_TIME_THRESHOLD	= (MV_IRQ_COAL_REG_BASE + 0xd0),
+ 
+ 	MV_SATAHC0_REG_BASE	= 0x20000,
+-	MV_FLASH_CTL		= 0x1046c,
+-	MV_GPIO_PORT_CTL	= 0x104f0,
+-	MV_RESET_CFG		= 0x180d8,
++	MV_FLASH_CTL_OFS	= 0x1046c,
++	MV_GPIO_PORT_CTL_OFS	= 0x104f0,
++	MV_RESET_CFG_OFS	= 0x180d8,
+ 
+ 	MV_PCI_REG_SZ		= MV_MAJOR_REG_AREA_SZ,
+ 	MV_SATAHC_REG_SZ	= MV_MAJOR_REG_AREA_SZ,
+@@ -147,18 +148,21 @@ enum {
+ 	/* PCI interface registers */
+ 
+ 	PCI_COMMAND_OFS		= 0xc00,
++	PCI_COMMAND_MRDTRIG	= (1 << 7),	/* PCI Master Read Trigger */
+ 
+ 	PCI_MAIN_CMD_STS_OFS	= 0xd30,
+ 	STOP_PCI_MASTER		= (1 << 2),
+ 	PCI_MASTER_EMPTY	= (1 << 3),
+ 	GLOB_SFT_RST		= (1 << 4),
+ 
+-	MV_PCI_MODE		= 0xd00,
++	MV_PCI_MODE_OFS		= 0xd00,
++	MV_PCI_MODE_MASK	= 0x30,
++
+ 	MV_PCI_EXP_ROM_BAR_CTL	= 0xd2c,
+ 	MV_PCI_DISC_TIMER	= 0xd04,
+ 	MV_PCI_MSI_TRIGGER	= 0xc38,
+ 	MV_PCI_SERR_MASK	= 0xc28,
+-	MV_PCI_XBAR_TMOUT	= 0x1d04,
++	MV_PCI_XBAR_TMOUT_OFS	= 0x1d04,
+ 	MV_PCI_ERR_LOW_ADDRESS	= 0x1d40,
+ 	MV_PCI_ERR_HIGH_ADDRESS	= 0x1d44,
+ 	MV_PCI_ERR_ATTRIBUTE	= 0x1d48,
+@@ -225,16 +229,18 @@ enum {
+ 	PHY_MODE4		= 0x314,
+ 	PHY_MODE2		= 0x330,
+ 	SATA_IFCTL_OFS		= 0x344,
++	SATA_TESTCTL_OFS	= 0x348,
+ 	SATA_IFSTAT_OFS		= 0x34c,
+ 	VENDOR_UNIQUE_FIS_OFS	= 0x35c,
+ 
+-	FIS_CFG_OFS		= 0x360,
+-	FIS_CFG_SINGLE_SYNC	= (1 << 16),	/* SYNC on DMA activation */
++	FISCFG_OFS		= 0x360,
++	FISCFG_WAIT_DEV_ERR	= (1 << 8),	/* wait for host on DevErr */
++	FISCFG_SINGLE_SYNC	= (1 << 16),	/* SYNC on DMA activation */
+ 
+ 	MV5_PHY_MODE		= 0x74,
+-	MV5_LT_MODE		= 0x30,
+-	MV5_PHY_CTL		= 0x0C,
+-	SATA_INTERFACE_CFG	= 0x050,
++	MV5_LTMODE_OFS		= 0x30,
++	MV5_PHY_CTL_OFS		= 0x0C,
++	SATA_INTERFACE_CFG_OFS	= 0x050,
+ 
+ 	MV_M2_PREAMP_MASK	= 0x7e0,
+ 
+@@ -332,10 +338,16 @@ enum {
+ 	EDMA_CMD_OFS		= 0x28,		/* EDMA command register */
+ 	EDMA_EN			= (1 << 0),	/* enable EDMA */
+ 	EDMA_DS			= (1 << 1),	/* disable EDMA; self-negated */
+-	ATA_RST			= (1 << 2),	/* reset trans/link/phy */
++	EDMA_RESET		= (1 << 2),	/* reset eng/trans/link/phy */
++
++	EDMA_STATUS_OFS		= 0x30,		/* EDMA engine status */
++	EDMA_STATUS_CACHE_EMPTY	= (1 << 6),	/* GenIIe command cache empty */
++	EDMA_STATUS_IDLE	= (1 << 7),	/* GenIIe EDMA enabled/idle */
+ 
+-	EDMA_IORDY_TMOUT	= 0x34,
+-	EDMA_ARB_CFG		= 0x38,
++	EDMA_IORDY_TMOUT_OFS	= 0x34,
++	EDMA_ARB_CFG_OFS	= 0x38,
++
++	EDMA_HALTCOND_OFS	= 0x60,		/* GenIIe halt conditions */
+ 
+ 	GEN_II_NCQ_MAX_SECTORS	= 256,		/* max sects/io on Gen2 w/NCQ */
+ 
+@@ -350,15 +362,19 @@ enum {
+ 	MV_HP_GEN_II		= (1 << 7),	/* Generation II: 60xx */
+ 	MV_HP_GEN_IIE		= (1 << 8),	/* Generation IIE: 6042/7042 */
+ 	MV_HP_PCIE		= (1 << 9),	/* PCIe bus/regs: 7042 */
++	MV_HP_CUT_THROUGH	= (1 << 10),	/* can use EDMA cut-through */
+ 
+ 	/* Port private flags (pp_flags) */
+ 	MV_PP_FLAG_EDMA_EN	= (1 << 0),	/* is EDMA engine enabled? */
+ 	MV_PP_FLAG_NCQ_EN	= (1 << 1),	/* is EDMA set up for NCQ? */
++	MV_PP_FLAG_FBS_EN	= (1 << 2),	/* is EDMA set up for FBS? */
++	MV_PP_FLAG_DELAYED_EH	= (1 << 3),	/* delayed dev err handling */
+ };
+ 
+ #define IS_GEN_I(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_I)
+ #define IS_GEN_II(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_II)
+ #define IS_GEN_IIE(hpriv) ((hpriv)->hp_flags & MV_HP_GEN_IIE)
++#define IS_PCIE(hpriv) ((hpriv)->hp_flags & MV_HP_PCIE)
+ #define HAS_PCI(host) (!((host)->ports[0]->flags & MV_FLAG_SOC))
+ 
+ #define WINDOW_CTRL(i)		(0x20030 + ((i) << 4))
+@@ -433,6 +449,7 @@ struct mv_port_priv {
+ 	unsigned int		resp_idx;
+ 
+ 	u32			pp_flags;
++	unsigned int		delayed_eh_pmp_map;
+ };
+ 
+ struct mv_port_signal {
+@@ -479,6 +496,7 @@ static int mv5_scr_read(struct ata_port *ap, unsigned int sc_reg_in, u32 *val);
+ static int mv5_scr_write(struct ata_port *ap, unsigned int sc_reg_in, u32 val);
+ static int mv_port_start(struct ata_port *ap);
+ static void mv_port_stop(struct ata_port *ap);
++static int mv_qc_defer(struct ata_queued_cmd *qc);
+ static void mv_qc_prep(struct ata_queued_cmd *qc);
+ static void mv_qc_prep_iie(struct ata_queued_cmd *qc);
+ static unsigned int mv_qc_issue(struct ata_queued_cmd *qc);
+@@ -527,6 +545,9 @@ static int mv_pmp_hardreset(struct ata_link *link, unsigned int *class,
+ 				unsigned long deadline);
+ static int  mv_softreset(struct ata_link *link, unsigned int *class,
+ 				unsigned long deadline);
++static void mv_pmp_error_handler(struct ata_port *ap);
++static void mv_process_crpb_entries(struct ata_port *ap,
++					struct mv_port_priv *pp);
+ 
+ /* .sg_tablesize is (MV_MAX_SG_CT / 2) in the structures below
+  * because we have to allow room for worst case splitting of
+@@ -548,6 +569,7 @@ static struct scsi_host_template mv6_sht = {
+ static struct ata_port_operations mv5_ops = {
+ 	.inherits		= &ata_sff_port_ops,
+ 
++	.qc_defer		= mv_qc_defer,
+ 	.qc_prep		= mv_qc_prep,
+ 	.qc_issue		= mv_qc_issue,
+ 
+@@ -566,7 +588,6 @@ static struct ata_port_operations mv5_ops = {
+ 
+ static struct ata_port_operations mv6_ops = {
+ 	.inherits		= &mv5_ops,
+-	.qc_defer		= sata_pmp_qc_defer_cmd_switch,
+ 	.dev_config             = mv6_dev_config,
+ 	.scr_read		= mv_scr_read,
+ 	.scr_write		= mv_scr_write,
+@@ -574,12 +595,11 @@ static struct ata_port_operations mv6_ops = {
+ 	.pmp_hardreset		= mv_pmp_hardreset,
+ 	.pmp_softreset		= mv_softreset,
+ 	.softreset		= mv_softreset,
+-	.error_handler		= sata_pmp_error_handler,
++	.error_handler		= mv_pmp_error_handler,
+ };
+ 
+ static struct ata_port_operations mv_iie_ops = {
+ 	.inherits		= &mv6_ops,
+-	.qc_defer		= ata_std_qc_defer, /* FIS-based switching */
+ 	.dev_config		= ATA_OP_NULL,
+ 	.qc_prep		= mv_qc_prep_iie,
+ };
+@@ -875,6 +895,29 @@ static void mv_start_dma(struct ata_port *ap, void __iomem *port_mmio,
+ 	}
+ }
+ 
++static void mv_wait_for_edma_empty_idle(struct ata_port *ap)
++{
++	void __iomem *port_mmio = mv_ap_base(ap);
++	const u32 empty_idle = (EDMA_STATUS_CACHE_EMPTY | EDMA_STATUS_IDLE);
++	const int per_loop = 5, timeout = (15 * 1000 / per_loop);
++	int i;
++
++	/*
++	 * Wait for the EDMA engine to finish transactions in progress.
++	 * No idea what a good "timeout" value might be, but measurements
++	 * indicate that it often requires hundreds of microseconds
++	 * with two drives in use.  So we use the 15msec value above
++	 * as a rough guess at what even more drives might require.
++	 */
++	for (i = 0; i < timeout; ++i) {
++		u32 edma_stat = readl(port_mmio + EDMA_STATUS_OFS);
++		if ((edma_stat & empty_idle) == empty_idle)
++			break;
++		udelay(per_loop);
++	}
++	/* ata_port_printk(ap, KERN_INFO, "%s: %u+ usecs\n", __func__, i); */
++}
++
+ /**
+  *      mv_stop_edma_engine - Disable eDMA engine
+  *      @port_mmio: io base address
+@@ -907,6 +950,7 @@ static int mv_stop_edma(struct ata_port *ap)
+ 	if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN))
+ 		return 0;
+ 	pp->pp_flags &= ~MV_PP_FLAG_EDMA_EN;
++	mv_wait_for_edma_empty_idle(ap);
+ 	if (mv_stop_edma_engine(port_mmio)) {
+ 		ata_port_printk(ap, KERN_ERR, "Unable to stop eDMA\n");
+ 		return -EIO;
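
mv_stop_edma() now drains the engine before disabling it. The drain is the classic bounded-poll pattern: spin on a status register with a fixed per-iteration delay and a generous upper bound. A standalone userspace rendering of the same loop, with a fake status read standing in for the EDMA_STATUS register (the bit positions match the EDMA_STATUS_* enums added above):

    #include <stdio.h>

    /* Illustrative only: the bounded-poll pattern used by
     * mv_wait_for_edma_empty_idle(), with a fake status source.
     */
    static unsigned int fake_edma_status(int tick)
    {
        return tick >= 40 ? 0xc0 : 0;   /* bits 6+7 set after 40 polls */
    }

    int main(void)
    {
        const unsigned int empty_idle = (1 << 6) | (1 << 7);
        const int per_loop = 5, timeout = 15 * 1000 / per_loop;
        int i;

        for (i = 0; i < timeout; ++i) {
            if ((fake_edma_status(i) & empty_idle) == empty_idle)
                break;
            /* the driver udelay()s here for per_loop usecs */
        }
        printf("drained after %d polls (~%d usec)\n", i, i * per_loop);
        return 0;
    }
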
+@@ -1057,26 +1101,95 @@ static void mv6_dev_config(struct ata_device *adev)
+ 	}
+ }
+ 
+-static void mv_config_fbs(void __iomem *port_mmio, int enable_fbs)
++static int mv_qc_defer(struct ata_queued_cmd *qc)
+ {
+-	u32 old_fcfg, new_fcfg, old_ltmode, new_ltmode;
++	struct ata_link *link = qc->dev->link;
++	struct ata_port *ap = link->ap;
++	struct mv_port_priv *pp = ap->private_data;
++
++	/*
++	 * Don't allow new commands if we're in a delayed EH state
++	 * for NCQ and/or FIS-based switching.
++	 */
++	if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
++		return ATA_DEFER_PORT;
+ 	/*
+-	 * Various bit settings required for operation
+-	 * in FIS-based switching (fbs) mode on GenIIe:
++	 * If the port is completely idle, then allow the new qc.
+ 	 */
+-	old_fcfg   = readl(port_mmio + FIS_CFG_OFS);
+-	old_ltmode = readl(port_mmio + LTMODE_OFS);
+-	if (enable_fbs) {
+-		new_fcfg   = old_fcfg   |  FIS_CFG_SINGLE_SYNC;
+-		new_ltmode = old_ltmode |  LTMODE_BIT8;
+-	} else { /* disable fbs */
+-		new_fcfg   = old_fcfg   & ~FIS_CFG_SINGLE_SYNC;
+-		new_ltmode = old_ltmode & ~LTMODE_BIT8;
+-	}
+-	if (new_fcfg != old_fcfg)
+-		writelfl(new_fcfg, port_mmio + FIS_CFG_OFS);
++	if (ap->nr_active_links == 0)
++		return 0;
++
++	if (pp->pp_flags & MV_PP_FLAG_EDMA_EN) {
++		/*
++		 * The port is operating in host queuing mode (EDMA).
++		 * It can accommodate a new qc if the qc protocol
++		 * is compatible with the current host queue mode.
++		 */
++		if (pp->pp_flags & MV_PP_FLAG_NCQ_EN) {
++			/*
++			 * The host queue (EDMA) is in NCQ mode.
++			 * If the new qc is also an NCQ command,
++			 * then allow the new qc.
++			 */
++			if (qc->tf.protocol == ATA_PROT_NCQ)
++				return 0;
++		} else {
++			/*
++			 * The host queue (EDMA) is in non-NCQ, DMA mode.
++			 * If the new qc is also a non-NCQ, DMA command,
++			 * then allow the new qc.
++			 */
++			if (qc->tf.protocol == ATA_PROT_DMA)
++				return 0;
++		}
++	}
++	return ATA_DEFER_PORT;
++}
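
Stripped of the driver types, the new mv_qc_defer() above is a small decision table: defer during delayed EH, accept anything on an idle port, and otherwise accept only when the command's protocol matches the queue's current mode. A compilable restatement, with simplified enums that are not the driver's types:

    #include <stdio.h>

    /* Restatement of the mv_qc_defer() decision table.
     * Returns 1 for "defer" (ATA_DEFER_PORT), 0 for "accept".
     */
    enum proto { PROTO_NCQ, PROTO_DMA, PROTO_PIO };

    static int defer(int delayed_eh, int nr_active_links,
                     int edma_en, int ncq_mode, enum proto p)
    {
        if (delayed_eh)
            return 1;       /* EH pending: always defer */
        if (nr_active_links == 0)
            return 0;       /* idle port: accept anything */
        if (edma_en && ncq_mode && p == PROTO_NCQ)
            return 0;       /* NCQ joins an NCQ queue */
        if (edma_en && !ncq_mode && p == PROTO_DMA)
            return 0;       /* DMA joins a non-NCQ queue */
        return 1;
    }

    int main(void)
    {
        printf("NCQ into NCQ queue: %s\n",
               defer(0, 2, 1, 1, PROTO_NCQ) ? "defer" : "accept");
        printf("PIO into busy NCQ queue: %s\n",
               defer(0, 2, 1, 1, PROTO_PIO) ? "defer" : "accept");
        return 0;
    }
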
++
++static void mv_config_fbs(void __iomem *port_mmio, int want_ncq, int want_fbs)
++{
++	u32 new_fiscfg, old_fiscfg;
++	u32 new_ltmode, old_ltmode;
++	u32 new_haltcond, old_haltcond;
++
++	old_fiscfg   = readl(port_mmio + FISCFG_OFS);
++	old_ltmode   = readl(port_mmio + LTMODE_OFS);
++	old_haltcond = readl(port_mmio + EDMA_HALTCOND_OFS);
++
++	new_fiscfg   = old_fiscfg & ~(FISCFG_SINGLE_SYNC | FISCFG_WAIT_DEV_ERR);
++	new_ltmode   = old_ltmode & ~LTMODE_BIT8;
++	new_haltcond = old_haltcond | EDMA_ERR_DEV;
++
++	if (want_fbs) {
++		new_fiscfg = old_fiscfg | FISCFG_SINGLE_SYNC;
++		new_ltmode = old_ltmode | LTMODE_BIT8;
++		if (want_ncq)
++			new_haltcond &= ~EDMA_ERR_DEV;
++		else
++			new_fiscfg |=  FISCFG_WAIT_DEV_ERR;
++	}
++
++	if (new_fiscfg != old_fiscfg)
++		writelfl(new_fiscfg, port_mmio + FISCFG_OFS);
+ 	if (new_ltmode != old_ltmode)
+ 		writelfl(new_ltmode, port_mmio + LTMODE_OFS);
++	if (new_haltcond != old_haltcond)
++		writelfl(new_haltcond, port_mmio + EDMA_HALTCOND_OFS);
++}
++
++static void mv_60x1_errata_sata25(struct ata_port *ap, int want_ncq)
++{
++	struct mv_host_priv *hpriv = ap->host->private_data;
++	u32 old, new;
++
++	/* workaround for 88SX60x1 FEr SATA#25 (part 1) */
++	old = readl(hpriv->base + MV_GPIO_PORT_CTL_OFS);
++	if (want_ncq)
++		new = old | (1 << 22);
++	else
++		new = old & ~(1 << 22);
++	if (new != old)
++		writel(new, hpriv->base + MV_GPIO_PORT_CTL_OFS);
+ }
+ 
+ static void mv_edma_cfg(struct ata_port *ap, int want_ncq)
+@@ -1088,25 +1201,40 @@ static void mv_edma_cfg(struct ata_port *ap, int want_ncq)
+ 
+ 	/* set up non-NCQ EDMA configuration */
+ 	cfg = EDMA_CFG_Q_DEPTH;		/* always 0x1f for *all* chips */
++	pp->pp_flags &= ~MV_PP_FLAG_FBS_EN;
+ 
+ 	if (IS_GEN_I(hpriv))
+ 		cfg |= (1 << 8);	/* enab config burst size mask */
+ 
+-	else if (IS_GEN_II(hpriv))
++	else if (IS_GEN_II(hpriv)) {
+ 		cfg |= EDMA_CFG_RD_BRST_EXT | EDMA_CFG_WR_BUFF_LEN;
++		mv_60x1_errata_sata25(ap, want_ncq);
+ 
+-	else if (IS_GEN_IIE(hpriv)) {
+-		cfg |= (1 << 23);	/* do not mask PM field in rx'd FIS */
+-		cfg |= (1 << 22);	/* enab 4-entry host queue cache */
+-		cfg |= (1 << 18);	/* enab early completion */
+-		cfg |= (1 << 17);	/* enab cut-through (dis stor&forwrd) */
++	} else if (IS_GEN_IIE(hpriv)) {
++		int want_fbs = sata_pmp_attached(ap);
++		/*
++		 * Possible future enhancement:
++		 *
++		 * The chip can use FBS with non-NCQ, if we allow it,
++		 * But first we need to have the error handling in place
++		 * for this mode (datasheet section 7.3.15.4.2.3).
++		 * So disallow non-NCQ FBS for now.
++		 */
++		want_fbs &= want_ncq;
++
++		mv_config_fbs(port_mmio, want_ncq, want_fbs);
+ 
+-		if (want_ncq && sata_pmp_attached(ap)) {
++		if (want_fbs) {
++			pp->pp_flags |= MV_PP_FLAG_FBS_EN;
+ 			cfg |= EDMA_CFG_EDMA_FBS; /* FIS-based switching */
+-			mv_config_fbs(port_mmio, 1);
+-		} else {
+-			mv_config_fbs(port_mmio, 0);
+ 		}
++
++		cfg |= (1 << 23);	/* do not mask PM field in rx'd FIS */
++		cfg |= (1 << 22);	/* enab 4-entry host queue cache */
++		if (HAS_PCI(ap->host))
++			cfg |= (1 << 18);	/* enab early completion */
++		if (hpriv->hp_flags & MV_HP_CUT_THROUGH)
++			cfg |= (1 << 17); /* enab cut-thru (dis stor&forwrd) */
+ 	}
+ 
+ 	if (want_ncq) {
+@@ -1483,25 +1611,186 @@ static struct ata_queued_cmd *mv_get_active_qc(struct ata_port *ap)
+ 	return qc;
+ }
+ 
+-static void mv_unexpected_intr(struct ata_port *ap)
++static void mv_pmp_error_handler(struct ata_port *ap)
+ {
++	unsigned int pmp, pmp_map;
+ 	struct mv_port_priv *pp = ap->private_data;
+-	struct ata_eh_info *ehi = &ap->link.eh_info;
+-	char *when = "";
+ 
++	if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH) {
++		/*
++		 * Perform NCQ error analysis on failed PMPs
++		 * before we freeze the port entirely.
++		 *
++		 * The failed PMPs are marked earlier by mv_pmp_eh_prep().
++		 */
++		pmp_map = pp->delayed_eh_pmp_map;
++		pp->pp_flags &= ~MV_PP_FLAG_DELAYED_EH;
++		for (pmp = 0; pmp_map != 0; pmp++) {
++			unsigned int this_pmp = (1 << pmp);
++			if (pmp_map & this_pmp) {
++				struct ata_link *link = &ap->pmp_link[pmp];
++				pmp_map &= ~this_pmp;
++				ata_eh_analyze_ncq_error(link);
++			}
++		}
++		ata_port_freeze(ap);
++	}
++	sata_pmp_error_handler(ap);
++}
++
++static unsigned int mv_get_err_pmp_map(struct ata_port *ap)
++{
++	void __iomem *port_mmio = mv_ap_base(ap);
++
++	return readl(port_mmio + SATA_TESTCTL_OFS) >> 16;
++}
++
++static void mv_pmp_eh_prep(struct ata_port *ap, unsigned int pmp_map)
++{
++	struct ata_eh_info *ehi;
++	unsigned int pmp;
++
++	/*
++	 * Initialize EH info for PMPs which saw device errors
++	 */
++	ehi = &ap->link.eh_info;
++	for (pmp = 0; pmp_map != 0; pmp++) {
++		unsigned int this_pmp = (1 << pmp);
++		if (pmp_map & this_pmp) {
++			struct ata_link *link = &ap->pmp_link[pmp];
++
++			pmp_map &= ~this_pmp;
++			ehi = &link->eh_info;
++			ata_ehi_clear_desc(ehi);
++			ata_ehi_push_desc(ehi, "dev err");
++			ehi->err_mask |= AC_ERR_DEV;
++			ehi->action |= ATA_EH_RESET;
++			ata_link_abort(link);
++		}
++	}
++}
++
++static int mv_handle_fbs_ncq_dev_err(struct ata_port *ap)
++{
++	struct mv_port_priv *pp = ap->private_data;
++	int failed_links;
++	unsigned int old_map, new_map;
++
++	/*
++	 * Device error during FBS+NCQ operation:
++	 *
++	 * Set a port flag to prevent further I/O being enqueued.
++	 * Leave the EDMA running to drain outstanding commands from this port.
++	 * Perform the post-mortem/EH only when all responses are complete.
++	 * Follow recovery sequence from 6042/7042 datasheet (7.3.15.4.2.2).
++	 */
++	if (!(pp->pp_flags & MV_PP_FLAG_DELAYED_EH)) {
++		pp->pp_flags |= MV_PP_FLAG_DELAYED_EH;
++		pp->delayed_eh_pmp_map = 0;
++	}
++	old_map = pp->delayed_eh_pmp_map;
++	new_map = old_map | mv_get_err_pmp_map(ap);
++
++	if (old_map != new_map) {
++		pp->delayed_eh_pmp_map = new_map;
++		mv_pmp_eh_prep(ap, new_map & ~old_map);
++	}
++	failed_links = hweight16(new_map);
++
++	ata_port_printk(ap, KERN_INFO, "%s: pmp_map=%04x qc_map=%04x "
++			"failed_links=%d nr_active_links=%d\n",
++			__func__, pp->delayed_eh_pmp_map,
++			ap->qc_active, failed_links,
++			ap->nr_active_links);
++
++	if (ap->nr_active_links <= failed_links) {
++		mv_process_crpb_entries(ap, pp);
++		mv_stop_edma(ap);
++		mv_eh_freeze(ap);
++		ata_port_printk(ap, KERN_INFO, "%s: done\n", __func__);
++		return 1;	/* handled */
++	}
++	ata_port_printk(ap, KERN_INFO, "%s: waiting\n", __func__);
++	return 1;	/* handled */
++}
++
++static int mv_handle_fbs_non_ncq_dev_err(struct ata_port *ap)
++{
+ 	/*
+-	 * We got a device interrupt from something that
+-	 * was supposed to be using EDMA or polling.
++	 * Possible future enhancement:
++	 *
++	 * FBS+non-NCQ operation is not yet implemented.
++	 * See related notes in mv_edma_cfg().
++	 *
++	 * Device error during FBS+non-NCQ operation:
++	 *
++	 * We need to snapshot the shadow registers for each failed command.
++	 * Follow recovery sequence from 6042/7042 datasheet (7.3.15.4.2.3).
+ 	 */
++	return 0;	/* not handled */
++}
++
++static int mv_handle_dev_err(struct ata_port *ap, u32 edma_err_cause)
++{
++	struct mv_port_priv *pp = ap->private_data;
++
++	if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN))
++		return 0;	/* EDMA was not active: not handled */
++	if (!(pp->pp_flags & MV_PP_FLAG_FBS_EN))
++		return 0;	/* FBS was not active: not handled */
++
++	if (!(edma_err_cause & EDMA_ERR_DEV))
++		return 0;	/* non DEV error: not handled */
++	edma_err_cause &= ~EDMA_ERR_IRQ_TRANSIENT;
++	if (edma_err_cause & ~(EDMA_ERR_DEV | EDMA_ERR_SELF_DIS))
++		return 0;	/* other problems: not handled */
++
++	if (pp->pp_flags & MV_PP_FLAG_NCQ_EN) {
++		/*
++		 * EDMA should NOT have self-disabled for this case.
++		 * If it did, then something is wrong elsewhere,
++		 * and we cannot handle it here.
++		 */
++		if (edma_err_cause & EDMA_ERR_SELF_DIS) {
++			ata_port_printk(ap, KERN_WARNING,
++				"%s: err_cause=0x%x pp_flags=0x%x\n",
++				__func__, edma_err_cause, pp->pp_flags);
++			return 0; /* not handled */
++		}
++		return mv_handle_fbs_ncq_dev_err(ap);
++	} else {
++		/*
++		 * EDMA should have self-disabled for this case.
++		 * If it did not, then something is wrong elsewhere,
++		 * and we cannot handle it here.
++		 */
++		if (!(edma_err_cause & EDMA_ERR_SELF_DIS)) {
++			ata_port_printk(ap, KERN_WARNING,
++				"%s: err_cause=0x%x pp_flags=0x%x\n",
++				__func__, edma_err_cause, pp->pp_flags);
++			return 0; /* not handled */
++		}
++		return mv_handle_fbs_non_ncq_dev_err(ap);
++	}
++	return 0;	/* not handled */
++}
++
++static void mv_unexpected_intr(struct ata_port *ap, int edma_was_enabled)
++{
++	struct ata_eh_info *ehi = &ap->link.eh_info;
++	char *when = "idle";
++
+ 	ata_ehi_clear_desc(ehi);
+-	if (pp->pp_flags & MV_PP_FLAG_EDMA_EN) {
+-		when = " while EDMA enabled";
++	if (!ap || (ap->flags & ATA_FLAG_DISABLED)) {
++		when = "disabled";
++	} else if (edma_was_enabled) {
++		when = "EDMA enabled";
+ 	} else {
+ 		struct ata_queued_cmd *qc = ata_qc_from_tag(ap, ap->link.active_tag);
+ 		if (qc && (qc->tf.flags & ATA_TFLAG_POLLING))
+-			when = " while polling";
++			when = "polling";
+ 	}
+-	ata_ehi_push_desc(ehi, "unexpected device interrupt%s", when);
++	ata_ehi_push_desc(ehi, "unexpected device interrupt while %s", when);
+ 	ehi->err_mask |= AC_ERR_OTHER;
+ 	ehi->action   |= ATA_EH_RESET;
+ 	ata_port_freeze(ap);
+@@ -1519,7 +1808,7 @@ static void mv_unexpected_intr(struct ata_port *ap)
+  *      LOCKING:
+  *      Inherited from caller.
+  */
+-static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
++static void mv_err_intr(struct ata_port *ap)
+ {
+ 	void __iomem *port_mmio = mv_ap_base(ap);
+ 	u32 edma_err_cause, eh_freeze_mask, serr = 0;
+@@ -1527,24 +1816,42 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
+ 	struct mv_host_priv *hpriv = ap->host->private_data;
+ 	unsigned int action = 0, err_mask = 0;
+ 	struct ata_eh_info *ehi = &ap->link.eh_info;
+-
+-	ata_ehi_clear_desc(ehi);
++	struct ata_queued_cmd *qc;
++	int abort = 0;
+ 
+ 	/*
+-	 * Read and clear the err_cause bits.  This won't actually
+-	 * clear for some errors (eg. SError), but we will be doing
+-	 * a hard reset in those cases regardless, which *will* clear it.
++	 * Read and clear the SError and err_cause bits.
+ 	 */
++	sata_scr_read(&ap->link, SCR_ERROR, &serr);
++	sata_scr_write_flush(&ap->link, SCR_ERROR, serr);
++
+ 	edma_err_cause = readl(port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
+ 	writelfl(~edma_err_cause, port_mmio + EDMA_ERR_IRQ_CAUSE_OFS);
+ 
+-	ata_ehi_push_desc(ehi, "edma_err_cause=%08x", edma_err_cause);
++	ata_port_printk(ap, KERN_INFO, "%s: err_cause=%08x pp_flags=0x%x\n",
++			__func__, edma_err_cause, pp->pp_flags);
++
++	if (edma_err_cause & EDMA_ERR_DEV) {
++		/*
++		 * Device errors during FIS-based switching operation
++		 * require special handling.
++		 */
++		if (mv_handle_dev_err(ap, edma_err_cause))
++			return;
++	}
+ 
++	qc = mv_get_active_qc(ap);
++	ata_ehi_clear_desc(ehi);
++	ata_ehi_push_desc(ehi, "edma_err_cause=%08x pp_flags=%08x",
++			  edma_err_cause, pp->pp_flags);
+ 	/*
+ 	 * All generations share these EDMA error cause bits:
+ 	 */
+-	if (edma_err_cause & EDMA_ERR_DEV)
++	if (edma_err_cause & EDMA_ERR_DEV) {
+ 		err_mask |= AC_ERR_DEV;
++		action |= ATA_EH_RESET;
++		ata_ehi_push_desc(ehi, "dev error");
++	}
+ 	if (edma_err_cause & (EDMA_ERR_D_PAR | EDMA_ERR_PRD_PAR |
+ 			EDMA_ERR_CRQB_PAR | EDMA_ERR_CRPB_PAR |
+ 			EDMA_ERR_INTRL_PAR)) {
+@@ -1576,13 +1883,6 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
+ 			ata_ehi_push_desc(ehi, "EDMA self-disable");
+ 		}
+ 		if (edma_err_cause & EDMA_ERR_SERR) {
+-			/*
+-			 * Ensure that we read our own SCR, not a pmp link SCR:
+-			 */
+-			ap->ops->scr_read(ap, SCR_ERROR, &serr);
+-			/*
+-			 * Don't clear SError here; leave it for libata-eh:
+-			 */
+ 			ata_ehi_push_desc(ehi, "SError=%08x", serr);
+ 			err_mask |= AC_ERR_ATA_BUS;
+ 			action |= ATA_EH_RESET;
+@@ -1602,10 +1902,29 @@ static void mv_err_intr(struct ata_port *ap, struct ata_queued_cmd *qc)
+ 	else
+ 		ehi->err_mask |= err_mask;
+ 
+-	if (edma_err_cause & eh_freeze_mask)
++	if (err_mask == AC_ERR_DEV) {
++		/*
++		 * Cannot do ata_port_freeze() here,
++		 * because it would kill PIO access,
++		 * which is needed for further diagnosis.
++		 */
++		mv_eh_freeze(ap);
++		abort = 1;
++	} else if (edma_err_cause & eh_freeze_mask) {
++		/*
++		 * Note to self: ata_port_freeze() calls ata_port_abort()
++		 */
+ 		ata_port_freeze(ap);
+-	else
+-		ata_port_abort(ap);
++	} else {
++		abort = 1;
++	}
++
++	if (abort) {
++		if (qc)
++			ata_link_abort(qc->dev->link);
++		else
++			ata_port_abort(ap);
++	}
+ }
+ 
+ static void mv_process_crpb_response(struct ata_port *ap,
+@@ -1632,8 +1951,9 @@ static void mv_process_crpb_response(struct ata_port *ap,
+ 			}
+ 		}
+ 		ata_status = edma_status >> CRPB_FLAG_STATUS_SHIFT;
+-		qc->err_mask |= ac_err_mask(ata_status);
+-		ata_qc_complete(qc);
++		if (!ac_err_mask(ata_status))
++			ata_qc_complete(qc);
++		/* else: leave it for mv_err_intr() */
+ 	} else {
+ 		ata_port_printk(ap, KERN_ERR, "%s: no qc for tag=%d\n",
+ 				__func__, tag);
+@@ -1677,6 +1997,44 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp
+ 			 port_mmio + EDMA_RSP_Q_OUT_PTR_OFS);
+ }
+ 
++static void mv_port_intr(struct ata_port *ap, u32 port_cause)
++{
++	struct mv_port_priv *pp;
++	int edma_was_enabled;
++
++	if (!ap || (ap->flags & ATA_FLAG_DISABLED)) {
++		mv_unexpected_intr(ap, 0);
++		return;
++	}
++	/*
++	 * Grab a snapshot of the EDMA_EN flag setting,
++	 * so that we have a consistent view for this port,
++	 * even if one of the routines we call changes it.
++	 */
++	pp = ap->private_data;
++	edma_was_enabled = (pp->pp_flags & MV_PP_FLAG_EDMA_EN);
++	/*
++	 * Process completed CRPB response(s) before other events.
++	 */
++	if (edma_was_enabled && (port_cause & DONE_IRQ)) {
++		mv_process_crpb_entries(ap, pp);
++		if (pp->pp_flags & MV_PP_FLAG_DELAYED_EH)
++			mv_handle_fbs_ncq_dev_err(ap);
++	}
++	/*
++	 * Handle chip-reported errors, or continue on to handle PIO.
++	 */
++	if (unlikely(port_cause & ERR_IRQ)) {
++		mv_err_intr(ap);
++	} else if (!edma_was_enabled) {
++		struct ata_queued_cmd *qc = mv_get_active_qc(ap);
++		if (qc)
++			ata_sff_host_intr(ap, qc);
++		else
++			mv_unexpected_intr(ap, edma_was_enabled);
++	}
++}
++
+ /**
+  *      mv_host_intr - Handle all interrupts on the given host controller
+  *      @host: host specific structure
+@@ -1688,66 +2046,58 @@ static void mv_process_crpb_entries(struct ata_port *ap, struct mv_port_priv *pp
+ static int mv_host_intr(struct ata_host *host, u32 main_irq_cause)
+ {
+ 	struct mv_host_priv *hpriv = host->private_data;
+-	void __iomem *mmio = hpriv->base, *hc_mmio = NULL;
+-	u32 hc_irq_cause = 0;
++	void __iomem *mmio = hpriv->base, *hc_mmio;
+ 	unsigned int handled = 0, port;
+ 
+ 	for (port = 0; port < hpriv->n_ports; port++) {
+ 		struct ata_port *ap = host->ports[port];
+-		struct mv_port_priv *pp;
+-		unsigned int shift, hardport, port_cause;
+-		/*
+-		 * When we move to the second hc, flag our cached
+-		 * copies of hc_mmio (and hc_irq_cause) as invalid again.
+-		 */
+-		if (port == MV_PORTS_PER_HC)
+-			hc_mmio = NULL;
+-		/*
+-		 * Do nothing if port is not interrupting or is disabled:
+-		 */
++		unsigned int p, shift, hardport, port_cause;
++
+ 		MV_PORT_TO_SHIFT_AND_HARDPORT(port, shift, hardport);
+-		port_cause = (main_irq_cause >> shift) & (DONE_IRQ | ERR_IRQ);
+-		if (!port_cause || !ap || (ap->flags & ATA_FLAG_DISABLED))
+-			continue;
+ 		/*
+-		 * Each hc within the host has its own hc_irq_cause register.
+-		 * We defer reading it until we know we need it, right now:
+-		 *
+-		 * FIXME later: we don't really need to read this register
+-		 * (some logic changes required below if we go that way),
+-		 * because it doesn't tell us anything new.  But we do need
+-		 * to write to it, outside the top of this loop,
+-		 * to reset the interrupt triggers for next time.
++		 * Each hc within the host has its own hc_irq_cause register,
++		 * where the interrupting ports bits get ack'd.
+ 		 */
+-		if (!hc_mmio) {
++		if (hardport == 0) {	/* first port on this hc ? */
++			u32 hc_cause = (main_irq_cause >> shift) & HC0_IRQ_PEND;
++			u32 port_mask, ack_irqs;
++			/*
++			 * Skip this entire hc if nothing is pending for any of its ports
++			 */
++			if (!hc_cause) {
++				port += MV_PORTS_PER_HC - 1;
++				continue;
++			}
++			/*
++			 * We don't need/want to read the hc_irq_cause register,
++			 * because doing so hurts performance, and
++			 * main_irq_cause already gives us everything we need.
++			 *
++			 * But we do have to *write* to the hc_irq_cause to ack
++			 * the ports that we are handling this time through.
++			 *
++			 * This requires that we create a bitmap for those
++			 * ports which interrupted us, and use that bitmap
++			 * to ack (only) those ports via hc_irq_cause.
++			 */
++			ack_irqs = 0;
++			for (p = 0; p < MV_PORTS_PER_HC; ++p) {
++				if ((port + p) >= hpriv->n_ports)
++					break;
++				port_mask = (DONE_IRQ | ERR_IRQ) << (p * 2);
++				if (hc_cause & port_mask)
++					ack_irqs |= (DMA_IRQ | DEV_IRQ) << p;
++			}
+ 			hc_mmio = mv_hc_base_from_port(mmio, port);
+-			hc_irq_cause = readl(hc_mmio + HC_IRQ_CAUSE_OFS);
+-			writelfl(~hc_irq_cause, hc_mmio + HC_IRQ_CAUSE_OFS);
++			writelfl(~ack_irqs, hc_mmio + HC_IRQ_CAUSE_OFS);
+ 			handled = 1;
+ 		}
+ 		/*
+-		 * Process completed CRPB response(s) before other events.
+-		 */
+-		pp = ap->private_data;
+-		if (hc_irq_cause & (DMA_IRQ << hardport)) {
+-			if (pp->pp_flags & MV_PP_FLAG_EDMA_EN)
+-				mv_process_crpb_entries(ap, pp);
+-		}
+-		/*
+-		 * Handle chip-reported errors, or continue on to handle PIO.
++		 * Handle interrupts signalled for this port:
+ 		 */
+-		if (unlikely(port_cause & ERR_IRQ)) {
+-			mv_err_intr(ap, mv_get_active_qc(ap));
+-		} else if (hc_irq_cause & (DEV_IRQ << hardport)) {
+-			if (!(pp->pp_flags & MV_PP_FLAG_EDMA_EN)) {
+-				struct ata_queued_cmd *qc = mv_get_active_qc(ap);
+-				if (qc) {
+-					ata_sff_host_intr(ap, qc);
+-					continue;
+-				}
+-			}
+-			mv_unexpected_intr(ap);
+-		}
++		port_cause = (main_irq_cause >> shift) & (DONE_IRQ | ERR_IRQ);
++		if (port_cause)
++			mv_port_intr(ap, port_cause);
+ 	}
+ 	return handled;
+ }
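
The rewritten loop above replaces a read of hc_irq_cause with a computed ack mask: main_irq_cause already says which ports interrupted, so only a write is needed, translating per-port DONE|ERR bits into the per-hc DMA|DEV ack bits. A standalone sketch of that computation; the bit layouts are taken to match the driver's enums, so treat the constants here as assumptions:

    #include <stdio.h>

    #define MV_PORTS_PER_HC 4
    #define DONE_IRQ (1 << 0)   /* per-port pair in main_irq_cause */
    #define ERR_IRQ  (1 << 1)
    #define DMA_IRQ  (1 << 0)   /* per-port in hc_irq_cause */
    #define DEV_IRQ  (1 << 8)

    int main(void)
    {
        /* ports 0 (done) and 2 (error) interrupting */
        unsigned int hc_cause = (DONE_IRQ << 0) | (ERR_IRQ << 4);
        unsigned int ack_irqs = 0;
        int p;

        for (p = 0; p < MV_PORTS_PER_HC; ++p) {
            unsigned int port_mask = (DONE_IRQ | ERR_IRQ) << (p * 2);
            if (hc_cause & port_mask)
                ack_irqs |= (DMA_IRQ | DEV_IRQ) << p;
        }
        printf("ack_irqs = 0x%x\n", ack_irqs);  /* prints 0x505 */
        return 0;
    }
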
+@@ -1894,7 +2244,7 @@ static void mv5_reset_bus(struct ata_host *host, void __iomem *mmio)
+ 
+ static void mv5_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
+ {
+-	writel(0x0fcfffff, mmio + MV_FLASH_CTL);
++	writel(0x0fcfffff, mmio + MV_FLASH_CTL_OFS);
+ }
+ 
+ static void mv5_read_preamp(struct mv_host_priv *hpriv, int idx,
+@@ -1913,7 +2263,7 @@ static void mv5_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio)
+ {
+ 	u32 tmp;
+ 
+-	writel(0, mmio + MV_GPIO_PORT_CTL);
++	writel(0, mmio + MV_GPIO_PORT_CTL_OFS);
+ 
+ 	/* FIXME: handle MV_HP_ERRATA_50XXB2 errata */
+ 
+@@ -1931,14 +2281,14 @@ static void mv5_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
+ 	int fix_apm_sq = (hpriv->hp_flags & MV_HP_ERRATA_50XXB0);
+ 
+ 	if (fix_apm_sq) {
+-		tmp = readl(phy_mmio + MV5_LT_MODE);
++		tmp = readl(phy_mmio + MV5_LTMODE_OFS);
+ 		tmp |= (1 << 19);
+-		writel(tmp, phy_mmio + MV5_LT_MODE);
++		writel(tmp, phy_mmio + MV5_LTMODE_OFS);
+ 
+-		tmp = readl(phy_mmio + MV5_PHY_CTL);
++		tmp = readl(phy_mmio + MV5_PHY_CTL_OFS);
+ 		tmp &= ~0x3;
+ 		tmp |= 0x1;
+-		writel(tmp, phy_mmio + MV5_PHY_CTL);
++		writel(tmp, phy_mmio + MV5_PHY_CTL_OFS);
+ 	}
+ 
+ 	tmp = readl(phy_mmio + MV5_PHY_MODE);
+@@ -1956,11 +2306,6 @@ static void mv5_reset_hc_port(struct mv_host_priv *hpriv, void __iomem *mmio,
+ {
+ 	void __iomem *port_mmio = mv_port_base(mmio, port);
+ 
+-	/*
+-	 * The datasheet warns against setting ATA_RST when EDMA is active
+-	 * (but doesn't say what the problem might be).  So we first try
+-	 * to disable the EDMA engine before doing the ATA_RST operation.
+-	 */
+ 	mv_reset_channel(hpriv, mmio, port);
+ 
+ 	ZERO(0x028);	/* command */
+@@ -1975,7 +2320,7 @@ static void mv5_reset_hc_port(struct mv_host_priv *hpriv, void __iomem *mmio,
+ 	ZERO(0x024);	/* respq outp */
+ 	ZERO(0x020);	/* respq inp */
+ 	ZERO(0x02c);	/* test control */
+-	writel(0xbc, port_mmio + EDMA_IORDY_TMOUT);
++	writel(0xbc, port_mmio + EDMA_IORDY_TMOUT_OFS);
+ }
+ #undef ZERO
+ 
+@@ -2021,13 +2366,13 @@ static void mv_reset_pci_bus(struct ata_host *host, void __iomem *mmio)
+ 	struct mv_host_priv *hpriv = host->private_data;
+ 	u32 tmp;
+ 
+-	tmp = readl(mmio + MV_PCI_MODE);
++	tmp = readl(mmio + MV_PCI_MODE_OFS);
+ 	tmp &= 0xff00ffff;
+-	writel(tmp, mmio + MV_PCI_MODE);
++	writel(tmp, mmio + MV_PCI_MODE_OFS);
+ 
+ 	ZERO(MV_PCI_DISC_TIMER);
+ 	ZERO(MV_PCI_MSI_TRIGGER);
+-	writel(0x000100ff, mmio + MV_PCI_XBAR_TMOUT);
++	writel(0x000100ff, mmio + MV_PCI_XBAR_TMOUT_OFS);
+ 	ZERO(PCI_HC_MAIN_IRQ_MASK_OFS);
+ 	ZERO(MV_PCI_SERR_MASK);
+ 	ZERO(hpriv->irq_cause_ofs);
+@@ -2045,10 +2390,10 @@ static void mv6_reset_flash(struct mv_host_priv *hpriv, void __iomem *mmio)
+ 
+ 	mv5_reset_flash(hpriv, mmio);
+ 
+-	tmp = readl(mmio + MV_GPIO_PORT_CTL);
++	tmp = readl(mmio + MV_GPIO_PORT_CTL_OFS);
+ 	tmp &= 0x3;
+ 	tmp |= (1 << 5) | (1 << 6);
+-	writel(tmp, mmio + MV_GPIO_PORT_CTL);
++	writel(tmp, mmio + MV_GPIO_PORT_CTL_OFS);
+ }
+ 
+ /**
+@@ -2121,7 +2466,7 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
+ 	void __iomem *port_mmio;
+ 	u32 tmp;
+ 
+-	tmp = readl(mmio + MV_RESET_CFG);
++	tmp = readl(mmio + MV_RESET_CFG_OFS);
+ 	if ((tmp & (1 << 0)) == 0) {
+ 		hpriv->signal[idx].amps = 0x7 << 8;
+ 		hpriv->signal[idx].pre = 0x1 << 5;
+@@ -2137,7 +2482,7 @@ static void mv6_read_preamp(struct mv_host_priv *hpriv, int idx,
+ 
+ static void mv6_enable_leds(struct mv_host_priv *hpriv, void __iomem *mmio)
+ {
+-	writel(0x00000060, mmio + MV_GPIO_PORT_CTL);
++	writel(0x00000060, mmio + MV_GPIO_PORT_CTL_OFS);
+ }
+ 
+ static void mv6_phy_errata(struct mv_host_priv *hpriv, void __iomem *mmio,
+@@ -2235,11 +2580,6 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
+ {
+ 	void __iomem *port_mmio = mv_port_base(mmio, port);
+ 
+-	/*
+-	 * The datasheet warns against setting ATA_RST when EDMA is active
+-	 * (but doesn't say what the problem might be).  So we first try
+-	 * to disable the EDMA engine before doing the ATA_RST operation.
+-	 */
+ 	mv_reset_channel(hpriv, mmio, port);
+ 
+ 	ZERO(0x028);		/* command */
+@@ -2254,7 +2594,7 @@ static void mv_soc_reset_hc_port(struct mv_host_priv *hpriv,
+ 	ZERO(0x024);		/* respq outp */
+ 	ZERO(0x020);		/* respq inp */
+ 	ZERO(0x02c);		/* test control */
+-	writel(0xbc, port_mmio + EDMA_IORDY_TMOUT);
++	writel(0xbc, port_mmio + EDMA_IORDY_TMOUT_OFS);
+ }
+ 
+ #undef ZERO
+@@ -2297,38 +2637,39 @@ static void mv_soc_reset_bus(struct ata_host *host, void __iomem *mmio)
+ 	return;
+ }
+ 
+-static void mv_setup_ifctl(void __iomem *port_mmio, int want_gen2i)
++static void mv_setup_ifcfg(void __iomem *port_mmio, int want_gen2i)
+ {
+-	u32 ifctl = readl(port_mmio + SATA_INTERFACE_CFG);
++	u32 ifcfg = readl(port_mmio + SATA_INTERFACE_CFG_OFS);
+ 
+-	ifctl = (ifctl & 0xf7f) | 0x9b1000;	/* from chip spec */
++	ifcfg = (ifcfg & 0xf7f) | 0x9b1000;	/* from chip spec */
+ 	if (want_gen2i)
+-		ifctl |= (1 << 7);		/* enable gen2i speed */
+-	writelfl(ifctl, port_mmio + SATA_INTERFACE_CFG);
++		ifcfg |= (1 << 7);		/* enable gen2i speed */
++	writelfl(ifcfg, port_mmio + SATA_INTERFACE_CFG_OFS);
+ }
+ 
+-/*
+- * Caller must ensure that EDMA is not active,
+- * by first doing mv_stop_edma() where needed.
+- */
+ static void mv_reset_channel(struct mv_host_priv *hpriv, void __iomem *mmio,
+ 			     unsigned int port_no)
+ {
+ 	void __iomem *port_mmio = mv_port_base(mmio, port_no);
+ 
++	/*
++	 * The datasheet warns against setting EDMA_RESET when EDMA is active
++	 * (but doesn't say what the problem might be).  So we first try
++	 * to disable the EDMA engine before doing the EDMA_RESET operation.
++	 */
+ 	mv_stop_edma_engine(port_mmio);
+-	writelfl(ATA_RST, port_mmio + EDMA_CMD_OFS);
++	writelfl(EDMA_RESET, port_mmio + EDMA_CMD_OFS);
+ 
+ 	if (!IS_GEN_I(hpriv)) {
+-		/* Enable 3.0gb/s link speed */
+-		mv_setup_ifctl(port_mmio, 1);
++		/* Enable 3.0gb/s link speed: this survives EDMA_RESET */
++		mv_setup_ifcfg(port_mmio, 1);
+ 	}
+ 	/*
+-	 * Strobing ATA_RST here causes a hard reset of the SATA transport,
++	 * Strobing EDMA_RESET here causes a hard reset of the SATA transport,
+ 	 * link, and physical layers.  It resets all SATA interface registers
+ 	 * (except for SATA_INTERFACE_CFG), and issues a COMRESET to the dev.
+ 	 */
+-	writelfl(ATA_RST, port_mmio + EDMA_CMD_OFS);
++	writelfl(EDMA_RESET, port_mmio + EDMA_CMD_OFS);
+ 	udelay(25);	/* allow reset propagation */
+ 	writelfl(0, port_mmio + EDMA_CMD_OFS);
+ 
+@@ -2392,7 +2733,7 @@ static int mv_hardreset(struct ata_link *link, unsigned int *class,
+ 		sata_scr_read(link, SCR_STATUS, &sstatus);
+ 		if (!IS_GEN_I(hpriv) && ++attempts >= 5 && sstatus == 0x121) {
+ 			/* Force 1.5gb/s link speed and try again */
+-			mv_setup_ifctl(mv_ap_base(ap), 0);
++			mv_setup_ifcfg(mv_ap_base(ap), 0);
+ 			if (time_after(jiffies + HZ, deadline))
+ 				extra = HZ; /* only extend it once, max */
+ 		}
+@@ -2493,6 +2834,34 @@ static void mv_port_init(struct ata_ioports *port,  void __iomem *port_mmio)
+ 		readl(port_mmio + EDMA_ERR_IRQ_MASK_OFS));
+ }
+ 
++static unsigned int mv_in_pcix_mode(struct ata_host *host)
++{
++	struct mv_host_priv *hpriv = host->private_data;
++	void __iomem *mmio = hpriv->base;
++	u32 reg;
++
++	if (!HAS_PCI(host) || !IS_PCIE(hpriv))
++		return 0;	/* not PCI-X capable */
++	reg = readl(mmio + MV_PCI_MODE_OFS);
++	if ((reg & MV_PCI_MODE_MASK) == 0)
++		return 0;	/* conventional PCI mode */
++	return 1;	/* chip is in PCI-X mode */
++}
++
++static int mv_pci_cut_through_okay(struct ata_host *host)
++{
++	struct mv_host_priv *hpriv = host->private_data;
++	void __iomem *mmio = hpriv->base;
++	u32 reg;
++
++	if (!mv_in_pcix_mode(host)) {
++		reg = readl(mmio + PCI_COMMAND_OFS);
++		if (reg & PCI_COMMAND_MRDTRIG)
++			return 0; /* not okay */
++	}
++	return 1; /* okay */
++}
++
+ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
+ {
+ 	struct pci_dev *pdev = to_pci_dev(host->dev);
+@@ -2560,7 +2929,7 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
+ 		break;
+ 
+ 	case chip_7042:
+-		hp_flags |= MV_HP_PCIE;
++		hp_flags |= MV_HP_PCIE | MV_HP_CUT_THROUGH;
+ 		if (pdev->vendor == PCI_VENDOR_ID_TTI &&
+ 		    (pdev->device == 0x2300 || pdev->device == 0x2310))
+ 		{
+@@ -2590,9 +2959,12 @@ static int mv_chip_id(struct ata_host *host, unsigned int board_idx)
+ 				" and avoid the final two gigabytes on"
+ 				" all RocketRAID BIOS initialized drives.\n");
+ 		}
++		/* drop through */
+ 	case chip_6042:
+ 		hpriv->ops = &mv6xxx_ops;
+ 		hp_flags |= MV_HP_GEN_IIE;
++		if (board_idx == chip_6042 && mv_pci_cut_through_okay(host))
++			hp_flags |= MV_HP_CUT_THROUGH;
+ 
+ 		switch (pdev->revision) {
+ 		case 0x0:
+diff --git a/drivers/base/sys.c b/drivers/base/sys.c
+index 4fbb56b..358bb0b 100644
+--- a/drivers/base/sys.c
++++ b/drivers/base/sys.c
+@@ -175,8 +175,7 @@ int sysdev_driver_register(struct sysdev_class *cls, struct sysdev_driver *drv)
+ 	}
+ 
+ 	/* Check whether this driver has already been added to a class. */
+-	if ((drv->entry.next != drv->entry.prev) ||
+-	    (drv->entry.next != NULL)) {
++	if (drv->entry.next && !list_empty(&drv->entry)) {
+ 		printk(KERN_WARNING "sysdev: class %s: driver (%p) has already"
+ 			" been registered to a class, something is wrong, but "
+ 			"will forge on!\n", cls->name, drv);
+diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c
+index 8fc429c..41f818b 100644
+--- a/drivers/block/aoe/aoecmd.c
++++ b/drivers/block/aoe/aoecmd.c
+@@ -755,11 +755,13 @@ diskstats(struct gendisk *disk, struct bio *bio, ulong duration, sector_t sector
+ {
+ 	unsigned long n_sect = bio->bi_size >> 9;
+ 	const int rw = bio_data_dir(bio);
++	struct hd_struct *part;
+ 
+-	all_stat_inc(disk, ios[rw], sector);
+-	all_stat_add(disk, ticks[rw], duration, sector);
+-	all_stat_add(disk, sectors[rw], n_sect, sector);
+-	all_stat_add(disk, io_ticks, duration, sector);
++	part = get_part(disk, sector);
++	all_stat_inc(disk, part, ios[rw], sector);
++	all_stat_add(disk, part, ticks[rw], duration, sector);
++	all_stat_add(disk, part, sectors[rw], n_sect, sector);
++	all_stat_add(disk, part, io_ticks, duration, sector);
+ }
+ 
+ void
+diff --git a/drivers/char/serial167.c b/drivers/char/serial167.c
+index fd2db07..3b23270 100644
+--- a/drivers/char/serial167.c
++++ b/drivers/char/serial167.c
+@@ -1073,7 +1073,7 @@ static int cy_put_char(struct tty_struct *tty, unsigned char ch)
+ 		return 0;
+ 
+ 	if (!info->xmit_buf)
+-		return;
++		return 0;
+ 
+ 	local_irq_save(flags);
+ 	if (info->xmit_cnt >= PAGE_SIZE - 1) {
+diff --git a/drivers/char/sx.c b/drivers/char/sx.c
+index f39f6fd..b1a7a8c 100644
+--- a/drivers/char/sx.c
++++ b/drivers/char/sx.c
+@@ -970,7 +970,8 @@ static int sx_set_real_termios(void *ptr)
+ 		sx_write_channel_byte(port, hi_mask, 0x1f);
+ 		break;
+ 	default:
+-		printk(KERN_INFO "sx: Invalid wordsize: %u\n", CFLAG & CSIZE);
++		printk(KERN_INFO "sx: Invalid wordsize: %u\n",
++			(unsigned int)CFLAG & CSIZE);
+ 		break;
+ 	}
+ 
+@@ -997,7 +998,8 @@ static int sx_set_real_termios(void *ptr)
+ 		set_bit(TTY_HW_COOK_IN, &port->gs.tty->flags);
+ 	}
+ 	sx_dprintk(SX_DEBUG_TERMIOS, "iflags: %x(%d) ",
+-			port->gs.tty->termios->c_iflag, I_OTHER(port->gs.tty));
++			(unsigned int)port->gs.tty->termios->c_iflag,
++			I_OTHER(port->gs.tty));
+ 
+ /* Tell line discipline whether we will do output cooking.
+  * If OPOST is set and no other output flags are set then we can do output
+@@ -1010,7 +1012,8 @@ static int sx_set_real_termios(void *ptr)
+ 		clear_bit(TTY_HW_COOK_OUT, &port->gs.tty->flags);
+ 	}
+ 	sx_dprintk(SX_DEBUG_TERMIOS, "oflags: %x(%d)\n",
+-			port->gs.tty->termios->c_oflag, O_OTHER(port->gs.tty));
++			(unsigned int)port->gs.tty->termios->c_oflag,
++			O_OTHER(port->gs.tty));
+ 	/* port->c_dcd = sx_get_CD (port); */
+ 	func_exit();
+ 	return 0;
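
The sx.c hunks are pure printk hygiene: the termios flag fields are not plain unsigned int on every configuration, so the %x/%u conversions need an explicit cast to keep -Wformat quiet. In miniature (the unsigned long typedef here is only to provoke the warning the patch fixes, and is an assumption about the offending configuration):

    #include <stdio.h>

    typedef unsigned long tcflag_t;   /* assumed wider-than-int case */

    int main(void)
    {
        tcflag_t c_iflag = 0x4500;

        /* cast makes the argument match the %x conversion */
        printf("iflags: %x\n", (unsigned int)c_iflag);
        return 0;
    }
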
+diff --git a/drivers/char/vt.c b/drivers/char/vt.c
+index e458b08..fa1ffbf 100644
+--- a/drivers/char/vt.c
++++ b/drivers/char/vt.c
+@@ -2742,6 +2742,10 @@ static int con_open(struct tty_struct *tty, struct file *filp)
+ 				tty->winsize.ws_row = vc_cons[currcons].d->vc_rows;
+ 				tty->winsize.ws_col = vc_cons[currcons].d->vc_cols;
+ 			}
++			if (vc->vc_utf)
++				tty->termios->c_iflag |= IUTF8;
++			else
++				tty->termios->c_iflag &= ~IUTF8;
+ 			release_console_sem();
+ 			vcs_make_sysfs(tty);
+ 			return ret;
+@@ -2918,6 +2922,8 @@ int __init vty_init(void)
+ 	console_driver->minor_start = 1;
+ 	console_driver->type = TTY_DRIVER_TYPE_CONSOLE;
+ 	console_driver->init_termios = tty_std_termios;
++	if (default_utf8)
++		console_driver->init_termios.c_iflag |= IUTF8;
+ 	console_driver->flags = TTY_DRIVER_REAL_RAW | TTY_DRIVER_RESET_TERMIOS;
+ 	tty_set_operations(console_driver, &con_ops);
+ 	if (tty_register_driver(console_driver))
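
The vt.c change mirrors the console's UTF-8 state into the tty's IUTF8 input flag, both at open time and in the driver's default termios. The flag handling is a one-liner each way; sketched below with IUTF8 as defined in the Linux termbits headers:

    #include <stdio.h>

    #define IUTF8 0040000   /* value from the uapi termbits headers */

    /* Mirror a boolean console property into the IUTF8 iflag bit. */
    static unsigned int sync_iutf8(unsigned int c_iflag, int vc_utf)
    {
        if (vc_utf)
            return c_iflag | IUTF8;
        return c_iflag & ~IUTF8;
    }

    int main(void)
    {
        printf("utf on:  %#o\n", sync_iutf8(0, 1));
        printf("utf off: %#o\n", sync_iutf8(IUTF8, 0));
        return 0;
    }
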
+diff --git a/drivers/edac/edac_core.h b/drivers/edac/edac_core.h
+index a9aa845..b27b13c 100644
+--- a/drivers/edac/edac_core.h
++++ b/drivers/edac/edac_core.h
+@@ -97,7 +97,7 @@ extern int edac_debug_level;
+ #define PCI_VEND_DEV(vend, dev) PCI_VENDOR_ID_ ## vend, \
+ 	PCI_DEVICE_ID_ ## vend ## _ ## dev
+ 
+-#define dev_name(dev) (dev)->dev_name
++#define edac_dev_name(dev) (dev)->dev_name
+ 
+ /* memory devices */
+ enum dev_type {
+diff --git a/drivers/edac/edac_device.c b/drivers/edac/edac_device.c
+index 63372fa..5fcd3d8 100644
+--- a/drivers/edac/edac_device.c
++++ b/drivers/edac/edac_device.c
+@@ -333,7 +333,7 @@ static int add_edac_dev_to_global_list(struct edac_device_ctl_info *edac_dev)
+ fail0:
+ 	edac_printk(KERN_WARNING, EDAC_MC,
+ 			"%s (%s) %s %s already assigned %d\n",
+-			rover->dev->bus_id, dev_name(rover),
++			rover->dev->bus_id, edac_dev_name(rover),
+ 			rover->mod_name, rover->ctl_name, rover->dev_idx);
+ 	return 1;
+ 
+@@ -538,7 +538,7 @@ int edac_device_add_device(struct edac_device_ctl_info *edac_dev)
+ 				"'%s': DEV '%s' (%s)\n",
+ 				edac_dev->mod_name,
+ 				edac_dev->ctl_name,
+-				dev_name(edac_dev),
++				edac_dev_name(edac_dev),
+ 				edac_op_state_to_string(edac_dev->op_state));
+ 
+ 	mutex_unlock(&device_ctls_mutex);
+@@ -599,7 +599,7 @@ struct edac_device_ctl_info *edac_device_del_device(struct device *dev)
+ 	edac_printk(KERN_INFO, EDAC_MC,
+ 		"Removed device %d for %s %s: DEV %s\n",
+ 		edac_dev->dev_idx,
+-		edac_dev->mod_name, edac_dev->ctl_name, dev_name(edac_dev));
++		edac_dev->mod_name, edac_dev->ctl_name, edac_dev_name(edac_dev));
+ 
+ 	return edac_dev;
+ }
+diff --git a/drivers/edac/edac_mc.c b/drivers/edac/edac_mc.c
+index a4cf164..d110392 100644
+--- a/drivers/edac/edac_mc.c
++++ b/drivers/edac/edac_mc.c
+@@ -402,7 +402,7 @@ static int add_mc_to_global_list(struct mem_ctl_info *mci)
+ fail0:
+ 	edac_printk(KERN_WARNING, EDAC_MC,
+ 		"%s (%s) %s %s already assigned %d\n", p->dev->bus_id,
+-		dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
++		edac_dev_name(mci), p->mod_name, p->ctl_name, p->mc_idx);
+ 	return 1;
+ 
+ fail1:
+@@ -517,7 +517,7 @@ int edac_mc_add_mc(struct mem_ctl_info *mci)
+ 
+ 	/* Report action taken */
+ 	edac_mc_printk(mci, KERN_INFO, "Giving out device to '%s' '%s':"
+-		" DEV %s\n", mci->mod_name, mci->ctl_name, dev_name(mci));
++		" DEV %s\n", mci->mod_name, mci->ctl_name, edac_dev_name(mci));
+ 
+ 	mutex_unlock(&mem_ctls_mutex);
+ 	return 0;
+@@ -565,7 +565,7 @@ struct mem_ctl_info *edac_mc_del_mc(struct device *dev)
+ 
+ 	edac_printk(KERN_INFO, EDAC_MC,
+ 		"Removed device %d for %s %s: DEV %s\n", mci->mc_idx,
+-		mci->mod_name, mci->ctl_name, dev_name(mci));
++		mci->mod_name, mci->ctl_name, edac_dev_name(mci));
+ 
+ 	return mci;
+ }
+diff --git a/drivers/edac/edac_pci.c b/drivers/edac/edac_pci.c
+index 9b24340..22ec9d5 100644
+--- a/drivers/edac/edac_pci.c
++++ b/drivers/edac/edac_pci.c
+@@ -150,7 +150,7 @@ static int add_edac_pci_to_global_list(struct edac_pci_ctl_info *pci)
+ fail0:
+ 	edac_printk(KERN_WARNING, EDAC_PCI,
+ 		"%s (%s) %s %s already assigned %d\n",
+-		rover->dev->bus_id, dev_name(rover),
++		rover->dev->bus_id, edac_dev_name(rover),
+ 		rover->mod_name, rover->ctl_name, rover->pci_idx);
+ 	return 1;
+ 
+@@ -360,7 +360,7 @@ int edac_pci_add_device(struct edac_pci_ctl_info *pci, int edac_idx)
+ 			" DEV '%s' (%s)\n",
+ 			pci->mod_name,
+ 			pci->ctl_name,
+-			dev_name(pci), edac_op_state_to_string(pci->op_state));
++			edac_dev_name(pci), edac_op_state_to_string(pci->op_state));
+ 
+ 	mutex_unlock(&edac_pci_ctls_mutex);
+ 	return 0;
+@@ -415,7 +415,7 @@ struct edac_pci_ctl_info *edac_pci_del_device(struct device *dev)
+ 
+ 	edac_printk(KERN_INFO, EDAC_PCI,
+ 		"Removed device %d for %s %s: DEV %s\n",
+-		pci->pci_idx, pci->mod_name, pci->ctl_name, dev_name(pci));
++		pci->pci_idx, pci->mod_name, pci->ctl_name, edac_dev_name(pci));
+ 
+ 	return pci;
+ }
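
All four edac hunks are the same mechanical rename: the subsystem's local dev_name() macro collides with the driver-core helper of the same name, and because it is a macro it silently rewrites every later call site in the translation unit. After the rename both spellings coexist; a toy demonstration (names invented for the demo):

    #include <stdio.h>

    struct edac_dev { const char *dev_name; };

    #define edac_dev_name(dev) ((dev)->dev_name)

    /* stand-in for the unrelated driver-core helper; with the old
     * "#define dev_name(dev)" in scope this definition would not
     * even have compiled as written.
     */
    static const char *dev_name(const struct edac_dev *dev)
    {
        (void)dev;
        return "driver-core name";
    }

    int main(void)
    {
        struct edac_dev d = { .dev_name = "edac-mc0" };

        printf("%s / %s\n", edac_dev_name(&d), dev_name(&d));
        return 0;
    }
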
+diff --git a/drivers/ide/ide-probe.c b/drivers/ide/ide-probe.c
+index 591deda..34b0d4f 100644
+--- a/drivers/ide/ide-probe.c
++++ b/drivers/ide/ide-probe.c
+@@ -1355,12 +1355,6 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
+ 	if (hwif->chipset != ide_dtc2278 || hwif->channel == 0)
+ 		hwif->port_ops = d->port_ops;
+ 
+-	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
+-	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base)) {
+-		if (hwif->mate)
+-			hwif->mate->serialized = hwif->serialized = 1;
+-	}
+-
+ 	hwif->swdma_mask = d->swdma_mask;
+ 	hwif->mwdma_mask = d->mwdma_mask;
+ 	hwif->ultra_mask = d->udma_mask;
+@@ -1382,6 +1376,12 @@ static void ide_init_port(ide_hwif_t *hwif, unsigned int port,
+ 			hwif->dma_ops = d->dma_ops;
+ 	}
+ 
++	if ((d->host_flags & IDE_HFLAG_SERIALIZE) ||
++	    ((d->host_flags & IDE_HFLAG_SERIALIZE_DMA) && hwif->dma_base)) {
++		if (hwif->mate)
++			hwif->mate->serialized = hwif->serialized = 1;
++	}
++
+ 	if (d->host_flags & IDE_HFLAG_RQSIZE_256)
+ 		hwif->rqsize = 256;
+ 
+diff --git a/drivers/ide/legacy/falconide.c b/drivers/ide/legacy/falconide.c
+index 83555ca..9e449a0 100644
+--- a/drivers/ide/legacy/falconide.c
++++ b/drivers/ide/legacy/falconide.c
+@@ -61,7 +61,7 @@ static void falconide_output_data(ide_drive_t *drive, struct request *rq,
+ 	unsigned long data_addr = drive->hwif->io_ports.data_addr;
+ 
+ 	if (drive->media == ide_disk && rq && rq->cmd_type == REQ_TYPE_FS)
+-		return outsw(data_adr, buf, (len + 1) / 2);
++		return outsw(data_addr, buf, (len + 1) / 2);
+ 
+ 	outsw_swapw(data_addr, buf, (len + 1) / 2);
+ }
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.c b/drivers/infiniband/hw/cxgb3/cxio_hal.c
+index ed2ee4b..ebf9d30 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_hal.c
++++ b/drivers/infiniband/hw/cxgb3/cxio_hal.c
+@@ -359,9 +359,10 @@ static void insert_recv_cqe(struct t3_wq *wq, struct t3_cq *cq)
+ 	cq->sw_wptr++;
+ }
+ 
+-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
++int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+ {
+ 	u32 ptr;
++	int flushed = 0;
+ 
+ 	PDBG("%s wq %p cq %p\n", __func__, wq, cq);
+ 
+@@ -369,8 +370,11 @@ void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count)
+ 	PDBG("%s rq_rptr %u rq_wptr %u skip count %u\n", __func__,
+ 	    wq->rq_rptr, wq->rq_wptr, count);
+ 	ptr = wq->rq_rptr + count;
+-	while (ptr++ != wq->rq_wptr)
++	while (ptr++ != wq->rq_wptr) {
+ 		insert_recv_cqe(wq, cq);
++		flushed++;
++	}
++	return flushed;
+ }
+ 
+ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
+@@ -394,9 +398,10 @@ static void insert_sq_cqe(struct t3_wq *wq, struct t3_cq *cq,
+ 	cq->sw_wptr++;
+ }
+ 
+-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
++int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+ {
+ 	__u32 ptr;
++	int flushed = 0;
+ 	struct t3_swsq *sqp = wq->sq + Q_PTR2IDX(wq->sq_rptr, wq->sq_size_log2);
+ 
+ 	ptr = wq->sq_rptr + count;
+@@ -405,7 +410,9 @@ void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count)
+ 		insert_sq_cqe(wq, cq, sqp);
+ 		sqp++;
+ 		ptr++;
++		flushed++;
+ 	}
++	return flushed;
+ }
+ 
+ /*
+@@ -581,7 +588,7 @@ static int cxio_hal_destroy_ctrl_qp(struct cxio_rdev *rdev_p)
+  * caller acquires the ctrl_qp lock before the call
+  */
+ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+-				      u32 len, void *data, int completion)
++				      u32 len, void *data)
+ {
+ 	u32 i, nr_wqe, copy_len;
+ 	u8 *copy_data;
+@@ -617,7 +624,7 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+ 		flag = 0;
+ 		if (i == (nr_wqe - 1)) {
+ 			/* last WQE */
+-			flag = completion ? T3_COMPLETION_FLAG : 0;
++			flag = T3_COMPLETION_FLAG;
+ 			if (len % 32)
+ 				utx_len = len / 32 + 1;
+ 			else
+@@ -676,21 +683,20 @@ static int cxio_hal_ctrl_qp_write_mem(struct cxio_rdev *rdev_p, u32 addr,
+ 	return 0;
+ }
+ 
+-/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl, and pbl_size
+- * OUT: stag index, actual pbl_size, pbl_addr allocated.
++/* IN: stag key, pdid, perm, zbva, to, len, page_size, pbl_size and pbl_addr
++ * OUT: stag index
+  * TBD: shared memory region support
+  */
+ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+ 			 u32 *stag, u8 stag_state, u32 pdid,
+ 			 enum tpt_mem_type type, enum tpt_mem_perm perm,
+-			 u32 zbva, u64 to, u32 len, u8 page_size, __be64 *pbl,
+-			 u32 *pbl_size, u32 *pbl_addr)
++			 u32 zbva, u64 to, u32 len, u8 page_size,
++			 u32 pbl_size, u32 pbl_addr)
+ {
+ 	int err;
+ 	struct tpt_entry tpt;
+ 	u32 stag_idx;
+ 	u32 wptr;
+-	int rereg = (*stag != T3_STAG_UNSET);
+ 
+ 	stag_state = stag_state > 0;
+ 	stag_idx = (*stag) >> 8;
+@@ -704,30 +710,8 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+ 	PDBG("%s stag_state 0x%0x type 0x%0x pdid 0x%0x, stag_idx 0x%x\n",
+ 	     __func__, stag_state, type, pdid, stag_idx);
+ 
+-	if (reset_tpt_entry)
+-		cxio_hal_pblpool_free(rdev_p, *pbl_addr, *pbl_size << 3);
+-	else if (!rereg) {
+-		*pbl_addr = cxio_hal_pblpool_alloc(rdev_p, *pbl_size << 3);
+-		if (!*pbl_addr) {
+-			return -ENOMEM;
+-		}
+-	}
+-
+ 	mutex_lock(&rdev_p->ctrl_qp.lock);
+ 
+-	/* write PBL first if any - update pbl only if pbl list exist */
+-	if (pbl) {
+-
+-		PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
+-		     __func__, *pbl_addr, rdev_p->rnic_info.pbl_base,
+-		     *pbl_size);
+-		err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+-				(*pbl_addr >> 5),
+-				(*pbl_size << 3), pbl, 0);
+-		if (err)
+-			goto ret;
+-	}
+-
+ 	/* write TPT entry */
+ 	if (reset_tpt_entry)
+ 		memset(&tpt, 0, sizeof(tpt));
+@@ -742,23 +726,23 @@ static int __cxio_tpt_op(struct cxio_rdev *rdev_p, u32 reset_tpt_entry,
+ 				V_TPT_ADDR_TYPE((zbva ? TPT_ZBTO : TPT_VATO)) |
+ 				V_TPT_PAGE_SIZE(page_size));
+ 		tpt.rsvd_pbl_addr = reset_tpt_entry ? 0 :
+-				    cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, *pbl_addr)>>3));
++				    cpu_to_be32(V_TPT_PBL_ADDR(PBL_OFF(rdev_p, pbl_addr)>>3));
+ 		tpt.len = cpu_to_be32(len);
+ 		tpt.va_hi = cpu_to_be32((u32) (to >> 32));
+ 		tpt.va_low_or_fbo = cpu_to_be32((u32) (to & 0xFFFFFFFFULL));
+ 		tpt.rsvd_bind_cnt_or_pstag = 0;
+ 		tpt.rsvd_pbl_size = reset_tpt_entry ? 0 :
+-				  cpu_to_be32(V_TPT_PBL_SIZE((*pbl_size) >> 2));
++				  cpu_to_be32(V_TPT_PBL_SIZE(pbl_size >> 2));
+ 	}
+ 	err = cxio_hal_ctrl_qp_write_mem(rdev_p,
+ 				       stag_idx +
+ 				       (rdev_p->rnic_info.tpt_base >> 5),
+-				       sizeof(tpt), &tpt, 1);
++				       sizeof(tpt), &tpt);
+ 
+ 	/* release the stag index to free pool */
+ 	if (reset_tpt_entry)
+ 		cxio_hal_put_stag(rdev_p->rscp, stag_idx);
+-ret:
++
+ 	wptr = rdev_p->ctrl_qp.wptr;
+ 	mutex_unlock(&rdev_p->ctrl_qp.lock);
+ 	if (!err)
+@@ -769,44 +753,67 @@ ret:
+ 	return err;
+ }
+ 
++int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
++		   u32 pbl_addr, u32 pbl_size)
++{
++	u32 wptr;
++	int err;
++
++	PDBG("%s *pdb_addr 0x%x, pbl_base 0x%x, pbl_size %d\n",
++	     __func__, pbl_addr, rdev_p->rnic_info.pbl_base,
++	     pbl_size);
++
++	mutex_lock(&rdev_p->ctrl_qp.lock);
++	err = cxio_hal_ctrl_qp_write_mem(rdev_p, pbl_addr >> 5, pbl_size << 3,
++					 pbl);
++	wptr = rdev_p->ctrl_qp.wptr;
++	mutex_unlock(&rdev_p->ctrl_qp.lock);
++	if (err)
++		return err;
++
++	if (wait_event_interruptible(rdev_p->ctrl_qp.waitq,
++				     SEQ32_GE(rdev_p->ctrl_qp.rptr,
++					      wptr)))
++		return -ERESTARTSYS;
++
++	return 0;
++}
++
+ int cxio_register_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+-			   u32 *pbl_addr)
++			   u8 page_size, u32 pbl_size, u32 pbl_addr)
+ {
+ 	*stag = T3_STAG_UNSET;
+ 	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+-			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
++			     zbva, to, len, page_size, pbl_size, pbl_addr);
+ }
+ 
+ int cxio_reregister_phys_mem(struct cxio_rdev *rdev_p, u32 *stag, u32 pdid,
+ 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+-			   u32 *pbl_addr)
++			   u8 page_size, u32 pbl_size, u32 pbl_addr)
+ {
+ 	return __cxio_tpt_op(rdev_p, 0, stag, 1, pdid, TPT_NON_SHARED_MR, perm,
+-			     zbva, to, len, page_size, pbl, pbl_size, pbl_addr);
++			     zbva, to, len, page_size, pbl_size, pbl_addr);
+ }
+ 
+ int cxio_dereg_mem(struct cxio_rdev *rdev_p, u32 stag, u32 pbl_size,
+ 		   u32 pbl_addr)
+ {
+-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+-			     &pbl_size, &pbl_addr);
++	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
++			     pbl_size, pbl_addr);
+ }
+ 
+ int cxio_allocate_window(struct cxio_rdev *rdev_p, u32 * stag, u32 pdid)
+ {
+-	u32 pbl_size = 0;
+ 	*stag = T3_STAG_UNSET;
+ 	return __cxio_tpt_op(rdev_p, 0, stag, 0, pdid, TPT_MW, 0, 0, 0ULL, 0, 0,
+-			     NULL, &pbl_size, NULL);
++			     0, 0);
+ }
+ 
+ int cxio_deallocate_window(struct cxio_rdev *rdev_p, u32 stag)
+ {
+-	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0, NULL,
+-			     NULL, NULL);
++	return __cxio_tpt_op(rdev_p, 1, &stag, 0, 0, 0, 0, 0, 0ULL, 0, 0,
++			     0, 0);
+ }
+ 
+ int cxio_rdma_init(struct cxio_rdev *rdev_p, struct t3_rdma_init_attr *attr)
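
The net effect of this cxio_hal.c refactor is to split memory registration in two: callers write the page list themselves through the new cxio_write_pbl(), and __cxio_tpt_op() only programs the TPT entry, taking pbl_size/pbl_addr by value. A hedged sketch of the resulting call sequence, with placeholder variables and error unwinding omitted:

    /* Sketch only: two-step registration flow after the refactor.
     * pbl/pbl_size/pdid/perm/... are invented for illustration.
     */
    pbl_addr = cxio_hal_pblpool_alloc(rdev_p, pbl_size << 3);
    if (!pbl_addr)
        return -ENOMEM;
    err = cxio_write_pbl(rdev_p, pbl, pbl_addr, pbl_size);
    if (!err)
        err = cxio_register_phys_mem(rdev_p, &stag, pdid, perm,
                                     zbva, to, len, shift - 12,
                                     pbl_size, pbl_addr);
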
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_hal.h b/drivers/infiniband/hw/cxgb3/cxio_hal.h
+index 2bcff7f..6e128f6 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_hal.h
++++ b/drivers/infiniband/hw/cxgb3/cxio_hal.h
+@@ -154,14 +154,14 @@ int cxio_create_qp(struct cxio_rdev *rdev, u32 kernel_domain, struct t3_wq *wq,
+ int cxio_destroy_qp(struct cxio_rdev *rdev, struct t3_wq *wq,
+ 		    struct cxio_ucontext *uctx);
+ int cxio_peek_cq(struct t3_wq *wr, struct t3_cq *cq, int opcode);
++int cxio_write_pbl(struct cxio_rdev *rdev_p, __be64 *pbl,
++		   u32 pbl_addr, u32 pbl_size);
+ int cxio_register_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+-			   u32 *pbl_addr);
++			   u8 page_size, u32 pbl_size, u32 pbl_addr);
+ int cxio_reregister_phys_mem(struct cxio_rdev *rdev, u32 * stag, u32 pdid,
+ 			   enum tpt_mem_perm perm, u32 zbva, u64 to, u32 len,
+-			   u8 page_size, __be64 *pbl, u32 *pbl_size,
+-			   u32 *pbl_addr);
++			   u8 page_size, u32 pbl_size, u32 pbl_addr);
+ int cxio_dereg_mem(struct cxio_rdev *rdev, u32 stag, u32 pbl_size,
+ 		   u32 pbl_addr);
+ int cxio_allocate_window(struct cxio_rdev *rdev, u32 * stag, u32 pdid);
+@@ -173,8 +173,8 @@ u32 cxio_hal_get_pdid(struct cxio_hal_resource *rscp);
+ void cxio_hal_put_pdid(struct cxio_hal_resource *rscp, u32 pdid);
+ int __init cxio_hal_init(void);
+ void __exit cxio_hal_exit(void);
+-void cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
+-void cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
++int cxio_flush_rq(struct t3_wq *wq, struct t3_cq *cq, int count);
++int cxio_flush_sq(struct t3_wq *wq, struct t3_cq *cq, int count);
+ void cxio_count_rcqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+ void cxio_count_scqes(struct t3_cq *cq, struct t3_wq *wq, int *count);
+ void cxio_flush_hw_cq(struct t3_cq *cq);
+diff --git a/drivers/infiniband/hw/cxgb3/cxio_resource.c b/drivers/infiniband/hw/cxgb3/cxio_resource.c
+index 45ed4f2..bd233c0 100644
+--- a/drivers/infiniband/hw/cxgb3/cxio_resource.c
++++ b/drivers/infiniband/hw/cxgb3/cxio_resource.c
+@@ -250,7 +250,6 @@ void cxio_hal_destroy_resource(struct cxio_hal_resource *rscp)
+  */
+ 
+ #define MIN_PBL_SHIFT 8			/* 256B == min PBL size (32 entries) */
+-#define PBL_CHUNK 2*1024*1024
+ 
+ u32 cxio_hal_pblpool_alloc(struct cxio_rdev *rdev_p, int size)
+ {
+@@ -267,14 +266,35 @@ void cxio_hal_pblpool_free(struct cxio_rdev *rdev_p, u32 addr, int size)
+ 
+ int cxio_hal_pblpool_create(struct cxio_rdev *rdev_p)
+ {
+-	unsigned long i;
++	unsigned pbl_start, pbl_chunk;
++
+ 	rdev_p->pbl_pool = gen_pool_create(MIN_PBL_SHIFT, -1);
+-	if (rdev_p->pbl_pool)
+-		for (i = rdev_p->rnic_info.pbl_base;
+-		     i <= rdev_p->rnic_info.pbl_top - PBL_CHUNK + 1;
+-		     i += PBL_CHUNK)
+-			gen_pool_add(rdev_p->pbl_pool, i, PBL_CHUNK, -1);
+-	return rdev_p->pbl_pool ? 0 : -ENOMEM;
++	if (!rdev_p->pbl_pool)
++		return -ENOMEM;
++
++	pbl_start = rdev_p->rnic_info.pbl_base;
++	pbl_chunk = rdev_p->rnic_info.pbl_top - pbl_start + 1;
++
++	while (pbl_start < rdev_p->rnic_info.pbl_top) {
++		pbl_chunk = min(rdev_p->rnic_info.pbl_top - pbl_start + 1,
++				pbl_chunk);
++		if (gen_pool_add(rdev_p->pbl_pool, pbl_start, pbl_chunk, -1)) {
++			PDBG("%s failed to add PBL chunk (%x/%x)\n",
++			     __func__, pbl_start, pbl_chunk);
++			if (pbl_chunk <= 1024 << MIN_PBL_SHIFT) {
++				printk(KERN_WARNING MOD "%s: Failed to add all PBL chunks (%x/%x)\n",
++				       __func__, pbl_start, rdev_p->rnic_info.pbl_top - pbl_start);
++				return 0;
++			}
++			pbl_chunk >>= 1;
++		} else {
++			PDBG("%s added PBL chunk (%x/%x)\n",
++			     __func__, pbl_start, pbl_chunk);
++			pbl_start += pbl_chunk;
++		}
++	}
++
++	return 0;
+ }
+ 
+ void cxio_hal_pblpool_destroy(struct cxio_rdev *rdev_p)
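The rewritten cxio_hal_pblpool_create() above no longer carves the PBL region
into fixed 2MB chunks; it starts from the whole range and halves the chunk size
whenever gen_pool_add() fails, giving up once chunks fall to 1024 << MIN_PBL_SHIFT.
A minimal sketch of that strategy (hypothetical helper name, kernel error
handling trimmed; not part of the patch itself):

    #include <linux/genalloc.h>
    #include <linux/kernel.h>	/* min() */

    /* Add [base, top] to 'pool', halving the chunk size on failure. */
    static int pool_add_halving(struct gen_pool *pool, unsigned long base,
                                unsigned long top, unsigned long min_chunk)
    {
        unsigned long start = base;
        unsigned long chunk = top - base + 1;	/* try the whole range first */

        while (start < top) {
            chunk = min(top - start + 1, chunk);
            if (gen_pool_add(pool, start, chunk, -1)) {
                if (chunk <= min_chunk)
                    return 0;	/* stop shrinking; keep what was added */
                chunk >>= 1;	/* retry this offset with half the size */
            } else {
                start += chunk;	/* chunk accepted, advance */
            }
        }
        return 0;
    }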
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_cm.c b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+index d44a6df..c325c44 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_cm.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_cm.c
+@@ -67,10 +67,10 @@ int peer2peer = 0;
+ module_param(peer2peer, int, 0644);
+ MODULE_PARM_DESC(peer2peer, "Support peer2peer ULPs (default=0)");
+ 
+-static int ep_timeout_secs = 10;
++static int ep_timeout_secs = 60;
+ module_param(ep_timeout_secs, int, 0644);
+ MODULE_PARM_DESC(ep_timeout_secs, "CM Endpoint operation timeout "
+-				   "in seconds (default=10)");
++				   "in seconds (default=60)");
+ 
+ static int mpa_rev = 1;
+ module_param(mpa_rev, int, 0644);
+@@ -1650,8 +1650,8 @@ static int close_con_rpl(struct t3cdev *tdev, struct sk_buff *skb, void *ctx)
+ 		release = 1;
+ 		break;
+ 	case ABORTING:
+-		break;
+ 	case DEAD:
++		break;
+ 	default:
+ 		BUG_ON(1);
+ 		break;
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_mem.c b/drivers/infiniband/hw/cxgb3/iwch_mem.c
+index 58c3d61..ec49a5c 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_mem.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_mem.c
+@@ -35,17 +35,26 @@
+ #include <rdma/ib_verbs.h>
+ 
+ #include "cxio_hal.h"
++#include "cxio_resource.h"
+ #include "iwch.h"
+ #include "iwch_provider.h"
+ 
+-int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+-					struct iwch_mr *mhp,
+-					int shift,
+-					__be64 *page_list)
++static void iwch_finish_mem_reg(struct iwch_mr *mhp, u32 stag)
+ {
+-	u32 stag;
+ 	u32 mmid;
+ 
++	mhp->attr.state = 1;
++	mhp->attr.stag = stag;
++	mmid = stag >> 8;
++	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
++	insert_handle(mhp->rhp, &mhp->rhp->mmidr, mhp, mmid);
++	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
++}
++
++int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
++		      struct iwch_mr *mhp, int shift)
++{
++	u32 stag;
+ 
+ 	if (cxio_register_phys_mem(&rhp->rdev,
+ 				   &stag, mhp->attr.pdid,
+@@ -53,28 +62,21 @@ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ 				   mhp->attr.zbva,
+ 				   mhp->attr.va_fbo,
+ 				   mhp->attr.len,
+-				   shift-12,
+-				   page_list,
+-				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
++				   shift - 12,
++				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
+ 		return -ENOMEM;
+-	mhp->attr.state = 1;
+-	mhp->attr.stag = stag;
+-	mmid = stag >> 8;
+-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+-	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
++
++	iwch_finish_mem_reg(mhp, stag);
++
+ 	return 0;
+ }
+ 
+ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ 					struct iwch_mr *mhp,
+ 					int shift,
+-					__be64 *page_list,
+ 					int npages)
+ {
+ 	u32 stag;
+-	u32 mmid;
+-
+ 
+ 	/* We could support this... */
+ 	if (npages > mhp->attr.pbl_size)
+@@ -87,19 +89,40 @@ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ 				   mhp->attr.zbva,
+ 				   mhp->attr.va_fbo,
+ 				   mhp->attr.len,
+-				   shift-12,
+-				   page_list,
+-				   &mhp->attr.pbl_size, &mhp->attr.pbl_addr))
++				   shift - 12,
++				   mhp->attr.pbl_size, mhp->attr.pbl_addr))
+ 		return -ENOMEM;
+-	mhp->attr.state = 1;
+-	mhp->attr.stag = stag;
+-	mmid = stag >> 8;
+-	mhp->ibmr.rkey = mhp->ibmr.lkey = stag;
+-	insert_handle(rhp, &rhp->mmidr, mhp, mmid);
+-	PDBG("%s mmid 0x%x mhp %p\n", __func__, mmid, mhp);
++
++	iwch_finish_mem_reg(mhp, stag);
++
++	return 0;
++}
++
++int iwch_alloc_pbl(struct iwch_mr *mhp, int npages)
++{
++	mhp->attr.pbl_addr = cxio_hal_pblpool_alloc(&mhp->rhp->rdev,
++						    npages << 3);
++
++	if (!mhp->attr.pbl_addr)
++		return -ENOMEM;
++
++	mhp->attr.pbl_size = npages;
++
+ 	return 0;
+ }
+ 
++void iwch_free_pbl(struct iwch_mr *mhp)
++{
++	cxio_hal_pblpool_free(&mhp->rhp->rdev, mhp->attr.pbl_addr,
++			      mhp->attr.pbl_size << 3);
++}
++
++int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset)
++{
++	return cxio_write_pbl(&mhp->rhp->rdev, pages,
++			      mhp->attr.pbl_addr + (offset << 3), npages);
++}
++
+ int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ 					int num_phys_buf,
+ 					u64 *iova_start,
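With the header changes above, memory registration is split into three steps:
reserve PBL space in the pool, copy the page list out to adapter memory, then
program the TPT entry with pbl_size/pbl_addr passed by value. Condensed caller
flow, mirroring the hunks above (a sketch, not additional patch content):

    err = iwch_alloc_pbl(mhp, npages);		/* reserve space in the PBL pool */
    if (err)
        return err;
    err = iwch_write_pbl(mhp, page_list, npages, 0);	/* copy entries out */
    if (err)
        goto err_pbl;
    err = iwch_register_mem(rhp, php, mhp, shift);	/* program the TPT entry */
    if (err)
        goto err_pbl;
    return 0;
    err_pbl:
        iwch_free_pbl(mhp);	/* release the reserved PBL space */
        return err;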
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c
+index d07d3a3..8934178 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_provider.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c
+@@ -442,6 +442,7 @@ static int iwch_dereg_mr(struct ib_mr *ib_mr)
+ 	mmid = mhp->attr.stag >> 8;
+ 	cxio_dereg_mem(&rhp->rdev, mhp->attr.stag, mhp->attr.pbl_size,
+ 		       mhp->attr.pbl_addr);
++	iwch_free_pbl(mhp);
+ 	remove_handle(rhp, &rhp->mmidr, mmid);
+ 	if (mhp->kva)
+ 		kfree((void *) (unsigned long) mhp->kva);
+@@ -475,6 +476,8 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+ 	if (!mhp)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	mhp->rhp = rhp;
++
+ 	/* First check that we have enough alignment */
+ 	if ((*iova_start & ~PAGE_MASK) != (buffer_list[0].addr & ~PAGE_MASK)) {
+ 		ret = -EINVAL;
+@@ -492,7 +495,17 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+ 	if (ret)
+ 		goto err;
+ 
+-	mhp->rhp = rhp;
++	ret = iwch_alloc_pbl(mhp, npages);
++	if (ret) {
++		kfree(page_list);
++		goto err_pbl;
++	}
++
++	ret = iwch_write_pbl(mhp, page_list, npages, 0);
++	kfree(page_list);
++	if (ret)
++		goto err_pbl;
++
+ 	mhp->attr.pdid = php->pdid;
+ 	mhp->attr.zbva = 0;
+ 
+@@ -502,12 +515,15 @@ static struct ib_mr *iwch_register_phys_mem(struct ib_pd *pd,
+ 
+ 	mhp->attr.len = (u32) total_size;
+ 	mhp->attr.pbl_size = npages;
+-	ret = iwch_register_mem(rhp, php, mhp, shift, page_list);
+-	kfree(page_list);
+-	if (ret) {
+-		goto err;
+-	}
++	ret = iwch_register_mem(rhp, php, mhp, shift);
++	if (ret)
++		goto err_pbl;
++
+ 	return &mhp->ibmr;
++
++err_pbl:
++	iwch_free_pbl(mhp);
++
+ err:
+ 	kfree(mhp);
+ 	return ERR_PTR(ret);
+@@ -560,7 +576,7 @@ static int iwch_reregister_phys_mem(struct ib_mr *mr,
+ 			return ret;
+ 	}
+ 
+-	ret = iwch_reregister_mem(rhp, php, &mh, shift, page_list, npages);
++	ret = iwch_reregister_mem(rhp, php, &mh, shift, npages);
+ 	kfree(page_list);
+ 	if (ret) {
+ 		return ret;
+@@ -602,6 +618,8 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ 	if (!mhp)
+ 		return ERR_PTR(-ENOMEM);
+ 
++	mhp->rhp = rhp;
++
+ 	mhp->umem = ib_umem_get(pd->uobject->context, start, length, acc, 0);
+ 	if (IS_ERR(mhp->umem)) {
+ 		err = PTR_ERR(mhp->umem);
+@@ -615,10 +633,14 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ 	list_for_each_entry(chunk, &mhp->umem->chunk_list, list)
+ 		n += chunk->nents;
+ 
+-	pages = kmalloc(n * sizeof(u64), GFP_KERNEL);
++	err = iwch_alloc_pbl(mhp, n);
++	if (err)
++		goto err;
++
++	pages = (__be64 *) __get_free_page(GFP_KERNEL);
+ 	if (!pages) {
+ 		err = -ENOMEM;
+-		goto err;
++		goto err_pbl;
+ 	}
+ 
+ 	i = n = 0;
+@@ -630,25 +652,38 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ 				pages[i++] = cpu_to_be64(sg_dma_address(
+ 					&chunk->page_list[j]) +
+ 					mhp->umem->page_size * k);
++				if (i == PAGE_SIZE / sizeof *pages) {
++					err = iwch_write_pbl(mhp, pages, i, n);
++					if (err)
++						goto pbl_done;
++					n += i;
++					i = 0;
++				}
+ 			}
+ 		}
+ 
+-	mhp->rhp = rhp;
++	if (i)
++		err = iwch_write_pbl(mhp, pages, i, n);
++
++pbl_done:
++	free_page((unsigned long) pages);
++	if (err)
++		goto err_pbl;
++
+ 	mhp->attr.pdid = php->pdid;
+ 	mhp->attr.zbva = 0;
+ 	mhp->attr.perms = iwch_ib_to_tpt_access(acc);
+ 	mhp->attr.va_fbo = virt;
+ 	mhp->attr.page_size = shift - 12;
+ 	mhp->attr.len = (u32) length;
+-	mhp->attr.pbl_size = i;
+-	err = iwch_register_mem(rhp, php, mhp, shift, pages);
+-	kfree(pages);
++
++	err = iwch_register_mem(rhp, php, mhp, shift);
+ 	if (err)
+-		goto err;
++		goto err_pbl;
+ 
+ 	if (udata && !t3a_device(rhp)) {
+ 		uresp.pbl_addr = (mhp->attr.pbl_addr -
+-	                         rhp->rdev.rnic_info.pbl_base) >> 3;
++				 rhp->rdev.rnic_info.pbl_base) >> 3;
+ 		PDBG("%s user resp pbl_addr 0x%x\n", __func__,
+ 		     uresp.pbl_addr);
+ 
+@@ -661,6 +696,9 @@ static struct ib_mr *iwch_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
+ 
+ 	return &mhp->ibmr;
+ 
++err_pbl:
++	iwch_free_pbl(mhp);
++
+ err:
+ 	ib_umem_release(mhp->umem);
+ 	kfree(mhp);
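The user-MR path above now stages PBL entries through a single free page
instead of one large kmalloc() of the whole list, flushing to the adapter
whenever the staging page fills. Roughly (the address walk is pseudocode,
NULL check trimmed):

    __be64 *pages = (__be64 *) __get_free_page(GFP_KERNEL);
    int i = 0, written = 0, err = 0;

    for_each_dma_address(addr, umem) {	/* pseudocode for the chunk walk */
        pages[i++] = cpu_to_be64(addr);
        if (i == PAGE_SIZE / sizeof *pages) {	/* staging page is full */
            err = iwch_write_pbl(mhp, pages, i, written);
            if (err)
                break;
            written += i;
            i = 0;
        }
    }
    if (!err && i)			/* flush the partial tail */
        err = iwch_write_pbl(mhp, pages, i, written);
    free_page((unsigned long) pages);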
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.h b/drivers/infiniband/hw/cxgb3/iwch_provider.h
+index db5100d..836163f 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_provider.h
++++ b/drivers/infiniband/hw/cxgb3/iwch_provider.h
+@@ -340,14 +340,14 @@ int iwch_quiesce_qps(struct iwch_cq *chp);
+ int iwch_resume_qps(struct iwch_cq *chp);
+ void stop_read_rep_timer(struct iwch_qp *qhp);
+ int iwch_register_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+-					struct iwch_mr *mhp,
+-					int shift,
+-					__be64 *page_list);
++		      struct iwch_mr *mhp, int shift);
+ int iwch_reregister_mem(struct iwch_dev *rhp, struct iwch_pd *php,
+ 					struct iwch_mr *mhp,
+ 					int shift,
+-					__be64 *page_list,
+ 					int npages);
++int iwch_alloc_pbl(struct iwch_mr *mhp, int npages);
++void iwch_free_pbl(struct iwch_mr *mhp);
++int iwch_write_pbl(struct iwch_mr *mhp, __be64 *pages, int npages, int offset);
+ int build_phys_page_list(struct ib_phys_buf *buffer_list,
+ 					int num_phys_buf,
+ 					u64 *iova_start,
+diff --git a/drivers/infiniband/hw/cxgb3/iwch_qp.c b/drivers/infiniband/hw/cxgb3/iwch_qp.c
+index 9b4be88..79dbe5b 100644
+--- a/drivers/infiniband/hw/cxgb3/iwch_qp.c
++++ b/drivers/infiniband/hw/cxgb3/iwch_qp.c
+@@ -655,6 +655,7 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+ {
+ 	struct iwch_cq *rchp, *schp;
+ 	int count;
++	int flushed;
+ 
+ 	rchp = get_chp(qhp->rhp, qhp->attr.rcq);
+ 	schp = get_chp(qhp->rhp, qhp->attr.scq);
+@@ -669,20 +670,22 @@ static void __flush_qp(struct iwch_qp *qhp, unsigned long *flag)
+ 	spin_lock(&qhp->lock);
+ 	cxio_flush_hw_cq(&rchp->cq);
+ 	cxio_count_rcqes(&rchp->cq, &qhp->wq, &count);
+-	cxio_flush_rq(&qhp->wq, &rchp->cq, count);
++	flushed = cxio_flush_rq(&qhp->wq, &rchp->cq, count);
+ 	spin_unlock(&qhp->lock);
+ 	spin_unlock_irqrestore(&rchp->lock, *flag);
+-	(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
++	if (flushed)
++		(*rchp->ibcq.comp_handler)(&rchp->ibcq, rchp->ibcq.cq_context);
+ 
+ 	/* locking heirarchy: cq lock first, then qp lock. */
+ 	spin_lock_irqsave(&schp->lock, *flag);
+ 	spin_lock(&qhp->lock);
+ 	cxio_flush_hw_cq(&schp->cq);
+ 	cxio_count_scqes(&schp->cq, &qhp->wq, &count);
+-	cxio_flush_sq(&qhp->wq, &schp->cq, count);
++	flushed = cxio_flush_sq(&qhp->wq, &schp->cq, count);
+ 	spin_unlock(&qhp->lock);
+ 	spin_unlock_irqrestore(&schp->lock, *flag);
+-	(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
++	if (flushed)
++		(*schp->ibcq.comp_handler)(&schp->ibcq, schp->ibcq.cq_context);
+ 
+ 	/* deref */
+ 	if (atomic_dec_and_test(&qhp->refcnt))
+@@ -880,7 +883,6 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ 				ep = qhp->ep;
+ 				get_ep(&ep->com);
+ 			}
+-			flush_qp(qhp, &flag);
+ 			break;
+ 		case IWCH_QP_STATE_TERMINATE:
+ 			qhp->attr.state = IWCH_QP_STATE_TERMINATE;
+@@ -911,6 +913,7 @@ int iwch_modify_qp(struct iwch_dev *rhp, struct iwch_qp *qhp,
+ 		}
+ 		switch (attrs->next_state) {
+ 			case IWCH_QP_STATE_IDLE:
++				flush_qp(qhp, &flag);
+ 				qhp->attr.state = IWCH_QP_STATE_IDLE;
+ 				qhp->attr.llp_stream_handle = NULL;
+ 				put_ep(&qhp->ep->com);
+diff --git a/drivers/infiniband/hw/ehca/ehca_classes.h b/drivers/infiniband/hw/ehca/ehca_classes.h
+index 00bab60..1e9e99a 100644
+--- a/drivers/infiniband/hw/ehca/ehca_classes.h
++++ b/drivers/infiniband/hw/ehca/ehca_classes.h
+@@ -192,6 +192,8 @@ struct ehca_qp {
+ 	int mtu_shift;
+ 	u32 message_count;
+ 	u32 packet_count;
++	atomic_t nr_events; /* events seen */
++	wait_queue_head_t wait_completion;
+ };
+ 
+ #define IS_SRQ(qp) (qp->ext_type == EQPT_SRQ)
+diff --git a/drivers/infiniband/hw/ehca/ehca_hca.c b/drivers/infiniband/hw/ehca/ehca_hca.c
+index 2515cbd..bc3b37d 100644
+--- a/drivers/infiniband/hw/ehca/ehca_hca.c
++++ b/drivers/infiniband/hw/ehca/ehca_hca.c
+@@ -101,7 +101,6 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+ 	props->max_ee          = limit_uint(rblock->max_rd_ee_context);
+ 	props->max_rdd         = limit_uint(rblock->max_rd_domain);
+ 	props->max_fmr         = limit_uint(rblock->max_mr);
+-	props->local_ca_ack_delay  = limit_uint(rblock->local_ca_ack_delay);
+ 	props->max_qp_rd_atom  = limit_uint(rblock->max_rr_qp);
+ 	props->max_ee_rd_atom  = limit_uint(rblock->max_rr_ee_context);
+ 	props->max_res_rd_atom = limit_uint(rblock->max_rr_hca);
+@@ -115,7 +114,7 @@ int ehca_query_device(struct ib_device *ibdev, struct ib_device_attr *props)
+ 	}
+ 
+ 	props->max_pkeys           = 16;
+-	props->local_ca_ack_delay  = limit_uint(rblock->local_ca_ack_delay);
++	props->local_ca_ack_delay  = min_t(u8, rblock->local_ca_ack_delay, 255);
+ 	props->max_raw_ipv6_qp     = limit_uint(rblock->max_raw_ipv6_qp);
+ 	props->max_raw_ethy_qp     = limit_uint(rblock->max_raw_ethy_qp);
+ 	props->max_mcast_grp       = limit_uint(rblock->max_mcast_grp);
+@@ -136,7 +135,7 @@ query_device1:
+ 	return ret;
+ }
+ 
+-static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
++static enum ib_mtu map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+ {
+ 	switch (fw_mtu) {
+ 	case 0x1:
+@@ -156,7 +155,7 @@ static int map_mtu(struct ehca_shca *shca, u32 fw_mtu)
+ 	}
+ }
+ 
+-static int map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
++static u8 map_number_of_vls(struct ehca_shca *shca, u32 vl_cap)
+ {
+ 	switch (vl_cap) {
+ 	case 0x1:
+diff --git a/drivers/infiniband/hw/ehca/ehca_irq.c b/drivers/infiniband/hw/ehca/ehca_irq.c
+index ca5eb0c..ce1ab05 100644
+--- a/drivers/infiniband/hw/ehca/ehca_irq.c
++++ b/drivers/infiniband/hw/ehca/ehca_irq.c
+@@ -204,6 +204,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+ 
+ 	read_lock(&ehca_qp_idr_lock);
+ 	qp = idr_find(&ehca_qp_idr, token);
++	if (qp)
++		atomic_inc(&qp->nr_events);
+ 	read_unlock(&ehca_qp_idr_lock);
+ 
+ 	if (!qp)
+@@ -223,6 +225,8 @@ static void qp_event_callback(struct ehca_shca *shca, u64 eqe,
+ 	if (fatal && qp->ext_type == EQPT_SRQBASE)
+ 		dispatch_qp_event(shca, qp, IB_EVENT_QP_LAST_WQE_REACHED);
+ 
++	if (atomic_dec_and_test(&qp->nr_events))
++		wake_up(&qp->wait_completion);
+ 	return;
+ }
+ 
+diff --git a/drivers/infiniband/hw/ehca/ehca_qp.c b/drivers/infiniband/hw/ehca/ehca_qp.c
+index 18fba92..3f59587 100644
+--- a/drivers/infiniband/hw/ehca/ehca_qp.c
++++ b/drivers/infiniband/hw/ehca/ehca_qp.c
+@@ -566,6 +566,8 @@ static struct ehca_qp *internal_create_qp(
+ 		return ERR_PTR(-ENOMEM);
+ 	}
+ 
++	atomic_set(&my_qp->nr_events, 0);
++	init_waitqueue_head(&my_qp->wait_completion);
+ 	spin_lock_init(&my_qp->spinlock_s);
+ 	spin_lock_init(&my_qp->spinlock_r);
+ 	my_qp->qp_type = qp_type;
+@@ -1934,6 +1936,9 @@ static int internal_destroy_qp(struct ib_device *dev, struct ehca_qp *my_qp,
+ 	idr_remove(&ehca_qp_idr, my_qp->token);
+ 	write_unlock_irqrestore(&ehca_qp_idr_lock, flags);
+ 
++	/* now wait until all pending events have completed */
++	wait_event(my_qp->wait_completion, !atomic_read(&my_qp->nr_events));
++
+ 	h_ret = hipz_h_destroy_qp(shca->ipz_hca_handle, my_qp);
+ 	if (h_ret != H_SUCCESS) {
+ 		ehca_err(dev, "hipz_h_destroy_qp() failed h_ret=%li "
+diff --git a/drivers/infiniband/hw/ipath/ipath_driver.c b/drivers/infiniband/hw/ipath/ipath_driver.c
+index acf30c0..ce7b7c3 100644
+--- a/drivers/infiniband/hw/ipath/ipath_driver.c
++++ b/drivers/infiniband/hw/ipath/ipath_driver.c
+@@ -1197,7 +1197,7 @@ void ipath_kreceive(struct ipath_portdata *pd)
+ 	}
+ 
+ reloop:
+-	for (last = 0, i = 1; !last; i++) {
++	for (last = 0, i = 1; !last; i += !last) {
+ 		hdr = dd->ipath_f_get_msgheader(dd, rhf_addr);
+ 		eflags = ipath_hdrget_err_flags(rhf_addr);
+ 		etype = ipath_hdrget_rcv_type(rhf_addr);
+@@ -1428,6 +1428,40 @@ static void ipath_update_pio_bufs(struct ipath_devdata *dd)
+ 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
+ }
+ 
++/*
++ * Used to force an update of pioavailshadow if we can't get a pio buffer.
++ * Needed primarily due to exiting freeze mode after recovering
++ * from errors.  Done lazily, because it's safer (known to not
++ * be writing pio buffers).
++ */
++static void ipath_reset_availshadow(struct ipath_devdata *dd)
++{
++	int i, im;
++	unsigned long flags;
++
++	spin_lock_irqsave(&ipath_pioavail_lock, flags);
++	for (i = 0; i < dd->ipath_pioavregs; i++) {
++		u64 val, oldval;
++		/* deal with 6110 chip bug on high register #s */
++		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
++			i ^ 1 : i;
++		val = le64_to_cpu(dd->ipath_pioavailregs_dma[im]);
++		/*
++		 * busy out the buffers not in the kernel avail list,
++		 * without changing the generation bits.
++		 */
++		oldval = dd->ipath_pioavailshadow[i];
++		dd->ipath_pioavailshadow[i] = val |
++			((~dd->ipath_pioavailkernel[i] <<
++			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT) &
++			0xaaaaaaaaaaaaaaaaULL); /* All BUSY bits in qword */
++		if (oldval != dd->ipath_pioavailshadow[i])
++			ipath_dbg("shadow[%d] was %Lx, now %lx\n",
++				i, oldval, dd->ipath_pioavailshadow[i]);
++	}
++	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
++}
++
+ /**
+  * ipath_setrcvhdrsize - set the receive header size
+  * @dd: the infinipath device
+@@ -1482,9 +1516,12 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
+ 	 */
+ 	ipath_stats.sps_nopiobufs++;
+ 	if (!(++dd->ipath_consec_nopiobuf % 100000)) {
+-		ipath_dbg("%u pio sends with no bufavail; dmacopy: "
+-			"%llx %llx %llx %llx; shadow:  %lx %lx %lx %lx\n",
++		ipath_force_pio_avail_update(dd); /* at start */
++		ipath_dbg("%u tries no piobufavail ts%lx; dmacopy: "
++			"%llx %llx %llx %llx\n"
++			"ipath  shadow:  %lx %lx %lx %lx\n",
+ 			dd->ipath_consec_nopiobuf,
++			(unsigned long)get_cycles(),
+ 			(unsigned long long) le64_to_cpu(dma[0]),
+ 			(unsigned long long) le64_to_cpu(dma[1]),
+ 			(unsigned long long) le64_to_cpu(dma[2]),
+@@ -1496,14 +1533,17 @@ static noinline void no_pio_bufs(struct ipath_devdata *dd)
+ 		 */
+ 		if ((dd->ipath_piobcnt2k + dd->ipath_piobcnt4k) >
+ 		    (sizeof(shadow[0]) * 4 * 4))
+-			ipath_dbg("2nd group: dmacopy: %llx %llx "
+-				  "%llx %llx; shadow: %lx %lx %lx %lx\n",
++			ipath_dbg("2nd group: dmacopy: "
++				  "%llx %llx %llx %llx\n"
++				  "ipath  shadow:  %lx %lx %lx %lx\n",
+ 				  (unsigned long long)le64_to_cpu(dma[4]),
+ 				  (unsigned long long)le64_to_cpu(dma[5]),
+ 				  (unsigned long long)le64_to_cpu(dma[6]),
+ 				  (unsigned long long)le64_to_cpu(dma[7]),
+-				  shadow[4], shadow[5], shadow[6],
+-				  shadow[7]);
++				  shadow[4], shadow[5], shadow[6], shadow[7]);
++
++		/* at end, so update likely happened */
++		ipath_reset_availshadow(dd);
+ 	}
+ }
+ 
+@@ -1652,19 +1692,46 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+ 			      unsigned len, int avail)
+ {
+ 	unsigned long flags;
+-	unsigned end;
++	unsigned end, cnt = 0, next;
+ 
+ 	/* There are two bits per send buffer (busy and generation) */
+ 	start *= 2;
+-	len *= 2;
+-	end = start + len;
++	end = start + len * 2;
+ 
+-	/* Set or clear the generation bits. */
+ 	spin_lock_irqsave(&ipath_pioavail_lock, flags);
++	/* Set or clear the busy bit in the shadow. */
+ 	while (start < end) {
+ 		if (avail) {
+-			__clear_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
+-				dd->ipath_pioavailshadow);
++			unsigned long dma;
++			int i, im;
++			/*
++			 * the BUSY bit will never be set, because we disarm
++			 * the user buffers before we hand them back to the
++			 * kernel.  We do have to make sure the generation
++			 * bit is set correctly in shadow, since it could
++			 * have changed many times while allocated to user.
++			 * We can't use the bitmap functions on the full
++			 * dma array because it is always little-endian, so
++			 * we have to flip to host-order first.
++			 * BITS_PER_LONG is slightly wrong, since it's
++			 * always 64 bits per register in chip...
++			 * We only work on 64 bit kernels, so that's OK.
++			 */
++			/* deal with 6110 chip bug on high register #s */
++			i = start / BITS_PER_LONG;
++			im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
++				i ^ 1 : i;
++			__clear_bit(INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT
++				+ start, dd->ipath_pioavailshadow);
++			dma = (unsigned long) le64_to_cpu(
++				dd->ipath_pioavailregs_dma[im]);
++			if (test_bit((INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
++				+ start) % BITS_PER_LONG, &dma))
++				__set_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
++					+ start, dd->ipath_pioavailshadow);
++			else
++				__clear_bit(INFINIPATH_SENDPIOAVAIL_CHECK_SHIFT
++					+ start, dd->ipath_pioavailshadow);
+ 			__set_bit(start, dd->ipath_pioavailkernel);
+ 		} else {
+ 			__set_bit(start + INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT,
+@@ -1673,7 +1740,44 @@ void ipath_chg_pioavailkernel(struct ipath_devdata *dd, unsigned start,
+ 		}
+ 		start += 2;
+ 	}
++
++	if (dd->ipath_pioupd_thresh) {
++		end = 2 * (dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
++		next = find_first_bit(dd->ipath_pioavailkernel, end);
++		while (next < end) {
++			cnt++;
++			next = find_next_bit(dd->ipath_pioavailkernel, end,
++					next + 1);
++		}
++	}
+ 	spin_unlock_irqrestore(&ipath_pioavail_lock, flags);
++
++	/*
++	 * When moving buffers from kernel to user, if number assigned to
++	 * the user is less than the pio update threshold, and threshold
++	 * is supported (cnt was computed > 0), drop the update threshold
++	 * so we update at least once per allocated number of buffers.
++	 * In any case, if the kernel buffers are less than the threshold,
++	 * drop the threshold.  We don't bother increasing it, having once
++	 * decreased it, since it would typically just cycle back and forth.
++	 * If we don't decrease below buffers in use, we can wait a long
++	 * time for an update, until some other context uses PIO buffers.
++	 */
++	if (!avail && len < cnt)
++		cnt = len;
++	if (cnt < dd->ipath_pioupd_thresh) {
++		dd->ipath_pioupd_thresh = cnt;
++		ipath_dbg("Decreased pio update threshold to %u\n",
++			dd->ipath_pioupd_thresh);
++		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
++		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
++			<< INFINIPATH_S_UPDTHRESH_SHIFT);
++		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
++			<< INFINIPATH_S_UPDTHRESH_SHIFT;
++		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
++			dd->ipath_sendctrl);
++		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
++	}
+ }
+ 
+ /**
+@@ -1794,8 +1898,8 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
+ 
+ 		spin_lock_irqsave(&dd->ipath_sdma_lock, flags);
+ 		skip_cancel =
+-			!test_bit(IPATH_SDMA_DISABLED, statp) &&
+-			test_and_set_bit(IPATH_SDMA_ABORTING, statp);
++			test_and_set_bit(IPATH_SDMA_ABORTING, statp)
++			&& !test_bit(IPATH_SDMA_DISABLED, statp);
+ 		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ 		if (skip_cancel)
+ 			goto bail;
+@@ -1826,6 +1930,9 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
+ 	ipath_disarm_piobufs(dd, 0,
+ 		dd->ipath_piobcnt2k + dd->ipath_piobcnt4k);
+ 
++	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
++		set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
++
+ 	if (restore_sendctrl) {
+ 		/* else done by caller later if needed */
+ 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+@@ -1845,7 +1952,6 @@ void ipath_cancel_sends(struct ipath_devdata *dd, int restore_sendctrl)
+ 		/* only wait so long for intr */
+ 		dd->ipath_sdma_abort_intr_timeout = jiffies + HZ;
+ 		dd->ipath_sdma_reset_wait = 200;
+-		__set_bit(IPATH_SDMA_DISARMED, &dd->ipath_sdma_status);
+ 		if (!test_bit(IPATH_SDMA_SHUTDOWN, &dd->ipath_sdma_status))
+ 			tasklet_hi_schedule(&dd->ipath_sdma_abort_task);
+ 		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
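ipath_reset_availshadow() above relies on the shadow layout in which each
buffer owns two adjacent bits: generation in the even position and BUSY in
the odd one, so 0xaaaa...ULL selects every BUSY bit in a qword. The busy-out
step in isolation (a sketch assuming BUSY_SHIFT == 1, which is what
INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT is here):

    /* Busy out every buffer NOT owned by the kernel, without touching
     * generation bits.  Kernel-ownership bits sit in the even positions,
     * so shifting them left by one lands on the BUSY (odd) positions,
     * and the 0xaaaa... mask clips the shift down to BUSY bits only. */
    static u64 busy_out_nonkernel(u64 dma_copy, u64 kernel_shadow)
    {
        return dma_copy | ((~kernel_shadow << 1) & 0xaaaaaaaaaaaaaaaaULL);
    }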
+diff --git a/drivers/infiniband/hw/ipath/ipath_file_ops.c b/drivers/infiniband/hw/ipath/ipath_file_ops.c
+index 8b17522..3295177 100644
+--- a/drivers/infiniband/hw/ipath/ipath_file_ops.c
++++ b/drivers/infiniband/hw/ipath/ipath_file_ops.c
+@@ -173,47 +173,25 @@ static int ipath_get_base_info(struct file *fp,
+ 		(void *) dd->ipath_statusp -
+ 		(void *) dd->ipath_pioavailregs_dma;
+ 	if (!shared) {
+-		kinfo->spi_piocnt = dd->ipath_pbufsport;
++		kinfo->spi_piocnt = pd->port_piocnt;
+ 		kinfo->spi_piobufbase = (u64) pd->port_piobufs;
+ 		kinfo->__spi_uregbase = (u64) dd->ipath_uregbase +
+ 			dd->ipath_ureg_align * pd->port_port;
+ 	} else if (master) {
+-		kinfo->spi_piocnt = (dd->ipath_pbufsport / subport_cnt) +
+-				    (dd->ipath_pbufsport % subport_cnt);
++		kinfo->spi_piocnt = (pd->port_piocnt / subport_cnt) +
++				    (pd->port_piocnt % subport_cnt);
+ 		/* Master's PIO buffers are after all the slave's */
+ 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ 			dd->ipath_palign *
+-			(dd->ipath_pbufsport - kinfo->spi_piocnt);
++			(pd->port_piocnt - kinfo->spi_piocnt);
+ 	} else {
+ 		unsigned slave = subport_fp(fp) - 1;
+ 
+-		kinfo->spi_piocnt = dd->ipath_pbufsport / subport_cnt;
++		kinfo->spi_piocnt = pd->port_piocnt / subport_cnt;
+ 		kinfo->spi_piobufbase = (u64) pd->port_piobufs +
+ 			dd->ipath_palign * kinfo->spi_piocnt * slave;
+ 	}
+ 
+-	/*
+-	 * Set the PIO avail update threshold to no larger
+-	 * than the number of buffers per process. Note that
+-	 * we decrease it here, but won't ever increase it.
+-	 */
+-	if (dd->ipath_pioupd_thresh &&
+-	    kinfo->spi_piocnt < dd->ipath_pioupd_thresh) {
+-		unsigned long flags;
+-
+-		dd->ipath_pioupd_thresh = kinfo->spi_piocnt;
+-		ipath_dbg("Decreased pio update threshold to %u\n",
+-			dd->ipath_pioupd_thresh);
+-		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+-		dd->ipath_sendctrl &= ~(INFINIPATH_S_UPDTHRESH_MASK
+-			<< INFINIPATH_S_UPDTHRESH_SHIFT);
+-		dd->ipath_sendctrl |= dd->ipath_pioupd_thresh
+-			<< INFINIPATH_S_UPDTHRESH_SHIFT;
+-		ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl,
+-			dd->ipath_sendctrl);
+-		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+-	}
+-
+ 	if (shared) {
+ 		kinfo->spi_port_uregbase = (u64) dd->ipath_uregbase +
+ 			dd->ipath_ureg_align * pd->port_port;
+@@ -1309,19 +1287,19 @@ static int ipath_mmap(struct file *fp, struct vm_area_struct *vma)
+ 	ureg = dd->ipath_uregbase + dd->ipath_ureg_align * pd->port_port;
+ 	if (!pd->port_subport_cnt) {
+ 		/* port is not shared */
+-		piocnt = dd->ipath_pbufsport;
++		piocnt = pd->port_piocnt;
+ 		piobufs = pd->port_piobufs;
+ 	} else if (!subport_fp(fp)) {
+ 		/* caller is the master */
+-		piocnt = (dd->ipath_pbufsport / pd->port_subport_cnt) +
+-			 (dd->ipath_pbufsport % pd->port_subport_cnt);
++		piocnt = (pd->port_piocnt / pd->port_subport_cnt) +
++			 (pd->port_piocnt % pd->port_subport_cnt);
+ 		piobufs = pd->port_piobufs +
+-			dd->ipath_palign * (dd->ipath_pbufsport - piocnt);
++			dd->ipath_palign * (pd->port_piocnt - piocnt);
+ 	} else {
+ 		unsigned slave = subport_fp(fp) - 1;
+ 
+ 		/* caller is a slave */
+-		piocnt = dd->ipath_pbufsport / pd->port_subport_cnt;
++		piocnt = pd->port_piocnt / pd->port_subport_cnt;
+ 		piobufs = pd->port_piobufs + dd->ipath_palign * piocnt * slave;
+ 	}
+ 
+@@ -1633,9 +1611,6 @@ static int try_alloc_port(struct ipath_devdata *dd, int port,
+ 		port_fp(fp) = pd;
+ 		pd->port_pid = current->pid;
+ 		strncpy(pd->port_comm, current->comm, sizeof(pd->port_comm));
+-		ipath_chg_pioavailkernel(dd,
+-			dd->ipath_pbufsport * (pd->port_port - 1),
+-			dd->ipath_pbufsport, 0);
+ 		ipath_stats.sps_ports++;
+ 		ret = 0;
+ 	} else
+@@ -1938,11 +1913,25 @@ static int ipath_do_user_init(struct file *fp,
+ 
+ 	/* for now we do nothing with rcvhdrcnt: uinfo->spu_rcvhdrcnt */
+ 
++	/* some ports may get extra buffers, calculate that here */
++	if (pd->port_port <= dd->ipath_ports_extrabuf)
++		pd->port_piocnt = dd->ipath_pbufsport + 1;
++	else
++		pd->port_piocnt = dd->ipath_pbufsport;
++
+ 	/* for right now, kernel piobufs are at end, so port 1 is at 0 */
++	if (pd->port_port <= dd->ipath_ports_extrabuf)
++		pd->port_pio_base = (dd->ipath_pbufsport + 1)
++			* (pd->port_port - 1);
++	else
++		pd->port_pio_base = dd->ipath_ports_extrabuf +
++			dd->ipath_pbufsport * (pd->port_port - 1);
+ 	pd->port_piobufs = dd->ipath_piobufbase +
+-		dd->ipath_pbufsport * (pd->port_port - 1) * dd->ipath_palign;
+-	ipath_cdbg(VERBOSE, "Set base of piobufs for port %u to 0x%x\n",
+-		   pd->port_port, pd->port_piobufs);
++		pd->port_pio_base * dd->ipath_palign;
++	ipath_cdbg(VERBOSE, "piobuf base for port %u is 0x%x, piocnt %u,"
++		" first pio %u\n", pd->port_port, pd->port_piobufs,
++		pd->port_piocnt, pd->port_pio_base);
++	ipath_chg_pioavailkernel(dd, pd->port_pio_base, pd->port_piocnt, 0);
+ 
+ 	/*
+ 	 * Now allocate the rcvhdr Q and eager TIDs; skip the TID
+@@ -2107,7 +2096,6 @@ static int ipath_close(struct inode *in, struct file *fp)
+ 	}
+ 
+ 	if (dd->ipath_kregbase) {
+-		int i;
+ 		/* atomically clear receive enable port and intr avail. */
+ 		clear_bit(dd->ipath_r_portenable_shift + port,
+ 			  &dd->ipath_rcvctrl);
+@@ -2136,9 +2124,9 @@ static int ipath_close(struct inode *in, struct file *fp)
+ 		ipath_write_kreg_port(dd, dd->ipath_kregs->kr_rcvhdraddr,
+ 			pd->port_port, dd->ipath_dummy_hdrq_phys);
+ 
+-		i = dd->ipath_pbufsport * (port - 1);
+-		ipath_disarm_piobufs(dd, i, dd->ipath_pbufsport);
+-		ipath_chg_pioavailkernel(dd, i, dd->ipath_pbufsport, 1);
++		ipath_disarm_piobufs(dd, pd->port_pio_base, pd->port_piocnt);
++		ipath_chg_pioavailkernel(dd, pd->port_pio_base,
++			pd->port_piocnt, 1);
+ 
+ 		dd->ipath_f_clear_tids(dd, pd->port_port);
+ 
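The new port_piocnt/port_pio_base fields replace the repeated
dd->ipath_pbufsport arithmetic above; when a port is shared, slaves get an
equal share, the master additionally absorbs the remainder, and the master's
buffers sit after all the slaves'. As a plain arithmetic sketch (hypothetical
helpers, counts in buffers rather than bytes):

    /* per-subport buffer count: subport 0 is the master */
    unsigned subport_piocnt(unsigned piocnt, unsigned n, unsigned subport)
    {
        if (subport == 0)			/* master takes the remainder */
            return piocnt / n + piocnt % n;
        return piocnt / n;			/* slaves get equal shares */
    }

    /* per-subport offset into the port's buffer range */
    unsigned subport_piobase(unsigned piocnt, unsigned n, unsigned subport)
    {
        if (subport == 0)			/* master: after all slaves */
            return piocnt - subport_piocnt(piocnt, n, 0);
        return (piocnt / n) * (subport - 1);	/* slaves packed from 0 */
    }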
+diff --git a/drivers/infiniband/hw/ipath/ipath_iba7220.c b/drivers/infiniband/hw/ipath/ipath_iba7220.c
+index e3ec0d1..8eee783 100644
+--- a/drivers/infiniband/hw/ipath/ipath_iba7220.c
++++ b/drivers/infiniband/hw/ipath/ipath_iba7220.c
+@@ -595,7 +595,7 @@ static void ipath_7220_txe_recover(struct ipath_devdata *dd)
+ 
+ 	dev_info(&dd->pcidev->dev,
+ 		"Recovering from TXE PIO parity error\n");
+-	ipath_disarm_senderrbufs(dd, 1);
++	ipath_disarm_senderrbufs(dd);
+ }
+ 
+ 
+@@ -675,10 +675,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+ 	ctrl = ipath_read_kreg32(dd, dd->ipath_kregs->kr_control);
+ 	if ((ctrl & INFINIPATH_C_FREEZEMODE) && !ipath_diag_inuse) {
+ 		/*
+-		 * Parity errors in send memory are recoverable,
+-		 * just cancel the send (if indicated in * sendbuffererror),
+-		 * count the occurrence, unfreeze (if no other handled
+-		 * hardware error bits are set), and continue.
++		 * Parity errors in send memory are recoverable by h/w;
++		 * just do housekeeping, exit freeze mode and continue.
+ 		 */
+ 		if (hwerrs & ((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ 			       INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+@@ -687,13 +685,6 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+ 			hwerrs &= ~((INFINIPATH_HWE_TXEMEMPARITYERR_PIOBUF |
+ 				     INFINIPATH_HWE_TXEMEMPARITYERR_PIOPBC)
+ 				    << INFINIPATH_HWE_TXEMEMPARITYERR_SHIFT);
+-			if (!hwerrs) {
+-				/* else leave in freeze mode */
+-				ipath_write_kreg(dd,
+-						 dd->ipath_kregs->kr_control,
+-						 dd->ipath_control);
+-				goto bail;
+-			}
+ 		}
+ 		if (hwerrs) {
+ 			/*
+@@ -723,8 +714,8 @@ static void ipath_7220_handle_hwerrors(struct ipath_devdata *dd, char *msg,
+ 			*dd->ipath_statusp |= IPATH_STATUS_HWERROR;
+ 			dd->ipath_flags &= ~IPATH_INITTED;
+ 		} else {
+-			ipath_dbg("Clearing freezemode on ignored hardware "
+-				  "error\n");
++			ipath_dbg("Clearing freezemode on ignored or "
++				"recovered hardware error\n");
+ 			ipath_clear_freeze(dd);
+ 		}
+ 	}
+@@ -870,8 +861,9 @@ static int ipath_7220_boardname(struct ipath_devdata *dd, char *name,
+ 			      "revision %u.%u!\n",
+ 			      dd->ipath_majrev, dd->ipath_minrev);
+ 		ret = 1;
+-	} else if (dd->ipath_minrev == 1) {
+-		/* Rev1 chips are prototype. Complain, but allow use */
++	} else if (dd->ipath_minrev == 1 &&
++		!(dd->ipath_flags & IPATH_INITTED)) {
++		/* Rev1 chips are prototype. Complain at init, but allow use */
+ 		ipath_dev_err(dd, "Unsupported hardware "
+ 			      "revision %u.%u, Contact support at qlogic.com\n",
+ 			      dd->ipath_majrev, dd->ipath_minrev);
+@@ -1966,7 +1958,7 @@ static void ipath_7220_config_ports(struct ipath_devdata *dd, ushort cfgports)
+ 			 dd->ipath_rcvctrl);
+ 	dd->ipath_p0_rcvegrcnt = 2048; /* always */
+ 	if (dd->ipath_flags & IPATH_HAS_SEND_DMA)
+-		dd->ipath_pioreserved = 1; /* reserve a buffer */
++		dd->ipath_pioreserved = 3; /* kpiobufs used for PIO */
+ }
+ 
+ 
+diff --git a/drivers/infiniband/hw/ipath/ipath_init_chip.c b/drivers/infiniband/hw/ipath/ipath_init_chip.c
+index 27dd894..3e5baa4 100644
+--- a/drivers/infiniband/hw/ipath/ipath_init_chip.c
++++ b/drivers/infiniband/hw/ipath/ipath_init_chip.c
+@@ -41,7 +41,7 @@
+ /*
+  * min buffers we want to have per port, after driver
+  */
+-#define IPATH_MIN_USER_PORT_BUFCNT 8
++#define IPATH_MIN_USER_PORT_BUFCNT 7
+ 
+ /*
+  * Number of ports we are configured to use (to allow for more pio
+@@ -54,13 +54,9 @@ MODULE_PARM_DESC(cfgports, "Set max number of ports to use");
+ 
+ /*
+  * Number of buffers reserved for driver (verbs and layered drivers.)
+- * Reserved at end of buffer list.   Initialized based on
+- * number of PIO buffers if not set via module interface.
++ * Initialized based on number of PIO buffers if not set via module interface.
+  * The problem with this is that it's global, but we'll use different
+- * numbers for different chip types.  So the default value is not
+- * very useful.  I've redefined it for the 1.3 release so that it's
+- * zero unless set by the user to something else, in which case we
+- * try to respect it.
++ * numbers for different chip types.
+  */
+ static ushort ipath_kpiobufs;
+ 
+@@ -546,9 +542,12 @@ static void enable_chip(struct ipath_devdata *dd, int reinit)
+ 			pioavail = dd->ipath_pioavailregs_dma[i ^ 1];
+ 		else
+ 			pioavail = dd->ipath_pioavailregs_dma[i];
+-		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail) |
+-			(~dd->ipath_pioavailkernel[i] <<
+-			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
++		/*
++		 * don't need to worry about ipath_pioavailkernel here
++		 * because we will call ipath_chg_pioavailkernel() later
++		 * in initialization, to busy out buffers as needed
++		 */
++		dd->ipath_pioavailshadow[i] = le64_to_cpu(pioavail);
+ 	}
+ 	/* can get counters, stats, etc. */
+ 	dd->ipath_flags |= IPATH_PRESENT;
+@@ -708,12 +707,11 @@ static void verify_interrupt(unsigned long opaque)
+ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+ {
+ 	int ret = 0;
+-	u32 val32, kpiobufs;
++	u32 kpiobufs, defkbufs;
+ 	u32 piobufs, uports;
+ 	u64 val;
+ 	struct ipath_portdata *pd;
+ 	gfp_t gfp_flags = GFP_USER | __GFP_COMP;
+-	unsigned long flags;
+ 
+ 	ret = init_housekeeping(dd, reinit);
+ 	if (ret)
+@@ -753,56 +751,46 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+ 	dd->ipath_pioavregs = ALIGN(piobufs, sizeof(u64) * BITS_PER_BYTE / 2)
+ 		/ (sizeof(u64) * BITS_PER_BYTE / 2);
+ 	uports = dd->ipath_cfgports ? dd->ipath_cfgports - 1 : 0;
+-	if (ipath_kpiobufs == 0) {
+-		/* not set by user (this is default) */
+-		if (piobufs > 144)
+-			kpiobufs = 32;
+-		else
+-			kpiobufs = 16;
+-	}
++	if (piobufs > 144)
++		defkbufs = 32 + dd->ipath_pioreserved;
+ 	else
+-		kpiobufs = ipath_kpiobufs;
++		defkbufs = 16 + dd->ipath_pioreserved;
+ 
+-	if (kpiobufs + (uports * IPATH_MIN_USER_PORT_BUFCNT) > piobufs) {
++	if (ipath_kpiobufs && (ipath_kpiobufs +
++		(uports * IPATH_MIN_USER_PORT_BUFCNT)) > piobufs) {
+ 		int i = (int) piobufs -
+ 			(int) (uports * IPATH_MIN_USER_PORT_BUFCNT);
+ 		if (i < 1)
+ 			i = 1;
+ 		dev_info(&dd->pcidev->dev, "Allocating %d PIO bufs of "
+ 			 "%d for kernel leaves too few for %d user ports "
+-			 "(%d each); using %u\n", kpiobufs,
++			 "(%d each); using %u\n", ipath_kpiobufs,
+ 			 piobufs, uports, IPATH_MIN_USER_PORT_BUFCNT, i);
+ 		/*
+ 		 * shouldn't change ipath_kpiobufs, because could be
+ 		 * different for different devices...
+ 		 */
+ 		kpiobufs = i;
+-	}
++	} else if (ipath_kpiobufs)
++		kpiobufs = ipath_kpiobufs;
++	else
++		kpiobufs = defkbufs;
+ 	dd->ipath_lastport_piobuf = piobufs - kpiobufs;
+ 	dd->ipath_pbufsport =
+ 		uports ? dd->ipath_lastport_piobuf / uports : 0;
+-	val32 = dd->ipath_lastport_piobuf - (dd->ipath_pbufsport * uports);
+-	if (val32 > 0) {
+-		ipath_dbg("allocating %u pbufs/port leaves %u unused, "
+-			  "add to kernel\n", dd->ipath_pbufsport, val32);
+-		dd->ipath_lastport_piobuf -= val32;
+-		kpiobufs += val32;
+-		ipath_dbg("%u pbufs/port leaves %u unused, add to kernel\n",
+-			  dd->ipath_pbufsport, val32);
+-	}
++	/* if not an even divisor, some user ports get extra buffers */
++	dd->ipath_ports_extrabuf = dd->ipath_lastport_piobuf -
++		(dd->ipath_pbufsport * uports);
++	if (dd->ipath_ports_extrabuf)
++		ipath_dbg("%u pbufs/port leaves some unused, add 1 buffer to "
++			"ports <= %u\n", dd->ipath_pbufsport,
++			dd->ipath_ports_extrabuf);
+ 	dd->ipath_lastpioindex = 0;
+ 	dd->ipath_lastpioindexl = dd->ipath_piobcnt2k;
+-	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
++	/* ipath_pioavailshadow initialized earlier */
+ 	ipath_cdbg(VERBOSE, "%d PIO bufs for kernel out of %d total %u "
+ 		   "each for %u user ports\n", kpiobufs,
+ 		   piobufs, dd->ipath_pbufsport, uports);
+-	if (dd->ipath_pioupd_thresh) {
+-		if (dd->ipath_pbufsport < dd->ipath_pioupd_thresh)
+-			dd->ipath_pioupd_thresh = dd->ipath_pbufsport;
+-		if (kpiobufs < dd->ipath_pioupd_thresh)
+-			dd->ipath_pioupd_thresh = kpiobufs;
+-	}
+-
+ 	ret = dd->ipath_f_early_init(dd);
+ 	if (ret) {
+ 		ipath_dev_err(dd, "Early initialization failure\n");
+@@ -810,13 +798,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+ 	}
+ 
+ 	/*
+-	 * Cancel any possible active sends from early driver load.
+-	 * Follows early_init because some chips have to initialize
+-	 * PIO buffers in early_init to avoid false parity errors.
+-	 */
+-	ipath_cancel_sends(dd, 0);
+-
+-	/*
+ 	 * Early_init sets rcvhdrentsize and rcvhdrsize, so this must be
+ 	 * done after early_init.
+ 	 */
+@@ -836,6 +817,7 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+ 
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendpioavailaddr,
+ 			 dd->ipath_pioavailregs_phys);
++
+ 	/*
+ 	 * this is to detect s/w errors, which the h/w works around by
+ 	 * ignoring the low 6 bits of address, if it wasn't aligned.
+@@ -862,12 +844,6 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+ 			 ~0ULL&~INFINIPATH_HWE_MEMBISTFAILED);
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control, 0ULL);
+ 
+-	spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+-	dd->ipath_sendctrl = INFINIPATH_S_PIOENABLE;
+-	ipath_write_kreg(dd, dd->ipath_kregs->kr_sendctrl, dd->ipath_sendctrl);
+-	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+-	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+-
+ 	/*
+ 	 * before error clears, since we expect serdes pll errors during
+ 	 * this, the first time after reset
+@@ -940,6 +916,19 @@ int ipath_init_chip(struct ipath_devdata *dd, int reinit)
+ 	else
+ 		enable_chip(dd, reinit);
+ 
++	/* after enable_chip, so pioavailshadow setup */
++	ipath_chg_pioavailkernel(dd, 0, piobufs, 1);
++
++	/*
++	 * Cancel any possible active sends from early driver load.
++	 * Follows early_init because some chips have to initialize
++	 * PIO buffers in early_init to avoid false parity errors.
++	 * After enable and ipath_chg_pioavailkernel so we can safely
++	 * enable pioavail updates and PIOENABLE; packets are now
++	 * ready to go out.
++	 */
++	ipath_cancel_sends(dd, 1);
++
+ 	if (!reinit) {
+ 		/*
+ 		 * Used when we close a port, for DMA already in flight
+diff --git a/drivers/infiniband/hw/ipath/ipath_intr.c b/drivers/infiniband/hw/ipath/ipath_intr.c
+index 1b58f47..26900b3 100644
+--- a/drivers/infiniband/hw/ipath/ipath_intr.c
++++ b/drivers/infiniband/hw/ipath/ipath_intr.c
+@@ -38,42 +38,12 @@
+ #include "ipath_verbs.h"
+ #include "ipath_common.h"
+ 
+-/*
+- * clear (write) a pio buffer, to clear a parity error.   This routine
+- * should only be called when in freeze mode, and the buffer should be
+- * canceled afterwards.
+- */
+-static void ipath_clrpiobuf(struct ipath_devdata *dd, u32 pnum)
+-{
+-	u32 __iomem *pbuf;
+-	u32 dwcnt; /* dword count to write */
+-	if (pnum < dd->ipath_piobcnt2k) {
+-		pbuf = (u32 __iomem *) (dd->ipath_pio2kbase + pnum *
+-			dd->ipath_palign);
+-		dwcnt = dd->ipath_piosize2k >> 2;
+-	}
+-	else {
+-		pbuf = (u32 __iomem *) (dd->ipath_pio4kbase +
+-			(pnum - dd->ipath_piobcnt2k) * dd->ipath_4kalign);
+-		dwcnt = dd->ipath_piosize4k >> 2;
+-	}
+-	dev_info(&dd->pcidev->dev,
+-		"Rewrite PIO buffer %u, to recover from parity error\n",
+-		pnum);
+-
+-	/* no flush required, since already in freeze */
+-	writel(dwcnt + 1, pbuf);
+-	while (--dwcnt)
+-		writel(0, pbuf++);
+-}
+ 
+ /*
+  * Called when we might have an error that is specific to a particular
+  * PIO buffer, and may need to cancel that buffer, so it can be re-used.
+- * If rewrite is true, and bits are set in the sendbufferror registers,
+- * we'll write to the buffer, for error recovery on parity errors.
+  */
+-void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
++void ipath_disarm_senderrbufs(struct ipath_devdata *dd)
+ {
+ 	u32 piobcnt;
+ 	unsigned long sbuf[4];
+@@ -109,11 +79,8 @@ void ipath_disarm_senderrbufs(struct ipath_devdata *dd, int rewrite)
+ 		}
+ 
+ 		for (i = 0; i < piobcnt; i++)
+-			if (test_bit(i, sbuf)) {
+-				if (rewrite)
+-					ipath_clrpiobuf(dd, i);
++			if (test_bit(i, sbuf))
+ 				ipath_disarm_piobufs(dd, i, 1);
+-			}
+ 		/* ignore armlaunch errs for a bit */
+ 		dd->ipath_lastcancel = jiffies+3;
+ 	}
+@@ -164,7 +131,7 @@ static u64 handle_e_sum_errs(struct ipath_devdata *dd, ipath_err_t errs)
+ {
+ 	u64 ignore_this_time = 0;
+ 
+-	ipath_disarm_senderrbufs(dd, 0);
++	ipath_disarm_senderrbufs(dd);
+ 	if ((errs & E_SUM_LINK_PKTERRS) &&
+ 	    !(dd->ipath_flags & IPATH_LINKACTIVE)) {
+ 		/*
+@@ -909,8 +876,8 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
+  * processes (causing armlaunch), send errors due to going into freeze mode,
+  * etc., and try to avoid causing extra interrupts while doing so.
+  * Forcibly update the in-memory pioavail register copies after cleanup
+- * because the chip won't do it for anything changing while in freeze mode
+- * (we don't want to wait for the next pio buffer state change).
++ * because the chip won't do it while in freeze mode (the register values
++ * themselves are kept correct).
+  * Make sure that we don't lose any important interrupts by using the chip
+  * feature that says that writing 0 to a bit in *clear that is set in
+  * *status will cause an interrupt to be generated again (if allowed by
+@@ -918,44 +885,23 @@ static int handle_errors(struct ipath_devdata *dd, ipath_err_t errs)
+  */
+ void ipath_clear_freeze(struct ipath_devdata *dd)
+ {
+-	int i, im;
+-	u64 val;
+-
+ 	/* disable error interrupts, to avoid confusion */
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_errormask, 0ULL);
+ 
+ 	/* also disable interrupts; errormask is sometimes overwriten */
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_intmask, 0ULL);
+ 
+-	/*
+-	 * clear all sends, because they have may been
+-	 * completed by usercode while in freeze mode, and
+-	 * therefore would not be sent, and eventually
+-	 * might cause the process to run out of bufs
+-	 */
+-	ipath_cancel_sends(dd, 0);
++	ipath_cancel_sends(dd, 1);
++
++	/* clear the freeze, and be sure chip saw it */
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_control,
+ 			 dd->ipath_control);
++	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ 
+-	/* ensure pio avail updates continue */
++	/* force in-memory update now we are out of freeze */
+ 	ipath_force_pio_avail_update(dd);
+ 
+ 	/*
+-	 * We just enabled pioavailupdate, so dma copy is almost certainly
+-	 * not yet right, so read the registers directly.  Similar to init
+-	 */
+-	for (i = 0; i < dd->ipath_pioavregs; i++) {
+-		/* deal with 6110 chip bug */
+-		im = (i > 3 && (dd->ipath_flags & IPATH_SWAP_PIOBUFS)) ?
+-			i ^ 1 : i;
+-		val = ipath_read_kreg64(dd, (0x1000 / sizeof(u64)) + im);
+-		dd->ipath_pioavailregs_dma[i] = cpu_to_le64(val);
+-		dd->ipath_pioavailshadow[i] = val |
+-			(~dd->ipath_pioavailkernel[i] <<
+-			INFINIPATH_SENDPIOAVAIL_BUSY_SHIFT);
+-	}
+-
+-	/*
+ 	 * force new interrupt if any hwerr, error or interrupt bits are
+ 	 * still set, and clear "safe" send packet errors related to freeze
+ 	 * and cancelling sends.  Re-enable error interrupts before possible
+@@ -1312,10 +1258,8 @@ irqreturn_t ipath_intr(int irq, void *data)
+ 		ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ 		spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ 
+-		if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA))
+-			handle_layer_pioavail(dd);
+-		else
+-			ipath_dbg("unexpected BUFAVAIL intr\n");
++		/* always process; sdma verbs uses PIO for acks and VL15  */
++		handle_layer_pioavail(dd);
+ 	}
+ 
+ 	ret = IRQ_HANDLED;
+diff --git a/drivers/infiniband/hw/ipath/ipath_kernel.h b/drivers/infiniband/hw/ipath/ipath_kernel.h
+index 202337a..02b24a3 100644
+--- a/drivers/infiniband/hw/ipath/ipath_kernel.h
++++ b/drivers/infiniband/hw/ipath/ipath_kernel.h
+@@ -117,6 +117,10 @@ struct ipath_portdata {
+ 	u16 port_subport_cnt;
+ 	/* non-zero if port is being shared. */
+ 	u16 port_subport_id;
++	/* number of pio bufs for this port (all procs, if shared) */
++	u32 port_piocnt;
++	/* first pio buffer for this port */
++	u32 port_pio_base;
+ 	/* chip offset of PIO buffers for this port */
+ 	u32 port_piobufs;
+ 	/* how many alloc_pages() chunks in port_rcvegrbuf_pages */
+@@ -384,6 +388,8 @@ struct ipath_devdata {
+ 	u32 ipath_lastrpkts;
+ 	/* pio bufs allocated per port */
+ 	u32 ipath_pbufsport;
++	/* if remainder on bufs/port, ports < extrabuf get 1 extra */
++	u32 ipath_ports_extrabuf;
+ 	u32 ipath_pioupd_thresh; /* update threshold, some chips */
+ 	/*
+ 	 * number of ports configured as max; zero is set to number chip
+@@ -1011,7 +1017,7 @@ void ipath_get_eeprom_info(struct ipath_devdata *);
+ int ipath_update_eeprom_log(struct ipath_devdata *dd);
+ void ipath_inc_eeprom_err(struct ipath_devdata *dd, u32 eidx, u32 incr);
+ u64 ipath_snap_cntr(struct ipath_devdata *, ipath_creg);
+-void ipath_disarm_senderrbufs(struct ipath_devdata *, int);
++void ipath_disarm_senderrbufs(struct ipath_devdata *);
+ void ipath_force_pio_avail_update(struct ipath_devdata *);
+ void signal_ib_event(struct ipath_devdata *dd, enum ib_event_type ev);
+ 
+diff --git a/drivers/infiniband/hw/ipath/ipath_rc.c b/drivers/infiniband/hw/ipath/ipath_rc.c
+index c405dfb..08b11b5 100644
+--- a/drivers/infiniband/hw/ipath/ipath_rc.c
++++ b/drivers/infiniband/hw/ipath/ipath_rc.c
+@@ -1746,7 +1746,11 @@ void ipath_rc_rcv(struct ipath_ibdev *dev, struct ipath_ib_header *hdr,
+ 		qp->r_wrid_valid = 0;
+ 		wc.wr_id = qp->r_wr_id;
+ 		wc.status = IB_WC_SUCCESS;
+-		wc.opcode = IB_WC_RECV;
++		if (opcode == OP(RDMA_WRITE_LAST_WITH_IMMEDIATE) ||
++		    opcode == OP(RDMA_WRITE_ONLY_WITH_IMMEDIATE))
++			wc.opcode = IB_WC_RECV_RDMA_WITH_IMM;
++		else
++			wc.opcode = IB_WC_RECV;
+ 		wc.vendor_err = 0;
+ 		wc.qp = &qp->ibqp;
+ 		wc.src_qp = qp->remote_qpn;
+diff --git a/drivers/infiniband/hw/ipath/ipath_ruc.c b/drivers/infiniband/hw/ipath/ipath_ruc.c
+index 8ac5c1d..9e3fe61 100644
+--- a/drivers/infiniband/hw/ipath/ipath_ruc.c
++++ b/drivers/infiniband/hw/ipath/ipath_ruc.c
+@@ -481,9 +481,10 @@ done:
+ 		wake_up(&qp->wait);
+ }
+ 
+-static void want_buffer(struct ipath_devdata *dd)
++static void want_buffer(struct ipath_devdata *dd, struct ipath_qp *qp)
+ {
+-	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA)) {
++	if (!(dd->ipath_flags & IPATH_HAS_SEND_DMA) ||
++		qp->ibqp.qp_type == IB_QPT_SMI) {
+ 		unsigned long flags;
+ 
+ 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+@@ -519,7 +520,7 @@ static void ipath_no_bufs_available(struct ipath_qp *qp,
+ 	spin_lock_irqsave(&dev->pending_lock, flags);
+ 	list_add_tail(&qp->piowait, &dev->piowait);
+ 	spin_unlock_irqrestore(&dev->pending_lock, flags);
+-	want_buffer(dev->dd);
++	want_buffer(dev->dd, qp);
+ 	dev->n_piowait++;
+ }
+ 
+diff --git a/drivers/infiniband/hw/ipath/ipath_sdma.c b/drivers/infiniband/hw/ipath/ipath_sdma.c
+index 1974df7..3697449 100644
+--- a/drivers/infiniband/hw/ipath/ipath_sdma.c
++++ b/drivers/infiniband/hw/ipath/ipath_sdma.c
+@@ -308,13 +308,15 @@ static void sdma_abort_task(unsigned long opaque)
+ 		spin_unlock_irqrestore(&dd->ipath_sdma_lock, flags);
+ 
+ 		/*
+-		 * Don't restart sdma here. Wait until link is up to ACTIVE.
+-		 * VL15 MADs used to bring the link up use PIO, and multiple
+-		 * link transitions otherwise cause the sdma engine to be
++		 * Don't restart sdma here (with the exception
++		 * below). Wait until link is up to ACTIVE.  VL15 MADs
++		 * used to bring the link up use PIO, and multiple link
++		 * transitions otherwise cause the sdma engine to be
+ 		 * stopped and started multiple times.
+-		 * The disable is done here, including the shadow, so the
+-		 * state is kept consistent.
+-		 * See ipath_restart_sdma() for the actual starting of sdma.
++		 * The disable is done here, including the shadow,
++		 * so the state is kept consistent.
++		 * See ipath_restart_sdma() for the actual starting
++		 * of sdma.
+ 		 */
+ 		spin_lock_irqsave(&dd->ipath_sendctrl_lock, flags);
+ 		dd->ipath_sendctrl &= ~INFINIPATH_S_SDMAENABLE;
+@@ -326,6 +328,13 @@ static void sdma_abort_task(unsigned long opaque)
+ 		/* make sure I see next message */
+ 		dd->ipath_sdma_abort_jiffies = 0;
+ 
++		/*
++		 * Not everything that takes SDMA offline is a link
++		 * status change.  If the link was up, restart SDMA.
++		 */
++		if (dd->ipath_flags & IPATH_LINKACTIVE)
++			ipath_restart_sdma(dd);
++
+ 		goto done;
+ 	}
+ 
+@@ -427,7 +436,12 @@ int setup_sdma(struct ipath_devdata *dd)
+ 		goto done;
+ 	}
+ 
+-	dd->ipath_sdma_status = 0;
++	/*
++	 * Set initial status as if we had been up, then gone down.
++	 * This lets initial start on transition to ACTIVE be the
++	 * same as restart after link flap.
++	 */
++	dd->ipath_sdma_status = IPATH_SDMA_ABORT_ABORTED;
+ 	dd->ipath_sdma_abort_jiffies = 0;
+ 	dd->ipath_sdma_generation = 0;
+ 	dd->ipath_sdma_descq_tail = 0;
+@@ -449,16 +463,19 @@ int setup_sdma(struct ipath_devdata *dd)
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmaheadaddr,
+ 			 dd->ipath_sdma_head_phys);
+ 
+-	/* Reserve all the former "kernel" piobufs */
+-	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k - dd->ipath_pioreserved;
+-	for (i = dd->ipath_lastport_piobuf; i < n; ++i) {
++	/*
++	 * Reserve all the former "kernel" piobufs, using the high number
++	 * range so we get as many 4K buffers as possible.
++	 */
++	n = dd->ipath_piobcnt2k + dd->ipath_piobcnt4k;
++	i = dd->ipath_lastport_piobuf + dd->ipath_pioreserved;
++	ipath_chg_pioavailkernel(dd, i, n - i , 0);
++	for (; i < n; ++i) {
+ 		unsigned word = i / 64;
+ 		unsigned bit = i & 63;
+ 		BUG_ON(word >= 3);
+ 		senddmabufmask[word] |= 1ULL << bit;
+ 	}
+-	ipath_chg_pioavailkernel(dd, dd->ipath_lastport_piobuf,
+-		n - dd->ipath_lastport_piobuf, 0);
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask0,
+ 			 senddmabufmask[0]);
+ 	ipath_write_kreg(dd, dd->ipath_kregs->kr_senddmabufmask1,
+@@ -615,6 +632,9 @@ void ipath_restart_sdma(struct ipath_devdata *dd)
+ 	ipath_read_kreg64(dd, dd->ipath_kregs->kr_scratch);
+ 	spin_unlock_irqrestore(&dd->ipath_sendctrl_lock, flags);
+ 
++	/* notify upper layers */
++	ipath_ib_piobufavail(dd->verbs_dev);
++
+ bail:
+ 	return;
+ }
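setup_sdma() above now both busies the reserved range out of the kernel
shadow and records it in the hardware's 3x64-bit send-DMA ownership mask.
The mask fill on its own (a sketch of the loop above):

    /* Mark buffers [first, first + count) in a 3-word (192-bit) mask. */
    static void set_dmabuf_mask(u64 mask[3], unsigned first, unsigned count)
    {
        unsigned i;

        for (i = first; i < first + count; i++) {
            unsigned word = i / 64;
            unsigned bit  = i & 63;

            BUG_ON(word >= 3);	/* hardware exposes only 192 bits */
            mask[word] |= 1ULL << bit;
        }
    }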
+diff --git a/drivers/infiniband/hw/ipath/ipath_verbs.c b/drivers/infiniband/hw/ipath/ipath_verbs.c
+index e63927c..5015cd2 100644
+--- a/drivers/infiniband/hw/ipath/ipath_verbs.c
++++ b/drivers/infiniband/hw/ipath/ipath_verbs.c
+@@ -396,7 +396,6 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+ 
+ 	wqe = get_swqe_ptr(qp, qp->s_head);
+ 	wqe->wr = *wr;
+-	wqe->ssn = qp->s_ssn++;
+ 	wqe->length = 0;
+ 	if (wr->num_sge) {
+ 		acc = wr->opcode >= IB_WR_RDMA_READ ?
+@@ -422,6 +421,7 @@ static int ipath_post_one_send(struct ipath_qp *qp, struct ib_send_wr *wr)
+ 			goto bail_inval;
+ 	} else if (wqe->length > to_idev(qp->ibqp.device)->dd->ipath_ibmtu)
+ 		goto bail_inval;
++	wqe->ssn = qp->s_ssn++;
+ 	qp->s_head = next;
+ 
+ 	ret = 0;
+diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c
+index 2f199c5..4521319 100644
+--- a/drivers/infiniband/hw/mlx4/cq.c
++++ b/drivers/infiniband/hw/mlx4/cq.c
+@@ -246,7 +246,7 @@ err_mtt:
+ 	if (context)
+ 		ib_umem_release(cq->umem);
+ 	else
+-		mlx4_ib_free_cq_buf(dev, &cq->buf, entries);
++		mlx4_ib_free_cq_buf(dev, &cq->buf, cq->ibcq.cqe);
+ 
+ err_db:
+ 	if (!context)
+@@ -434,7 +434,7 @@ int mlx4_ib_destroy_cq(struct ib_cq *cq)
+ 		mlx4_ib_db_unmap_user(to_mucontext(cq->uobject->context), &mcq->db);
+ 		ib_umem_release(mcq->umem);
+ 	} else {
+-		mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe + 1);
++		mlx4_ib_free_cq_buf(dev, &mcq->buf, cq->cqe);
+ 		mlx4_db_free(dev->dev, &mcq->db);
+ 	}
+ 
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib.h b/drivers/infiniband/ulp/ipoib/ipoib.h
+index 9044f88..ca126fc 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib.h
++++ b/drivers/infiniband/ulp/ipoib/ipoib.h
+@@ -334,6 +334,7 @@ struct ipoib_dev_priv {
+ #endif
+ 	int	hca_caps;
+ 	struct ipoib_ethtool_st ethtool;
++	struct timer_list poll_timer;
+ };
+ 
+ struct ipoib_ah {
+@@ -404,6 +405,7 @@ extern struct workqueue_struct *ipoib_workqueue;
+ 
+ int ipoib_poll(struct napi_struct *napi, int budget);
+ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr);
++void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr);
+ 
+ struct ipoib_ah *ipoib_create_ah(struct net_device *dev,
+ 				 struct ib_pd *pd, struct ib_ah_attr *attr);
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_ib.c b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+index 97b815c..f429bce 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_ib.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_ib.c
+@@ -461,6 +461,26 @@ void ipoib_ib_completion(struct ib_cq *cq, void *dev_ptr)
+ 	netif_rx_schedule(dev, &priv->napi);
+ }
+ 
++static void drain_tx_cq(struct net_device *dev)
++{
++	struct ipoib_dev_priv *priv = netdev_priv(dev);
++	unsigned long flags;
++
++	spin_lock_irqsave(&priv->tx_lock, flags);
++	while (poll_tx(priv))
++		; /* nothing */
++
++	if (netif_queue_stopped(dev))
++		mod_timer(&priv->poll_timer, jiffies + 1);
++
++	spin_unlock_irqrestore(&priv->tx_lock, flags);
++}
++
++void ipoib_send_comp_handler(struct ib_cq *cq, void *dev_ptr)
++{
++	drain_tx_cq((struct net_device *)dev_ptr);
++}
++
+ static inline int post_send(struct ipoib_dev_priv *priv,
+ 			    unsigned int wr_id,
+ 			    struct ib_ah *address, u32 qpn,
+@@ -555,12 +575,22 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ 	else
+ 		priv->tx_wr.send_flags &= ~IB_SEND_IP_CSUM;
+ 
++	if (++priv->tx_outstanding == ipoib_sendq_size) {
++		ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
++		if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
++			ipoib_warn(priv, "request notify on send CQ failed\n");
++		netif_stop_queue(dev);
++	}
++
+ 	if (unlikely(post_send(priv, priv->tx_head & (ipoib_sendq_size - 1),
+ 			       address->ah, qpn, tx_req, phead, hlen))) {
+ 		ipoib_warn(priv, "post_send failed\n");
+ 		++dev->stats.tx_errors;
++		--priv->tx_outstanding;
+ 		ipoib_dma_unmap_tx(priv->ca, tx_req);
+ 		dev_kfree_skb_any(skb);
++		if (netif_queue_stopped(dev))
++			netif_wake_queue(dev);
+ 	} else {
+ 		dev->trans_start = jiffies;
+ 
+@@ -568,14 +598,11 @@ void ipoib_send(struct net_device *dev, struct sk_buff *skb,
+ 		++priv->tx_head;
+ 		skb_orphan(skb);
+ 
+-		if (++priv->tx_outstanding == ipoib_sendq_size) {
+-			ipoib_dbg(priv, "TX ring full, stopping kernel net queue\n");
+-			netif_stop_queue(dev);
+-		}
+ 	}
+ 
+ 	if (unlikely(priv->tx_outstanding > MAX_SEND_CQE))
+-		poll_tx(priv);
++		while (poll_tx(priv))
++			; /* nothing */
+ }
+ 
+ static void __ipoib_reap_ah(struct net_device *dev)
+@@ -609,6 +636,11 @@ void ipoib_reap_ah(struct work_struct *work)
+ 				   round_jiffies_relative(HZ));
+ }
+ 
++static void ipoib_ib_tx_timer_func(unsigned long ctx)
++{
++	drain_tx_cq((struct net_device *)ctx);
++}
++
+ int ipoib_ib_dev_open(struct net_device *dev)
+ {
+ 	struct ipoib_dev_priv *priv = netdev_priv(dev);
+@@ -645,6 +677,10 @@ int ipoib_ib_dev_open(struct net_device *dev)
+ 	queue_delayed_work(ipoib_workqueue, &priv->ah_reap_task,
+ 			   round_jiffies_relative(HZ));
+ 
++	init_timer(&priv->poll_timer);
++	priv->poll_timer.function = ipoib_ib_tx_timer_func;
++	priv->poll_timer.data = (unsigned long)dev;
++
+ 	set_bit(IPOIB_FLAG_INITIALIZED, &priv->flags);
+ 
+ 	return 0;
+@@ -810,6 +846,7 @@ int ipoib_ib_dev_stop(struct net_device *dev, int flush)
+ 	ipoib_dbg(priv, "All sends and receives done.\n");
+ 
+ timeout:
++	del_timer_sync(&priv->poll_timer);
+ 	qp_attr.qp_state = IB_QPS_RESET;
+ 	if (ib_modify_qp(priv->qp, &qp_attr, IB_QP_STATE))
+ 		ipoib_warn(priv, "Failed to modify QP to RESET state\n");
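
Taken together, the ipoib_ib.c hunks move send flow control onto an armed CQ: the net queue is stopped and the send CQ armed only when the TX ring fills, the completion handler drains it, and a one-jiffy timer re-polls while the queue remains stopped. A condensed sketch of the stop-and-arm half, assuming a demo_priv with just the fields this path touches:

	#include <linux/kernel.h>
	#include <linux/netdevice.h>
	#include <rdma/ib_verbs.h>

	struct demo_priv {
		struct ib_cq	*send_cq;
		unsigned int	 tx_outstanding;
	};

	static const unsigned int demo_sendq_size = 64;	/* assumed ring size */

	static void demo_tx_tail(struct demo_priv *priv, struct net_device *dev)
	{
		if (++priv->tx_outstanding == demo_sendq_size) {
			/* arm the CQ before stopping the queue, so a completion
			 * already in flight still raises the event that drains
			 * the ring and wakes the queue */
			if (ib_req_notify_cq(priv->send_cq, IB_CQ_NEXT_COMP))
				printk(KERN_WARNING "demo: request notify failed\n");
			netif_stop_queue(dev);
		}
	}
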
+diff --git a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+index c1e7ece..8766d29 100644
+--- a/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
++++ b/drivers/infiniband/ulp/ipoib/ipoib_verbs.c
+@@ -187,7 +187,8 @@ int ipoib_transport_dev_init(struct net_device *dev, struct ib_device *ca)
+ 		goto out_free_mr;
+ 	}
+ 
+-	priv->send_cq = ib_create_cq(priv->ca, NULL, NULL, dev, ipoib_sendq_size, 0);
++	priv->send_cq = ib_create_cq(priv->ca, ipoib_send_comp_handler, NULL,
++				     dev, ipoib_sendq_size, 0);
+ 	if (IS_ERR(priv->send_cq)) {
+ 		printk(KERN_WARNING "%s: failed to create send CQ\n", ca->name);
+ 		goto out_free_recv_cq;
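
This change completes the wiring: a CQ created with a NULL completion handler can never deliver the armed events the new TX path depends on. A minimal sketch of the 2.6.26-era six-argument ib_create_cq() call, demo_* names hypothetical:

	#include <linux/err.h>
	#include <rdma/ib_verbs.h>

	static void demo_send_comp(struct ib_cq *cq, void *ctx)
	{
		/* invoked for armed completions; reap sends for ctx here */
	}

	static int demo_create_send_cq(struct ib_device *ca, void *ctx,
				       struct ib_cq **cqp, int entries)
	{
		/* args: device, comp handler, async event handler, context,
		 * number of CQ entries, completion vector */
		struct ib_cq *cq = ib_create_cq(ca, demo_send_comp, NULL,
						ctx, entries, 0);
		if (IS_ERR(cq))
			return PTR_ERR(cq);

		*cqp = cq;
		return 0;	/* handler fires only after ib_req_notify_cq() */
	}
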
+diff --git a/drivers/input/misc/Kconfig b/drivers/input/misc/Kconfig
+index 92b6834..3ad8bd9 100644
+--- a/drivers/input/misc/Kconfig
++++ b/drivers/input/misc/Kconfig
+@@ -14,7 +14,7 @@ if INPUT_MISC
+ 
+ config INPUT_PCSPKR
+ 	tristate "PC Speaker support"
+-	depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES
++	depends on PCSPKR_PLATFORM
+ 	depends on SND_PCSP=n
+ 	help
+ 	  Say Y here if you want the standard PC Speaker to be used for
+diff --git a/drivers/input/serio/hp_sdc.c b/drivers/input/serio/hp_sdc.c
+index 02b3ad8..edfedd9 100644
+--- a/drivers/input/serio/hp_sdc.c
++++ b/drivers/input/serio/hp_sdc.c
+@@ -69,6 +69,7 @@
+ #include <linux/time.h>
+ #include <linux/slab.h>
+ #include <linux/hil.h>
++#include <linux/semaphore.h>
+ #include <asm/io.h>
+ #include <asm/system.h>
+ 
+diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c
+index 2097820..b8b9e44 100644
+--- a/drivers/macintosh/adb.c
++++ b/drivers/macintosh/adb.c
+@@ -37,7 +37,7 @@
+ #include <linux/device.h>
+ #include <linux/kthread.h>
+ #include <linux/platform_device.h>
+-#include <linux/semaphore.h>
++#include <linux/mutex.h>
+ 
+ #include <asm/uaccess.h>
+ #ifdef CONFIG_PPC
+@@ -102,7 +102,7 @@ static struct adb_handler {
+ } adb_handler[16];
+ 
+ /*
+- * The adb_handler_sem mutex protects all accesses to the original_address
++ * The adb_handler_mutex mutex protects all accesses to the original_address
+  * and handler_id fields of adb_handler[i] for all i, and changes to the
+  * handler field.
+  * Accesses to the handler field are protected by the adb_handler_lock
+@@ -110,7 +110,7 @@ static struct adb_handler {
+  * time adb_unregister returns, we know that the old handler isn't being
+  * called.
+  */
+-static DECLARE_MUTEX(adb_handler_sem);
++static DEFINE_MUTEX(adb_handler_mutex);
+ static DEFINE_RWLOCK(adb_handler_lock);
+ 
+ #if 0
+@@ -355,7 +355,7 @@ do_adb_reset_bus(void)
+ 		msleep(500);
+ 	}
+ 
+-	down(&adb_handler_sem);
++	mutex_lock(&adb_handler_mutex);
+ 	write_lock_irq(&adb_handler_lock);
+ 	memset(adb_handler, 0, sizeof(adb_handler));
+ 	write_unlock_irq(&adb_handler_lock);
+@@ -376,7 +376,7 @@ do_adb_reset_bus(void)
+ 		if (adb_controller->autopoll)
+ 			adb_controller->autopoll(autopoll_devs);
+ 	}
+-	up(&adb_handler_sem);
++	mutex_unlock(&adb_handler_mutex);
+ 
+ 	blocking_notifier_call_chain(&adb_client_list,
+ 		ADB_MSG_POST_RESET, NULL);
+@@ -454,7 +454,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
+ {
+ 	int i;
+ 
+-	down(&adb_handler_sem);
++	mutex_lock(&adb_handler_mutex);
+ 	ids->nids = 0;
+ 	for (i = 1; i < 16; i++) {
+ 		if ((adb_handler[i].original_address == default_id) &&
+@@ -472,7 +472,7 @@ adb_register(int default_id, int handler_id, struct adb_ids *ids,
+ 			ids->id[ids->nids++] = i;
+ 		}
+ 	}
+-	up(&adb_handler_sem);
++	mutex_unlock(&adb_handler_mutex);
+ 	return ids->nids;
+ }
+ 
+@@ -481,7 +481,7 @@ adb_unregister(int index)
+ {
+ 	int ret = -ENODEV;
+ 
+-	down(&adb_handler_sem);
++	mutex_lock(&adb_handler_mutex);
+ 	write_lock_irq(&adb_handler_lock);
+ 	if (adb_handler[index].handler) {
+ 		while(adb_handler[index].busy) {
+@@ -493,7 +493,7 @@ adb_unregister(int index)
+ 		adb_handler[index].handler = NULL;
+ 	}
+ 	write_unlock_irq(&adb_handler_lock);
+-	up(&adb_handler_sem);
++	mutex_unlock(&adb_handler_mutex);
+ 	return ret;
+ }
+ 
+@@ -557,19 +557,19 @@ adb_try_handler_change(int address, int new_id)
+ {
+ 	int ret;
+ 
+-	down(&adb_handler_sem);
++	mutex_lock(&adb_handler_mutex);
+ 	ret = try_handler_change(address, new_id);
+-	up(&adb_handler_sem);
++	mutex_unlock(&adb_handler_mutex);
+ 	return ret;
+ }
+ 
+ int
+ adb_get_infos(int address, int *original_address, int *handler_id)
+ {
+-	down(&adb_handler_sem);
++	mutex_lock(&adb_handler_mutex);
+ 	*original_address = adb_handler[address].original_address;
+ 	*handler_id = adb_handler[address].handler_id;
+-	up(&adb_handler_sem);
++	mutex_unlock(&adb_handler_mutex);
+ 
+ 	return (*original_address != 0);
+ }
+@@ -628,10 +628,10 @@ do_adb_query(struct adb_request *req)
+ 	case ADB_QUERY_GETDEVINFO:
+ 		if (req->nbytes < 3)
+ 			break;
+-		down(&adb_handler_sem);
++		mutex_lock(&adb_handler_mutex);
+ 		req->reply[0] = adb_handler[req->data[2]].original_address;
+ 		req->reply[1] = adb_handler[req->data[2]].handler_id;
+-		up(&adb_handler_sem);
++		mutex_unlock(&adb_handler_mutex);
+ 		req->complete = 1;
+ 		req->reply_len = 2;
+ 		adb_write_done(req);
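
The adb.c conversion is mechanical: a semaphore used purely for mutual exclusion becomes a struct mutex, which additionally gets lockdep validation and owner debugging. The mapping, as a sketch:

	#include <linux/mutex.h>

	static DEFINE_MUTEX(demo_mutex);	/* was: DECLARE_MUTEX(demo_sem); */

	static void demo_update_table(void)
	{
		mutex_lock(&demo_mutex);	/* was: down(&demo_sem); */
		/* ... mutate the shared handler table ... */
		mutex_unlock(&demo_mutex);	/* was: up(&demo_sem); */
	}
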
+diff --git a/drivers/macintosh/therm_pm72.c b/drivers/macintosh/therm_pm72.c
+index 1e0a69a..ddfb426 100644
+--- a/drivers/macintosh/therm_pm72.c
++++ b/drivers/macintosh/therm_pm72.c
+@@ -122,6 +122,7 @@
+ #include <linux/kmod.h>
+ #include <linux/i2c.h>
+ #include <linux/kthread.h>
++#include <linux/mutex.h>
+ #include <asm/prom.h>
+ #include <asm/machdep.h>
+ #include <asm/io.h>
+@@ -169,7 +170,7 @@ static int				rackmac;
+ static s32				dimm_output_clamp;
+ static int 				fcu_rpm_shift;
+ static int				fcu_tickle_ticks;
+-static DECLARE_MUTEX(driver_lock);
++static DEFINE_MUTEX(driver_lock);
+ 
+ /*
+  * We have 3 types of CPU PID control. One is "split" old style control
+@@ -729,9 +730,9 @@ static void fetch_cpu_pumps_minmax(void)
+ static ssize_t show_##name(struct device *dev, struct device_attribute *attr, char *buf)	\
+ {								\
+ 	ssize_t r;						\
+-	down(&driver_lock);					\
++	mutex_lock(&driver_lock);					\
+ 	r = sprintf(buf, "%d.%03d", FIX32TOPRINT(data));	\
+-	up(&driver_lock);					\
++	mutex_unlock(&driver_lock);					\
+ 	return r;						\
+ }
+ #define BUILD_SHOW_FUNC_INT(name, data)				\
+@@ -1803,11 +1804,11 @@ static int main_control_loop(void *x)
+ {
+ 	DBG("main_control_loop started\n");
+ 
+-	down(&driver_lock);
++	mutex_lock(&driver_lock);
+ 
+ 	if (start_fcu() < 0) {
+ 		printk(KERN_ERR "kfand: failed to start FCU\n");
+-		up(&driver_lock);
++		mutex_unlock(&driver_lock);
+ 		goto out;
+ 	}
+ 
+@@ -1822,14 +1823,14 @@ static int main_control_loop(void *x)
+ 
+ 	fcu_tickle_ticks = FCU_TICKLE_TICKS;
+ 
+-	up(&driver_lock);
++	mutex_unlock(&driver_lock);
+ 
+ 	while (state == state_attached) {
+ 		unsigned long elapsed, start;
+ 
+ 		start = jiffies;
+ 
+-		down(&driver_lock);
++		mutex_lock(&driver_lock);
+ 
+ 		/* Tickle the FCU just in case */
+ 		if (--fcu_tickle_ticks < 0) {
+@@ -1861,7 +1862,7 @@ static int main_control_loop(void *x)
+ 			do_monitor_slots(&slots_state);
+ 		else
+ 			do_monitor_drives(&drives_state);
+-		up(&driver_lock);
++		mutex_unlock(&driver_lock);
+ 
+ 		if (critical_state == 1) {
+ 			printk(KERN_WARNING "Temperature control detected a critical condition\n");
+@@ -2019,13 +2020,13 @@ static void detach_fcu(void)
+  */
+ static int therm_pm72_attach(struct i2c_adapter *adapter)
+ {
+-	down(&driver_lock);
++	mutex_lock(&driver_lock);
+ 
+ 	/* Check state */
+ 	if (state == state_detached)
+ 		state = state_attaching;
+ 	if (state != state_attaching) {
+-		up(&driver_lock);
++		mutex_unlock(&driver_lock);
+ 		return 0;
+ 	}
+ 
+@@ -2054,7 +2055,7 @@ static int therm_pm72_attach(struct i2c_adapter *adapter)
+ 		state = state_attached;
+ 		start_control_loops();
+ 	}
+-	up(&driver_lock);
++	mutex_unlock(&driver_lock);
+ 
+ 	return 0;
+ }
+@@ -2065,16 +2066,16 @@ static int therm_pm72_attach(struct i2c_adapter *adapter)
+  */
+ static int therm_pm72_detach(struct i2c_adapter *adapter)
+ {
+-	down(&driver_lock);
++	mutex_lock(&driver_lock);
+ 
+ 	if (state != state_detached)
+ 		state = state_detaching;
+ 
+ 	/* Stop control loops if any */
+ 	DBG("stopping control loops\n");
+-	up(&driver_lock);
++	mutex_unlock(&driver_lock);
+ 	stop_control_loops();
+-	down(&driver_lock);
++	mutex_lock(&driver_lock);
+ 
+ 	if (u3_0 != NULL && !strcmp(adapter->name, "u3 0")) {
+ 		DBG("lost U3-0, disposing control loops\n");
+@@ -2090,7 +2091,7 @@ static int therm_pm72_detach(struct i2c_adapter *adapter)
+ 	if (u3_0 == NULL && u3_1 == NULL)
+ 		state = state_detached;
+ 
+-	up(&driver_lock);
++	mutex_unlock(&driver_lock);
+ 
+ 	return 0;
+ }
+diff --git a/drivers/macintosh/windfarm_smu_sat.c b/drivers/macintosh/windfarm_smu_sat.c
+index 797918d..7f2be4b 100644
+--- a/drivers/macintosh/windfarm_smu_sat.c
++++ b/drivers/macintosh/windfarm_smu_sat.c
+@@ -13,7 +13,7 @@
+ #include <linux/init.h>
+ #include <linux/wait.h>
+ #include <linux/i2c.h>
+-#include <linux/semaphore.h>
++#include <linux/mutex.h>
+ #include <asm/prom.h>
+ #include <asm/smu.h>
+ #include <asm/pmac_low_i2c.h>
+@@ -36,7 +36,7 @@
+ struct wf_sat {
+ 	int			nr;
+ 	atomic_t		refcnt;
+-	struct semaphore	mutex;
++	struct mutex		mutex;
+ 	unsigned long		last_read; /* jiffies when cache last updated */
+ 	u8			cache[16];
+ 	struct i2c_client	i2c;
+@@ -163,7 +163,7 @@ static int wf_sat_get(struct wf_sensor *sr, s32 *value)
+ 	if (sat->i2c.adapter == NULL)
+ 		return -ENODEV;
+ 
+-	down(&sat->mutex);
++	mutex_lock(&sat->mutex);
+ 	if (time_after(jiffies, (sat->last_read + MAX_AGE))) {
+ 		err = wf_sat_read_cache(sat);
+ 		if (err)
+@@ -182,7 +182,7 @@ static int wf_sat_get(struct wf_sensor *sr, s32 *value)
+ 	err = 0;
+ 
+  fail:
+-	up(&sat->mutex);
++	mutex_unlock(&sat->mutex);
+ 	return err;
+ }
+ 
+@@ -233,7 +233,7 @@ static void wf_sat_create(struct i2c_adapter *adapter, struct device_node *dev)
+ 	sat->nr = -1;
+ 	sat->node = of_node_get(dev);
+ 	atomic_set(&sat->refcnt, 0);
+-	init_MUTEX(&sat->mutex);
++	mutex_init(&sat->mutex);
+ 	sat->i2c.addr = (addr >> 1) & 0x7f;
+ 	sat->i2c.adapter = adapter;
+ 	sat->i2c.driver = &wf_sat_driver;
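
Same conversion again in windfarm_smu_sat.c, but for a mutex embedded in a per-device structure, where runtime mutex_init() replaces init_MUTEX():

	#include <linux/mutex.h>

	struct demo_sat {
		struct mutex lock;	/* was: struct semaphore mutex; */
	};

	static void demo_sat_init(struct demo_sat *sat)
	{
		mutex_init(&sat->lock);	/* was: init_MUTEX(&sat->mutex); */
	}
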
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 5938fa9..faf3d89 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -886,7 +886,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
+ 	 */
+ 	raid10_find_phys(conf, r10_bio);
+  retry_write:
+-	blocked_rdev = 0;
++	blocked_rdev = NULL;
+ 	rcu_read_lock();
+ 	for (i = 0;  i < conf->copies; i++) {
+ 		int d = r10_bio->devs[i].devnum;
+diff --git a/drivers/media/Makefile b/drivers/media/Makefile
+index 73f742c..cc11c4c 100644
+--- a/drivers/media/Makefile
++++ b/drivers/media/Makefile
+@@ -2,6 +2,8 @@
+ # Makefile for the kernel multimedia device drivers.
+ #
+ 
++obj-y := common/
++
+ obj-$(CONFIG_VIDEO_MEDIA) += common/
+ 
+ # Since hybrid devices are here, should be compiled if DVB and/or V4L
+diff --git a/drivers/media/video/cx18/cx18-driver.c b/drivers/media/video/cx18/cx18-driver.c
+index 8f5ed9b..3f55d47 100644
+--- a/drivers/media/video/cx18/cx18-driver.c
++++ b/drivers/media/video/cx18/cx18-driver.c
+@@ -613,7 +613,7 @@ static int __devinit cx18_probe(struct pci_dev *dev,
+ 	}
+ 
+ 	cx = kzalloc(sizeof(struct cx18), GFP_ATOMIC);
+-	if (cx == 0) {
++	if (!cx) {
+ 		spin_unlock(&cx18_cards_lock);
+ 		return -ENOMEM;
+ 	}
+diff --git a/drivers/media/video/saa7134/saa7134-video.c b/drivers/media/video/saa7134/saa7134-video.c
+index a0baf2d..48e1a01 100644
+--- a/drivers/media/video/saa7134/saa7134-video.c
++++ b/drivers/media/video/saa7134/saa7134-video.c
+@@ -1634,7 +1634,7 @@ static int saa7134_s_fmt_overlay(struct file *file, void *priv,
+ 	struct saa7134_fh *fh = priv;
+ 	struct saa7134_dev *dev = fh->dev;
+ 	int err;
+-	unsigned int flags;
++	unsigned long flags;
+ 
+ 	if (saa7134_no_overlay > 0) {
+ 		printk(KERN_ERR "V4L2_BUF_TYPE_VIDEO_OVERLAY: no_overlay\n");
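
The saa7134 one-liner generalizes: the flags argument of spin_lock_irqsave() must be unsigned long, since the macro stashes the architecture's full IRQ state there and a narrower type can truncate it. Sketch:

	#include <linux/spinlock.h>

	static void demo_locked(spinlock_t *lock)
	{
		unsigned long flags;	/* never unsigned int */

		spin_lock_irqsave(lock, flags);		/* saves IRQ state */
		/* ... critical section ... */
		spin_unlock_irqrestore(lock, flags);
	}
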
+diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c
+index 30a1af8..fa39410 100644
+--- a/drivers/misc/kgdbts.c
++++ b/drivers/misc/kgdbts.c
+@@ -47,6 +47,7 @@
+  *       to test the HW NMI watchdog
+  * F## = Break at do_fork for ## iterations
+  * S## = Break at sys_open for ## iterations
++ * I## = Run the single step test ## iterations
+  *
+  * NOTE: that the do_fork and sys_open tests are mutually exclusive.
+  *
+@@ -375,7 +376,7 @@ static void emul_sstep_get(char *arg)
+ 		break;
+ 	case 1:
+ 		/* set breakpoint */
+-		break_helper("Z0", 0, sstep_addr);
++		break_helper("Z0", NULL, sstep_addr);
+ 		break;
+ 	case 2:
+ 		/* Continue */
+@@ -383,7 +384,7 @@ static void emul_sstep_get(char *arg)
+ 		break;
+ 	case 3:
+ 		/* Clear breakpoint */
+-		break_helper("z0", 0, sstep_addr);
++		break_helper("z0", NULL, sstep_addr);
+ 		break;
+ 	default:
+ 		eprintk("kgdbts: ERROR failed sstep get emulation\n");
+@@ -465,11 +466,11 @@ static struct test_struct sw_breakpoint_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "c", "T0*", }, /* Continue */
+-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
++	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ 	{ "write", "OK", write_regs },
+ 	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+ 	{ "D", "OK" }, /* Detach */
+-	{ "D", "OK", 0,  got_break }, /* If the test worked we made it here */
++	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+ 	{ "", "" },
+ };
+ 
+@@ -499,14 +500,14 @@ static struct test_struct singlestep_break_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "c", "T0*", }, /* Continue */
+-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
++	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ 	{ "write", "OK", write_regs }, /* Write registers */
+ 	{ "kgdbts_break_test", "OK", sw_rem_break }, /*remove breakpoint */
+ 	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+-	{ "g", "kgdbts_break_test", 0, check_single_step },
++	{ "g", "kgdbts_break_test", NULL, check_single_step },
+ 	{ "kgdbts_break_test", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "c", "T0*", }, /* Continue */
+-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
++	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ 	{ "write", "OK", write_regs }, /* Write registers */
+ 	{ "D", "OK" }, /* Remove all breakpoints and continues */
+ 	{ "", "" },
+@@ -520,14 +521,14 @@ static struct test_struct do_fork_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "c", "T0*", }, /* Continue */
+-	{ "g", "do_fork", 0, check_and_rewind_pc }, /* check location */
++	{ "g", "do_fork", NULL, check_and_rewind_pc }, /* check location */
+ 	{ "write", "OK", write_regs }, /* Write registers */
+ 	{ "do_fork", "OK", sw_rem_break }, /*remove breakpoint */
+ 	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+-	{ "g", "do_fork", 0, check_single_step },
++	{ "g", "do_fork", NULL, check_single_step },
+ 	{ "do_fork", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+-	{ "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
++	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+ 	{ "", "" },
+ };
+ 
+@@ -538,14 +539,14 @@ static struct test_struct sys_open_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "c", "T0*", }, /* Continue */
+-	{ "g", "sys_open", 0, check_and_rewind_pc }, /* check location */
++	{ "g", "sys_open", NULL, check_and_rewind_pc }, /* check location */
+ 	{ "write", "OK", write_regs }, /* Write registers */
+ 	{ "sys_open", "OK", sw_rem_break }, /*remove breakpoint */
+ 	{ "s", "T0*", emul_sstep_get, emul_sstep_put }, /* Single step */
+-	{ "g", "sys_open", 0, check_single_step },
++	{ "g", "sys_open", NULL, check_single_step },
+ 	{ "sys_open", "OK", sw_break, }, /* set sw breakpoint */
+ 	{ "7", "T0*", skip_back_repeat_test }, /* Loop based on repeat_test */
+-	{ "D", "OK", 0, final_ack_set }, /* detach and unregister I/O */
++	{ "D", "OK", NULL, final_ack_set }, /* detach and unregister I/O */
+ 	{ "", "" },
+ };
+ 
+@@ -556,11 +557,11 @@ static struct test_struct hw_breakpoint_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "kgdbts_break_test", "OK", hw_break, }, /* set hw breakpoint */
+ 	{ "c", "T0*", }, /* Continue */
+-	{ "g", "kgdbts_break_test", 0, check_and_rewind_pc },
++	{ "g", "kgdbts_break_test", NULL, check_and_rewind_pc },
+ 	{ "write", "OK", write_regs },
+ 	{ "kgdbts_break_test", "OK", hw_rem_break }, /*remove breakpoint */
+ 	{ "D", "OK" }, /* Detach */
+-	{ "D", "OK", 0,  got_break }, /* If the test worked we made it here */
++	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+ 	{ "", "" },
+ };
+ 
+@@ -570,12 +571,12 @@ static struct test_struct hw_breakpoint_test[] = {
+ static struct test_struct hw_write_break_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "hw_break_val", "OK", hw_write_break, }, /* set hw breakpoint */
+-	{ "c", "T0*", 0, got_break }, /* Continue */
+-	{ "g", "silent", 0, check_and_rewind_pc },
++	{ "c", "T0*", NULL, got_break }, /* Continue */
++	{ "g", "silent", NULL, check_and_rewind_pc },
+ 	{ "write", "OK", write_regs },
+ 	{ "hw_break_val", "OK", hw_rem_write_break }, /*remove breakpoint */
+ 	{ "D", "OK" }, /* Detach */
+-	{ "D", "OK", 0,  got_break }, /* If the test worked we made it here */
++	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+ 	{ "", "" },
+ };
+ 
+@@ -585,12 +586,12 @@ static struct test_struct hw_write_break_test[] = {
+ static struct test_struct hw_access_break_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+ 	{ "hw_break_val", "OK", hw_access_break, }, /* set hw breakpoint */
+-	{ "c", "T0*", 0, got_break }, /* Continue */
+-	{ "g", "silent", 0, check_and_rewind_pc },
++	{ "c", "T0*", NULL, got_break }, /* Continue */
++	{ "g", "silent", NULL, check_and_rewind_pc },
+ 	{ "write", "OK", write_regs },
+ 	{ "hw_break_val", "OK", hw_rem_access_break }, /*remove breakpoint */
+ 	{ "D", "OK" }, /* Detach */
+-	{ "D", "OK", 0,  got_break }, /* If the test worked we made it here */
++	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+ 	{ "", "" },
+ };
+ 
+@@ -599,9 +600,9 @@ static struct test_struct hw_access_break_test[] = {
+  */
+ static struct test_struct nmi_sleep_test[] = {
+ 	{ "?", "S0*" }, /* Clear break points */
+-	{ "c", "T0*", 0, got_break }, /* Continue */
++	{ "c", "T0*", NULL, got_break }, /* Continue */
+ 	{ "D", "OK" }, /* Detach */
+-	{ "D", "OK", 0,  got_break }, /* If the test worked we made it here */
++	{ "D", "OK", NULL,  got_break }, /* On success we made it here */
+ 	{ "", "" },
+ };
+ 
+@@ -874,18 +875,23 @@ static void kgdbts_run_tests(void)
+ {
+ 	char *ptr;
+ 	int fork_test = 0;
+-	int sys_open_test = 0;
++	int do_sys_open_test = 0;
++	int sstep_test = 1000;
+ 	int nmi_sleep = 0;
++	int i;
+ 
+ 	ptr = strstr(config, "F");
+ 	if (ptr)
+-		fork_test = simple_strtol(ptr+1, NULL, 10);
++		fork_test = simple_strtol(ptr + 1, NULL, 10);
+ 	ptr = strstr(config, "S");
+ 	if (ptr)
+-		sys_open_test = simple_strtol(ptr+1, NULL, 10);
++		do_sys_open_test = simple_strtol(ptr + 1, NULL, 10);
+ 	ptr = strstr(config, "N");
+ 	if (ptr)
+ 		nmi_sleep = simple_strtol(ptr+1, NULL, 10);
++	ptr = strstr(config, "I");
++	if (ptr)
++		sstep_test = simple_strtol(ptr+1, NULL, 10);
+ 
+ 	/* required internal KGDB tests */
+ 	v1printk("kgdbts:RUN plant and detach test\n");
+@@ -894,8 +900,13 @@ static void kgdbts_run_tests(void)
+ 	run_breakpoint_test(0);
+ 	v1printk("kgdbts:RUN bad memory access test\n");
+ 	run_bad_read_test();
+-	v1printk("kgdbts:RUN singlestep breakpoint test\n");
+-	run_singlestep_break_test();
++	v1printk("kgdbts:RUN singlestep test %i iterations\n", sstep_test);
++	for (i = 0; i < sstep_test; i++) {
++		run_singlestep_break_test();
++		if (i % 100 == 0)
++			v1printk("kgdbts:RUN singlestep [%i/%i]\n",
++				 i, sstep_test);
++	}
+ 
+ 	/* ===Optional tests=== */
+ 
+@@ -922,7 +933,7 @@ static void kgdbts_run_tests(void)
+ 		repeat_test = fork_test;
+ 		printk(KERN_INFO "kgdbts:RUN do_fork for %i breakpoints\n",
+ 			repeat_test);
+-		kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
++		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+ 		run_do_fork_test();
+ 		return;
+ 	}
+@@ -931,11 +942,11 @@ static void kgdbts_run_tests(void)
+ 	 * executed because a kernel thread will be spawned at the very
+ 	 * end to unregister the debug hooks.
+ 	 */
+-	if (sys_open_test) {
+-		repeat_test = sys_open_test;
++	if (do_sys_open_test) {
++		repeat_test = do_sys_open_test;
+ 		printk(KERN_INFO "kgdbts:RUN sys_open for %i breakpoints\n",
+ 			repeat_test);
+-		kthread_run(kgdbts_unreg_thread, 0, "kgdbts_unreg");
++		kthread_run(kgdbts_unreg_thread, NULL, "kgdbts_unreg");
+ 		run_sys_open_test();
+ 		return;
+ 	}
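
The new I## option parses the same way as F##, S## and N##: find the option letter with strstr() and convert the digits after it. A sketch of that scheme as a hypothetical helper:

	#include <linux/kernel.h>
	#include <linux/string.h>

	static int demo_opt(const char *config, char opt, int def)
	{
		char key[2] = { opt, '\0' };
		const char *p = strstr(config, key);

		return p ? simple_strtol(p + 1, NULL, 10) : def;
	}

	/* e.g. sstep_test = demo_opt(config, 'I', 1000); */
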
+diff --git a/drivers/net/3c59x.c b/drivers/net/3c59x.c
+index 6f8e7d4..2edda8c 100644
+--- a/drivers/net/3c59x.c
++++ b/drivers/net/3c59x.c
+@@ -319,7 +319,7 @@ static struct vortex_chip_info {
+ 	{"3c920B-EMB-WNM (ATI Radeon 9100 IGP)",
+ 	 PCI_USES_MASTER, IS_TORNADO|HAS_MII|HAS_HWCKSM, 128, },
+ 	{"3c980 Cyclone",
+-	 PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM, 128, },
++	 PCI_USES_MASTER, IS_CYCLONE|HAS_HWCKSM|EXTRA_PREAMBLE, 128, },
+ 
+ 	{"3c980C Python-T",
+ 	 PCI_USES_MASTER, IS_CYCLONE|HAS_NWAY|HAS_HWCKSM, 128, },
+@@ -600,7 +600,6 @@ struct vortex_private {
+ 	struct sk_buff* tx_skbuff[TX_RING_SIZE];
+ 	unsigned int cur_rx, cur_tx;		/* The next free ring entry */
+ 	unsigned int dirty_rx, dirty_tx;	/* The ring entries to be free()ed. */
+-	struct net_device_stats stats;		/* Generic stats */
+ 	struct vortex_extra_stats xstats;	/* NIC-specific extra stats */
+ 	struct sk_buff *tx_skb;				/* Packet being eaten by bus master ctrl.  */
+ 	dma_addr_t tx_skb_dma;				/* Allocated DMA address for bus master ctrl DMA.   */
+@@ -1875,7 +1874,7 @@ static void vortex_tx_timeout(struct net_device *dev)
+ 
+ 	issue_and_wait(dev, TxReset);
+ 
+-	vp->stats.tx_errors++;
++	dev->stats.tx_errors++;
+ 	if (vp->full_bus_master_tx) {
+ 		printk(KERN_DEBUG "%s: Resetting the Tx ring pointer.\n", dev->name);
+ 		if (vp->cur_tx - vp->dirty_tx > 0  &&  ioread32(ioaddr + DownListPtr) == 0)
+@@ -1887,7 +1886,7 @@ static void vortex_tx_timeout(struct net_device *dev)
+ 			iowrite8(PKT_BUF_SZ>>8, ioaddr + TxFreeThreshold);
+ 		iowrite16(DownUnstall, ioaddr + EL3_CMD);
+ 	} else {
+-		vp->stats.tx_dropped++;
++		dev->stats.tx_dropped++;
+ 		netif_wake_queue(dev);
+ 	}
+ 
+@@ -1928,8 +1927,8 @@ vortex_error(struct net_device *dev, int status)
+ 			}
+ 			dump_tx_ring(dev);
+ 		}
+-		if (tx_status & 0x14)  vp->stats.tx_fifo_errors++;
+-		if (tx_status & 0x38)  vp->stats.tx_aborted_errors++;
++		if (tx_status & 0x14)  dev->stats.tx_fifo_errors++;
++		if (tx_status & 0x38)  dev->stats.tx_aborted_errors++;
+ 		if (tx_status & 0x08)  vp->xstats.tx_max_collisions++;
+ 		iowrite8(0, ioaddr + TxStatus);
+ 		if (tx_status & 0x30) {			/* txJabber or txUnderrun */
+@@ -2051,8 +2050,8 @@ vortex_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 				if (vortex_debug > 2)
+ 				  printk(KERN_DEBUG "%s: Tx error, status %2.2x.\n",
+ 						 dev->name, tx_status);
+-				if (tx_status & 0x04) vp->stats.tx_fifo_errors++;
+-				if (tx_status & 0x38) vp->stats.tx_aborted_errors++;
++				if (tx_status & 0x04) dev->stats.tx_fifo_errors++;
++				if (tx_status & 0x38) dev->stats.tx_aborted_errors++;
+ 				if (tx_status & 0x30) {
+ 					issue_and_wait(dev, TxReset);
+ 				}
+@@ -2350,7 +2349,7 @@ boomerang_interrupt(int irq, void *dev_id)
+ 				} else {
+ 					printk(KERN_DEBUG "boomerang_interrupt: no skb!\n");
+ 				}
+-				/* vp->stats.tx_packets++;  Counted below. */
++				/* dev->stats.tx_packets++;  Counted below. */
+ 				dirty_tx++;
+ 			}
+ 			vp->dirty_tx = dirty_tx;
+@@ -2409,12 +2408,12 @@ static int vortex_rx(struct net_device *dev)
+ 			unsigned char rx_error = ioread8(ioaddr + RxErrors);
+ 			if (vortex_debug > 2)
+ 				printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+-			vp->stats.rx_errors++;
+-			if (rx_error & 0x01)  vp->stats.rx_over_errors++;
+-			if (rx_error & 0x02)  vp->stats.rx_length_errors++;
+-			if (rx_error & 0x04)  vp->stats.rx_frame_errors++;
+-			if (rx_error & 0x08)  vp->stats.rx_crc_errors++;
+-			if (rx_error & 0x10)  vp->stats.rx_length_errors++;
++			dev->stats.rx_errors++;
++			if (rx_error & 0x01)  dev->stats.rx_over_errors++;
++			if (rx_error & 0x02)  dev->stats.rx_length_errors++;
++			if (rx_error & 0x04)  dev->stats.rx_frame_errors++;
++			if (rx_error & 0x08)  dev->stats.rx_crc_errors++;
++			if (rx_error & 0x10)  dev->stats.rx_length_errors++;
+ 		} else {
+ 			/* The packet length: up to 4.5K!. */
+ 			int pkt_len = rx_status & 0x1fff;
+@@ -2446,7 +2445,7 @@ static int vortex_rx(struct net_device *dev)
+ 				skb->protocol = eth_type_trans(skb, dev);
+ 				netif_rx(skb);
+ 				dev->last_rx = jiffies;
+-				vp->stats.rx_packets++;
++				dev->stats.rx_packets++;
+ 				/* Wait a limited time to go to next packet. */
+ 				for (i = 200; i >= 0; i--)
+ 					if ( ! (ioread16(ioaddr + EL3_STATUS) & CmdInProgress))
+@@ -2455,7 +2454,7 @@ static int vortex_rx(struct net_device *dev)
+ 			} else if (vortex_debug > 0)
+ 				printk(KERN_NOTICE "%s: No memory to allocate a sk_buff of "
+ 					   "size %d.\n", dev->name, pkt_len);
+-			vp->stats.rx_dropped++;
++			dev->stats.rx_dropped++;
+ 		}
+ 		issue_and_wait(dev, RxDiscard);
+ 	}
+@@ -2482,12 +2481,12 @@ boomerang_rx(struct net_device *dev)
+ 			unsigned char rx_error = rx_status >> 16;
+ 			if (vortex_debug > 2)
+ 				printk(KERN_DEBUG " Rx error: status %2.2x.\n", rx_error);
+-			vp->stats.rx_errors++;
+-			if (rx_error & 0x01)  vp->stats.rx_over_errors++;
+-			if (rx_error & 0x02)  vp->stats.rx_length_errors++;
+-			if (rx_error & 0x04)  vp->stats.rx_frame_errors++;
+-			if (rx_error & 0x08)  vp->stats.rx_crc_errors++;
+-			if (rx_error & 0x10)  vp->stats.rx_length_errors++;
++			dev->stats.rx_errors++;
++			if (rx_error & 0x01)  dev->stats.rx_over_errors++;
++			if (rx_error & 0x02)  dev->stats.rx_length_errors++;
++			if (rx_error & 0x04)  dev->stats.rx_frame_errors++;
++			if (rx_error & 0x08)  dev->stats.rx_crc_errors++;
++			if (rx_error & 0x10)  dev->stats.rx_length_errors++;
+ 		} else {
+ 			/* The packet length: up to 4.5K!. */
+ 			int pkt_len = rx_status & 0x1fff;
+@@ -2529,7 +2528,7 @@ boomerang_rx(struct net_device *dev)
+ 			}
+ 			netif_rx(skb);
+ 			dev->last_rx = jiffies;
+-			vp->stats.rx_packets++;
++			dev->stats.rx_packets++;
+ 		}
+ 		entry = (++vp->cur_rx) % RX_RING_SIZE;
+ 	}
+@@ -2591,7 +2590,7 @@ vortex_down(struct net_device *dev, int final_down)
+ 	del_timer_sync(&vp->rx_oom_timer);
+ 	del_timer_sync(&vp->timer);
+ 
+-	/* Turn off statistics ASAP.  We update vp->stats below. */
++	/* Turn off statistics ASAP.  We update dev->stats below. */
+ 	iowrite16(StatsDisable, ioaddr + EL3_CMD);
+ 
+ 	/* Disable the receiver and transmitter. */
+@@ -2728,7 +2727,7 @@ static struct net_device_stats *vortex_get_stats(struct net_device *dev)
+ 		update_stats(ioaddr, dev);
+ 		spin_unlock_irqrestore (&vp->lock, flags);
+ 	}
+-	return &vp->stats;
++	return &dev->stats;
+ }
+ 
+ /*  Update statistics.
+@@ -2748,18 +2747,18 @@ static void update_stats(void __iomem *ioaddr, struct net_device *dev)
+ 	/* Unlike the 3c5x9 we need not turn off stats updates while reading. */
+ 	/* Switch to the stats window, and read everything. */
+ 	EL3WINDOW(6);
+-	vp->stats.tx_carrier_errors		+= ioread8(ioaddr + 0);
+-	vp->stats.tx_heartbeat_errors		+= ioread8(ioaddr + 1);
+-	vp->stats.tx_window_errors		+= ioread8(ioaddr + 4);
+-	vp->stats.rx_fifo_errors		+= ioread8(ioaddr + 5);
+-	vp->stats.tx_packets			+= ioread8(ioaddr + 6);
+-	vp->stats.tx_packets			+= (ioread8(ioaddr + 9)&0x30) << 4;
++	dev->stats.tx_carrier_errors		+= ioread8(ioaddr + 0);
++	dev->stats.tx_heartbeat_errors		+= ioread8(ioaddr + 1);
++	dev->stats.tx_window_errors		+= ioread8(ioaddr + 4);
++	dev->stats.rx_fifo_errors		+= ioread8(ioaddr + 5);
++	dev->stats.tx_packets			+= ioread8(ioaddr + 6);
++	dev->stats.tx_packets			+= (ioread8(ioaddr + 9)&0x30) << 4;
+ 	/* Rx packets	*/			ioread8(ioaddr + 7);   /* Must read to clear */
+ 	/* Don't bother with register 9, an extension of registers 6&7.
+ 	   If we do use the 6&7 values the atomic update assumption above
+ 	   is invalid. */
+-	vp->stats.rx_bytes 			+= ioread16(ioaddr + 10);
+-	vp->stats.tx_bytes 			+= ioread16(ioaddr + 12);
++	dev->stats.rx_bytes 			+= ioread16(ioaddr + 10);
++	dev->stats.tx_bytes 			+= ioread16(ioaddr + 12);
+ 	/* Extra stats for get_ethtool_stats() */
+ 	vp->xstats.tx_multiple_collisions	+= ioread8(ioaddr + 2);
+ 	vp->xstats.tx_single_collisions         += ioread8(ioaddr + 3);
+@@ -2767,14 +2766,14 @@ static void update_stats(void __iomem *ioaddr, struct net_device *dev)
+ 	EL3WINDOW(4);
+ 	vp->xstats.rx_bad_ssd			+= ioread8(ioaddr + 12);
+ 
+-	vp->stats.collisions = vp->xstats.tx_multiple_collisions
++	dev->stats.collisions = vp->xstats.tx_multiple_collisions
+ 		+ vp->xstats.tx_single_collisions
+ 		+ vp->xstats.tx_max_collisions;
+ 
+ 	{
+ 		u8 up = ioread8(ioaddr + 13);
+-		vp->stats.rx_bytes += (up & 0x0f) << 16;
+-		vp->stats.tx_bytes += (up & 0xf0) << 12;
++		dev->stats.rx_bytes += (up & 0x0f) << 16;
++		dev->stats.tx_bytes += (up & 0xf0) << 12;
+ 	}
+ 
+ 	EL3WINDOW(old_window >> 13);
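
Most of the 3c59x churn is one transformation: the driver-private net_device_stats copy goes away in favor of the one embedded in struct net_device, so every vp->stats.X becomes dev->stats.X and get_stats() can hand back the embedded structure directly:

	#include <linux/netdevice.h>

	static struct net_device_stats *demo_get_stats(struct net_device *dev)
	{
		/* counters are bumped on dev->stats in place; nothing to
		 * copy or synchronize before returning */
		return &dev->stats;
	}
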
+diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
+index af46341..d27f54a 100644
+--- a/drivers/net/Kconfig
++++ b/drivers/net/Kconfig
+@@ -1273,20 +1273,6 @@ config PCNET32
+ 	  To compile this driver as a module, choose M here. The module
+ 	  will be called pcnet32.
+ 
+-config PCNET32_NAPI
+-	bool "Use RX polling (NAPI)"
+-	depends on PCNET32
+-	help
+-	  NAPI is a new driver API designed to reduce CPU and interrupt load
+-	  when the driver is receiving lots of packets from the card. It is
+-	  still somewhat experimental and thus not yet enabled by default.
+-
+-	  If your estimated Rx load is 10kpps or more, or if the card will be
+-	  deployed on potentially unfriendly networks (e.g. in a firewall),
+-	  then say Y here.
+-
+-	  If in doubt, say N.
+-
+ config AMD8111_ETH
+ 	tristate "AMD 8111 (new PCI lance) support"
+ 	depends on NET_PCI && PCI
+diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
+index 6425603..50a40e4 100644
+--- a/drivers/net/bonding/bond_main.c
++++ b/drivers/net/bonding/bond_main.c
+@@ -1425,13 +1425,13 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ 	res = netdev_set_master(slave_dev, bond_dev);
+ 	if (res) {
+ 		dprintk("Error %d calling netdev_set_master\n", res);
+-		goto err_close;
++		goto err_restore_mac;
+ 	}
+ 	/* open the slave since the application closed it */
+ 	res = dev_open(slave_dev);
+ 	if (res) {
+ 		dprintk("Openning slave %s failed\n", slave_dev->name);
+-		goto err_restore_mac;
++		goto err_unset_master;
+ 	}
+ 
+ 	new_slave->dev = slave_dev;
+@@ -1444,7 +1444,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ 		 */
+ 		res = bond_alb_init_slave(bond, new_slave);
+ 		if (res) {
+-			goto err_unset_master;
++			goto err_close;
+ 		}
+ 	}
+ 
+@@ -1619,7 +1619,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ 
+ 	res = bond_create_slave_symlinks(bond_dev, slave_dev);
+ 	if (res)
+-		goto err_unset_master;
++		goto err_close;
+ 
+ 	printk(KERN_INFO DRV_NAME
+ 	       ": %s: enslaving %s as a%s interface with a%s link.\n",
+@@ -1631,12 +1631,12 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev)
+ 	return 0;
+ 
+ /* Undo stages on error */
+-err_unset_master:
+-	netdev_set_master(slave_dev, NULL);
+-
+ err_close:
+ 	dev_close(slave_dev);
+ 
++err_unset_master:
++	netdev_set_master(slave_dev, NULL);
++
+ err_restore_mac:
+ 	if (!bond->params.fail_over_mac) {
+ 		memcpy(addr.sa_data, new_slave->perm_hwaddr, ETH_ALEN);
+@@ -4936,7 +4936,9 @@ int bond_create(char *name, struct bond_params *params, struct bonding **newbond
+ 	if (res < 0) {
+ 		rtnl_lock();
+ 		down_write(&bonding_rwsem);
+-		goto out_bond;
++		bond_deinit(bond_dev);
++		unregister_netdevice(bond_dev);
++		goto out_rtnl;
+ 	}
+ 
+ 	return 0;
+@@ -4990,9 +4992,10 @@ err:
+ 		destroy_workqueue(bond->wq);
+ 	}
+ 
++	bond_destroy_sysfs();
++
+ 	rtnl_lock();
+ 	bond_free_all();
+-	bond_destroy_sysfs();
+ 	rtnl_unlock();
+ out:
+ 	return res;
+@@ -5004,9 +5007,10 @@ static void __exit bonding_exit(void)
+ 	unregister_netdevice_notifier(&bond_netdev_notifier);
+ 	unregister_inetaddr_notifier(&bond_inetaddr_notifier);
+ 
++	bond_destroy_sysfs();
++
+ 	rtnl_lock();
+ 	bond_free_all();
+-	bond_destroy_sysfs();
+ 	rtnl_unlock();
+ }
+ 
+diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c
+index 979c2d0..68c41a0 100644
+--- a/drivers/net/bonding/bond_sysfs.c
++++ b/drivers/net/bonding/bond_sysfs.c
+@@ -146,29 +146,29 @@ static ssize_t bonding_store_bonds(struct class *cls, const char *buffer, size_t
+ 						": Unable remove bond %s due to open references.\n",
+ 						ifname);
+ 					res = -EPERM;
+-					goto out;
++					goto out_unlock;
+ 				}
+ 				printk(KERN_INFO DRV_NAME
+ 					": %s is being deleted...\n",
+ 					bond->dev->name);
+ 				bond_destroy(bond);
+-				up_write(&bonding_rwsem);
+-				rtnl_unlock();
+-				goto out;
++				goto out_unlock;
+ 			}
+ 
+ 		printk(KERN_ERR DRV_NAME
+ 			": unable to delete non-existent bond %s\n", ifname);
+ 		res = -ENODEV;
+-		up_write(&bonding_rwsem);
+-		rtnl_unlock();
+-		goto out;
++		goto out_unlock;
+ 	}
+ 
+ err_no_cmd:
+ 	printk(KERN_ERR DRV_NAME
+ 		": no command found in bonding_masters. Use +ifname or -ifname.\n");
+-	res = -EPERM;
++	return -EPERM;
++
++out_unlock:
++	up_write(&bonding_rwsem);
++	rtnl_unlock();
+ 
+ 	/* Always return either count or an error.  If you return 0, you'll
+ 	 * get called forever, which is bad.
+diff --git a/drivers/net/cxgb3/cxgb3_main.c b/drivers/net/cxgb3/cxgb3_main.c
+index 05e5f59..ce949d5 100644
+--- a/drivers/net/cxgb3/cxgb3_main.c
++++ b/drivers/net/cxgb3/cxgb3_main.c
+@@ -1894,11 +1894,11 @@ static int cxgb_extension_ioctl(struct net_device *dev, void __user *useraddr)
+ 		u8 *fw_data;
+ 		struct ch_mem_range t;
+ 
+-		if (!capable(CAP_NET_ADMIN))
++		if (!capable(CAP_SYS_RAWIO))
+ 			return -EPERM;
+ 		if (copy_from_user(&t, useraddr, sizeof(t)))
+ 			return -EFAULT;
+-
++		/* Check t.len sanity ? */
+ 		fw_data = kmalloc(t.len, GFP_KERNEL);
+ 		if (!fw_data)
+ 			return -ENOMEM;
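
The cxgb3 hunk tightens the privilege check on the firmware-load ioctl: raw hardware access is the domain of CAP_SYS_RAWIO, not the broader CAP_NET_ADMIN, and the added comment flags that t.len still sizes a kmalloc() unchecked. The check in isolation, with hypothetical surroundings:

	#include <linux/capability.h>

	static int demo_load_fw(void __user *useraddr)
	{
		if (!capable(CAP_SYS_RAWIO))	/* raw device access */
			return -EPERM;
		/* ... copy the request in, validate its length, then
		 * allocate and program the firmware image ... */
		return 0;
	}
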
+diff --git a/drivers/net/e1000e/defines.h b/drivers/net/e1000e/defines.h
+index 2a53875..f823b8b 100644
+--- a/drivers/net/e1000e/defines.h
++++ b/drivers/net/e1000e/defines.h
+@@ -648,6 +648,8 @@
+ #define IFE_E_PHY_ID         0x02A80330
+ #define IFE_PLUS_E_PHY_ID    0x02A80320
+ #define IFE_C_E_PHY_ID       0x02A80310
++#define BME1000_E_PHY_ID     0x01410CB0
++#define BME1000_E_PHY_ID_R2  0x01410CB1
+ 
+ /* M88E1000 Specific Registers */
+ #define M88E1000_PHY_SPEC_CTRL     0x10  /* PHY Specific Control Register */
+@@ -701,6 +703,14 @@
+ #define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK  0x0E00
+ #define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X    0x0800
+ 
++/* BME1000 PHY Specific Control Register */
++#define BME1000_PSCR_ENABLE_DOWNSHIFT   0x0800 /* 1 = enable downshift */
++
++
++#define PHY_PAGE_SHIFT 5
++#define PHY_REG(page, reg) (((page) << PHY_PAGE_SHIFT) | \
++                           ((reg) & MAX_PHY_REG_ADDRESS))
++
+ /*
+  * Bits...
+  * 15-5: page
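
PHY_REG() flattens a (page, register) pair into one address: the low five bits hold the per-page register (MAX_PHY_REG_ADDRESS is 0x1F elsewhere in e1000e), everything above holds the page. A worked example of the encoding the loopback code uses later:

	/* PHY_REG(769, 16) == (769 << 5) | (16 & 0x1f)
	 *                  == 24608 | 16
	 *                  == 24624
	 * bits 15..5 carry the page (769), bits 4..0 the register (16).
	 */
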
+diff --git a/drivers/net/e1000e/e1000.h b/drivers/net/e1000e/e1000.h
+index 38bfd0d..d3bc6f8 100644
+--- a/drivers/net/e1000e/e1000.h
++++ b/drivers/net/e1000e/e1000.h
+@@ -127,7 +127,7 @@ struct e1000_buffer {
+ 		/* arrays of page information for packet split */
+ 		struct e1000_ps_page *ps_pages;
+ 	};
+-
++	struct page *page;
+ };
+ 
+ struct e1000_ring {
+@@ -304,6 +304,7 @@ struct e1000_info {
+ #define FLAG_HAS_CTRLEXT_ON_LOAD          (1 << 5)
+ #define FLAG_HAS_SWSM_ON_LOAD             (1 << 6)
+ #define FLAG_HAS_JUMBO_FRAMES             (1 << 7)
++#define FLAG_IS_ICH                       (1 << 9)
+ #define FLAG_HAS_SMART_POWER_DOWN         (1 << 11)
+ #define FLAG_IS_QUAD_PORT_A               (1 << 12)
+ #define FLAG_IS_QUAD_PORT                 (1 << 13)
+@@ -386,6 +387,7 @@ extern void e1000e_set_kmrn_lock_loss_workaround_ich8lan(struct e1000_hw *hw,
+ 						 bool state);
+ extern void e1000e_igp3_phy_powerdown_workaround_ich8lan(struct e1000_hw *hw);
+ extern void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw);
++extern void e1000e_disable_gig_wol_ich8lan(struct e1000_hw *hw);
+ 
+ extern s32 e1000e_check_for_copper_link(struct e1000_hw *hw);
+ extern s32 e1000e_check_for_fiber_link(struct e1000_hw *hw);
+@@ -443,6 +445,9 @@ extern s32 e1000e_get_phy_info_m88(struct e1000_hw *hw);
+ extern s32 e1000e_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data);
+ extern s32 e1000e_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data);
+ extern enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id);
++extern s32 e1000e_determine_phy_address(struct e1000_hw *hw);
++extern s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data);
++extern s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data);
+ extern void e1000e_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl);
+ extern s32 e1000e_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data);
+ extern s32 e1000e_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data);
+diff --git a/drivers/net/e1000e/ethtool.c b/drivers/net/e1000e/ethtool.c
+index ce045ac..a14561f 100644
+--- a/drivers/net/e1000e/ethtool.c
++++ b/drivers/net/e1000e/ethtool.c
+@@ -494,8 +494,12 @@ static int e1000_get_eeprom(struct net_device *netdev,
+ 		for (i = 0; i < last_word - first_word + 1; i++) {
+ 			ret_val = e1000_read_nvm(hw, first_word + i, 1,
+ 						      &eeprom_buff[i]);
+-			if (ret_val)
++			if (ret_val) {
++				/* a read error occurred, throw away the
++				 * result */
++				memset(eeprom_buff, 0xff, sizeof(eeprom_buff));
+ 				break;
++			}
+ 		}
+ 	}
+ 
+@@ -803,8 +807,7 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
+ 	/* restore previous status */
+ 	ew32(STATUS, before);
+ 
+-	if ((mac->type != e1000_ich8lan) &&
+-	    (mac->type != e1000_ich9lan)) {
++	if (!(adapter->flags & FLAG_IS_ICH)) {
+ 		REG_PATTERN_TEST(E1000_FCAL, 0xFFFFFFFF, 0xFFFFFFFF);
+ 		REG_PATTERN_TEST(E1000_FCAH, 0x0000FFFF, 0xFFFFFFFF);
+ 		REG_PATTERN_TEST(E1000_FCT, 0x0000FFFF, 0xFFFFFFFF);
+@@ -824,15 +827,13 @@ static int e1000_reg_test(struct e1000_adapter *adapter, u64 *data)
+ 
+ 	REG_SET_AND_CHECK(E1000_RCTL, 0xFFFFFFFF, 0x00000000);
+ 
+-	before = (((mac->type == e1000_ich8lan) ||
+-		   (mac->type == e1000_ich9lan)) ? 0x06C3B33E : 0x06DFB3FE);
++	before = ((adapter->flags & FLAG_IS_ICH) ? 0x06C3B33E : 0x06DFB3FE);
+ 	REG_SET_AND_CHECK(E1000_RCTL, before, 0x003FFFFB);
+ 	REG_SET_AND_CHECK(E1000_TCTL, 0xFFFFFFFF, 0x00000000);
+ 
+ 	REG_SET_AND_CHECK(E1000_RCTL, before, 0xFFFFFFFF);
+ 	REG_PATTERN_TEST(E1000_RDBAL, 0xFFFFFFF0, 0xFFFFFFFF);
+-	if ((mac->type != e1000_ich8lan) &&
+-	    (mac->type != e1000_ich9lan))
++	if (!(adapter->flags & FLAG_IS_ICH))
+ 		REG_PATTERN_TEST(E1000_TXCW, 0xC000FFFF, 0x0000FFFF);
+ 	REG_PATTERN_TEST(E1000_TDBAL, 0xFFFFFFF0, 0xFFFFFFFF);
+ 	REG_PATTERN_TEST(E1000_TIDV, 0x0000FFFF, 0x0000FFFF);
+@@ -911,9 +912,7 @@ static int e1000_intr_test(struct e1000_adapter *adapter, u64 *data)
+ 
+ 	/* Test each interrupt */
+ 	for (i = 0; i < 10; i++) {
+-
+-		if (((adapter->hw.mac.type == e1000_ich8lan) ||
+-		     (adapter->hw.mac.type == e1000_ich9lan)) && i == 8)
++		if ((adapter->flags & FLAG_IS_ICH) && (i == 8))
+ 			continue;
+ 
+ 		/* Interrupt to test */
+@@ -1184,6 +1183,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
+ 	struct e1000_hw *hw = &adapter->hw;
+ 	u32 ctrl_reg = 0;
+ 	u32 stat_reg = 0;
++	u16 phy_reg = 0;
+ 
+ 	hw->mac.autoneg = 0;
+ 
+@@ -1211,6 +1211,28 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
+ 			     E1000_CTRL_SPD_100 |/* Force Speed to 100 */
+ 			     E1000_CTRL_FD);	 /* Force Duplex to FULL */
+ 		break;
++	case e1000_phy_bm:
++		/* Set Default MAC Interface speed to 1GB */
++		e1e_rphy(hw, PHY_REG(2, 21), &phy_reg);
++		phy_reg &= ~0x0007;
++		phy_reg |= 0x006;
++		e1e_wphy(hw, PHY_REG(2, 21), phy_reg);
++		/* Assert SW reset for above settings to take effect */
++		e1000e_commit_phy(hw);
++		mdelay(1);
++		/* Force Full Duplex */
++		e1e_rphy(hw, PHY_REG(769, 16), &phy_reg);
++		e1e_wphy(hw, PHY_REG(769, 16), phy_reg | 0x000C);
++		/* Set Link Up (in force link) */
++		e1e_rphy(hw, PHY_REG(776, 16), &phy_reg);
++		e1e_wphy(hw, PHY_REG(776, 16), phy_reg | 0x0040);
++		/* Force Link */
++		e1e_rphy(hw, PHY_REG(769, 16), &phy_reg);
++		e1e_wphy(hw, PHY_REG(769, 16), phy_reg | 0x0040);
++		/* Set Early Link Enable */
++		e1e_rphy(hw, PHY_REG(769, 20), &phy_reg);
++		e1e_wphy(hw, PHY_REG(769, 20), phy_reg | 0x0400);
++		/* fall through */
+ 	default:
+ 		/* force 1000, set loopback */
+ 		e1e_wphy(hw, PHY_CONTROL, 0x4140);
+@@ -1224,8 +1246,7 @@ static int e1000_integrated_phy_loopback(struct e1000_adapter *adapter)
+ 			     E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */
+ 			     E1000_CTRL_FD);	 /* Force Duplex to FULL */
+ 
+-		if ((adapter->hw.mac.type == e1000_ich8lan) ||
+-		    (adapter->hw.mac.type == e1000_ich9lan))
++		if (adapter->flags & FLAG_IS_ICH)
+ 			ctrl_reg |= E1000_CTRL_SLU;	/* Set Link Up */
+ 	}
+ 
+diff --git a/drivers/net/e1000e/hw.h b/drivers/net/e1000e/hw.h
+index a930e6d..74f263a 100644
+--- a/drivers/net/e1000e/hw.h
++++ b/drivers/net/e1000e/hw.h
+@@ -216,6 +216,21 @@ enum e1e_registers {
+ #define IGP01E1000_PHY_LINK_HEALTH	0x13 /* PHY Link Health */
+ #define IGP02E1000_PHY_POWER_MGMT	0x19 /* Power Management */
+ #define IGP01E1000_PHY_PAGE_SELECT	0x1F /* Page Select */
++#define BM_PHY_PAGE_SELECT		22   /* Page Select for BM */
++#define IGP_PAGE_SHIFT			5
++#define PHY_REG_MASK			0x1F
++
++#define BM_WUC_PAGE			800
++#define BM_WUC_ADDRESS_OPCODE		0x11
++#define BM_WUC_DATA_OPCODE		0x12
++#define BM_WUC_ENABLE_PAGE		769
++#define BM_WUC_ENABLE_REG		17
++#define BM_WUC_ENABLE_BIT		(1 << 2)
++#define BM_WUC_HOST_WU_BIT		(1 << 4)
++
++#define BM_WUC	PHY_REG(BM_WUC_PAGE, 1)
++#define BM_WUFC PHY_REG(BM_WUC_PAGE, 2)
++#define BM_WUS	PHY_REG(BM_WUC_PAGE, 3)
+ 
+ #define IGP01E1000_PHY_PCS_INIT_REG	0x00B4
+ #define IGP01E1000_PHY_POLARITY_MASK	0x0078
+@@ -331,10 +346,16 @@ enum e1e_registers {
+ #define E1000_DEV_ID_ICH8_IFE_G			0x10C5
+ #define E1000_DEV_ID_ICH8_IGP_M			0x104D
+ #define E1000_DEV_ID_ICH9_IGP_AMT		0x10BD
++#define E1000_DEV_ID_ICH9_IGP_M_AMT		0x10F5
++#define E1000_DEV_ID_ICH9_IGP_M			0x10BF
++#define E1000_DEV_ID_ICH9_IGP_M_V		0x10CB
+ #define E1000_DEV_ID_ICH9_IGP_C			0x294C
+ #define E1000_DEV_ID_ICH9_IFE			0x10C0
+ #define E1000_DEV_ID_ICH9_IFE_GT		0x10C3
+ #define E1000_DEV_ID_ICH9_IFE_G			0x10C2
++#define E1000_DEV_ID_ICH10_R_BM_LM		0x10CC
++#define E1000_DEV_ID_ICH10_R_BM_LF		0x10CD
++#define E1000_DEV_ID_ICH10_R_BM_V		0x10CE
+ 
+ #define E1000_FUNC_1 1
+ 
+@@ -378,6 +399,7 @@ enum e1000_phy_type {
+ 	e1000_phy_gg82563,
+ 	e1000_phy_igp_3,
+ 	e1000_phy_ife,
++	e1000_phy_bm,
+ };
+ 
+ enum e1000_bus_width {
+diff --git a/drivers/net/e1000e/ich8lan.c b/drivers/net/e1000e/ich8lan.c
+index 768485d..9e38452 100644
+--- a/drivers/net/e1000e/ich8lan.c
++++ b/drivers/net/e1000e/ich8lan.c
+@@ -38,6 +38,12 @@
+  * 82566DM Gigabit Network Connection
+  * 82566MC Gigabit Network Connection
+  * 82566MM Gigabit Network Connection
++ * 82567LM Gigabit Network Connection
++ * 82567LF Gigabit Network Connection
++ * 82567LM-2 Gigabit Network Connection
++ * 82567LF-2 Gigabit Network Connection
++ * 82567V-2 Gigabit Network Connection
++ * 82562GT-3 10/100 Network Connection
+  */
+ 
+ #include <linux/netdevice.h>
+@@ -198,6 +204,19 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw)
+ 	phy->addr			= 1;
+ 	phy->reset_delay_us		= 100;
+ 
++	/*
++	 * We may need to do this twice - once for IGP and if that fails,
++	 * we'll set BM func pointers and try again
++	 */
++	ret_val = e1000e_determine_phy_address(hw);
++	if (ret_val) {
++		hw->phy.ops.write_phy_reg = e1000e_write_phy_reg_bm;
++		hw->phy.ops.read_phy_reg  = e1000e_read_phy_reg_bm;
++		ret_val = e1000e_determine_phy_address(hw);
++		if (ret_val)
++			return ret_val;
++	}
++
+ 	phy->id = 0;
+ 	while ((e1000_phy_unknown == e1000e_get_phy_type_from_id(phy->id)) &&
+ 	       (i++ < 100)) {
+@@ -219,6 +238,13 @@ static s32 e1000_init_phy_params_ich8lan(struct e1000_hw *hw)
+ 		phy->type = e1000_phy_ife;
+ 		phy->autoneg_mask = E1000_ALL_NOT_GIG;
+ 		break;
++	case BME1000_E_PHY_ID:
++		phy->type = e1000_phy_bm;
++		phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT;
++		hw->phy.ops.read_phy_reg = e1000e_read_phy_reg_bm;
++		hw->phy.ops.write_phy_reg = e1000e_write_phy_reg_bm;
++		hw->phy.ops.commit_phy = e1000e_phy_sw_reset;
++		break;
+ 	default:
+ 		return -E1000_ERR_PHY;
+ 		break;
+@@ -664,6 +690,7 @@ static s32 e1000_get_phy_info_ich8lan(struct e1000_hw *hw)
+ 		return e1000_get_phy_info_ife_ich8lan(hw);
+ 		break;
+ 	case e1000_phy_igp_3:
++	case e1000_phy_bm:
+ 		return e1000e_get_phy_info_igp(hw);
+ 		break;
+ 	default:
+@@ -728,7 +755,7 @@ static s32 e1000_set_d0_lplu_state_ich8lan(struct e1000_hw *hw, bool active)
+ 	s32 ret_val = 0;
+ 	u16 data;
+ 
+-	if (phy->type != e1000_phy_igp_3)
++	if (phy->type == e1000_phy_ife)
+ 		return ret_val;
+ 
+ 	phy_ctrl = er32(PHY_CTRL);
+@@ -1918,8 +1945,35 @@ static s32 e1000_setup_copper_link_ich8lan(struct e1000_hw *hw)
+ 		ret_val = e1000e_copper_link_setup_igp(hw);
+ 		if (ret_val)
+ 			return ret_val;
++	} else if (hw->phy.type == e1000_phy_bm) {
++		ret_val = e1000e_copper_link_setup_m88(hw);
++		if (ret_val)
++			return ret_val;
+ 	}
+ 
++	if (hw->phy.type == e1000_phy_ife) {
++		ret_val = e1e_rphy(hw, IFE_PHY_MDIX_CONTROL, &reg_data);
++		if (ret_val)
++			return ret_val;
++
++		reg_data &= ~IFE_PMC_AUTO_MDIX;
++
++		switch (hw->phy.mdix) {
++		case 1:
++			reg_data &= ~IFE_PMC_FORCE_MDIX;
++			break;
++		case 2:
++			reg_data |= IFE_PMC_FORCE_MDIX;
++			break;
++		case 0:
++		default:
++			reg_data |= IFE_PMC_AUTO_MDIX;
++			break;
++		}
++		ret_val = e1e_wphy(hw, IFE_PHY_MDIX_CONTROL, reg_data);
++		if (ret_val)
++			return ret_val;
++	}
+ 	return e1000e_setup_copper_link(hw);
+ }
+ 
+@@ -2127,6 +2181,31 @@ void e1000e_gig_downshift_workaround_ich8lan(struct e1000_hw *hw)
+ }
+ 
+ /**
++ *  e1000e_disable_gig_wol_ich8lan - disable gig during WoL
++ *  @hw: pointer to the HW structure
++ *
++ *  During S0 to Sx transition, it is possible the link remains at gig
++ *  instead of negotiating to a lower speed.  Before going to Sx, set
++ *  'LPLU Enabled' and 'Gig Disable' to force link speed negotiation
++ *  to a lower speed.
++ *
++ *  Should only be called for ICH9 devices.
++ **/
++void e1000e_disable_gig_wol_ich8lan(struct e1000_hw *hw)
++{
++	u32 phy_ctrl;
++
++	if (hw->mac.type == e1000_ich9lan) {
++		phy_ctrl = er32(PHY_CTRL);
++		phy_ctrl |= E1000_PHY_CTRL_D0A_LPLU |
++		            E1000_PHY_CTRL_GBE_DISABLE;
++		ew32(PHY_CTRL, phy_ctrl);
++	}
++
++	return;
++}
++
++/**
+  *  e1000_cleanup_led_ich8lan - Restore the default LED operation
+  *  @hw: pointer to the HW structure
+  *
+@@ -2247,6 +2326,7 @@ static struct e1000_nvm_operations ich8_nvm_ops = {
+ struct e1000_info e1000_ich8_info = {
+ 	.mac			= e1000_ich8lan,
+ 	.flags			= FLAG_HAS_WOL
++				  | FLAG_IS_ICH
+ 				  | FLAG_RX_CSUM_ENABLED
+ 				  | FLAG_HAS_CTRLEXT_ON_LOAD
+ 				  | FLAG_HAS_AMT
+@@ -2262,6 +2342,7 @@ struct e1000_info e1000_ich8_info = {
+ struct e1000_info e1000_ich9_info = {
+ 	.mac			= e1000_ich9lan,
+ 	.flags			= FLAG_HAS_JUMBO_FRAMES
++				  | FLAG_IS_ICH
+ 				  | FLAG_HAS_WOL
+ 				  | FLAG_RX_CSUM_ENABLED
+ 				  | FLAG_HAS_CTRLEXT_ON_LOAD
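
The ich8lan PHY init above probes twice on purpose: if detection with the default (IGP-style) accessors fails, as it may on the new BM (82567) parts, the read/write function pointers are swapped for the BM variants and detection is retried. The shape of that fallback, names hypothetical:

	#include <linux/types.h>

	struct demo_hw {
		int (*read_reg)(struct demo_hw *hw, u32 offset, u16 *data);
		int (*write_reg)(struct demo_hw *hw, u32 offset, u16 data);
	};

	static int demo_read_bm(struct demo_hw *hw, u32 offset, u16 *data);
	static int demo_write_bm(struct demo_hw *hw, u32 offset, u16 data);
	static int demo_probe_id(struct demo_hw *hw);

	static int demo_detect_phy(struct demo_hw *hw)
	{
		int err = demo_probe_id(hw);	/* default accessors first */

		if (err) {
			hw->read_reg  = demo_read_bm;	/* swap in BM accessors */
			hw->write_reg = demo_write_bm;
			err = demo_probe_id(hw);	/* ... and retry */
		}
		return err;
	}
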
+diff --git a/drivers/net/e1000e/netdev.c b/drivers/net/e1000e/netdev.c
+index 8991ab8..8cbb40f 100644
+--- a/drivers/net/e1000e/netdev.c
++++ b/drivers/net/e1000e/netdev.c
+@@ -43,10 +43,11 @@
+ #include <linux/if_vlan.h>
+ #include <linux/cpu.h>
+ #include <linux/smp.h>
++#include <linux/pm_qos_params.h>
+ 
+ #include "e1000.h"
+ 
+-#define DRV_VERSION "0.2.1"
++#define DRV_VERSION "0.3.3.3-k2"
+ char e1000e_driver_name[] = "e1000e";
+ const char e1000e_driver_version[] = DRV_VERSION;
+ 
+@@ -341,6 +342,89 @@ no_buffers:
+ }
+ 
+ /**
++ * e1000_alloc_jumbo_rx_buffers - Replace used jumbo receive buffers
++ * @adapter: address of board private structure
++ * @rx_ring: pointer to receive ring structure
++ * @cleaned_count: number of buffers to allocate this pass
++ **/
++
++static void e1000_alloc_jumbo_rx_buffers(struct e1000_adapter *adapter,
++                                         int cleaned_count)
++{
++	struct net_device *netdev = adapter->netdev;
++	struct pci_dev *pdev = adapter->pdev;
++	struct e1000_rx_desc *rx_desc;
++	struct e1000_ring *rx_ring = adapter->rx_ring;
++	struct e1000_buffer *buffer_info;
++	struct sk_buff *skb;
++	unsigned int i;
++	unsigned int bufsz = 256 -
++	                     16 /* for skb_reserve */ -
++	                     NET_IP_ALIGN;
++
++	i = rx_ring->next_to_use;
++	buffer_info = &rx_ring->buffer_info[i];
++
++	while (cleaned_count--) {
++		skb = buffer_info->skb;
++		if (skb) {
++			skb_trim(skb, 0);
++			goto check_page;
++		}
++
++		skb = netdev_alloc_skb(netdev, bufsz);
++		if (unlikely(!skb)) {
++			/* Better luck next round */
++			adapter->alloc_rx_buff_failed++;
++			break;
++		}
++
++		/* Make buffer alignment 2 beyond a 16 byte boundary
++		 * this will result in a 16 byte aligned IP header after
++		 * the 14 byte MAC header is removed
++		 */
++		skb_reserve(skb, NET_IP_ALIGN);
++
++		buffer_info->skb = skb;
++check_page:
++		/* allocate a new page if necessary */
++		if (!buffer_info->page) {
++			buffer_info->page = alloc_page(GFP_ATOMIC);
++			if (unlikely(!buffer_info->page)) {
++				adapter->alloc_rx_buff_failed++;
++				break;
++			}
++		}
++
++		if (!buffer_info->dma)
++			buffer_info->dma = pci_map_page(pdev,
++			                                buffer_info->page, 0,
++			                                PAGE_SIZE,
++			                                PCI_DMA_FROMDEVICE);
++
++		rx_desc = E1000_RX_DESC(*rx_ring, i);
++		rx_desc->buffer_addr = cpu_to_le64(buffer_info->dma);
++
++		if (unlikely(++i == rx_ring->count))
++			i = 0;
++		buffer_info = &rx_ring->buffer_info[i];
++	}
++
++	if (likely(rx_ring->next_to_use != i)) {
++		rx_ring->next_to_use = i;
++		if (unlikely(i-- == 0))
++			i = (rx_ring->count - 1);
++
++		/* Force memory writes to complete before letting h/w
++		 * know there are new descriptors to fetch.  (Only
++		 * applicable for weak-ordered memory model archs,
++		 * such as IA-64). */
++		wmb();
++		writel(i, adapter->hw.hw_addr + rx_ring->tail);
++	}
++}
++
++/**
+  * e1000_clean_rx_irq - Send received data up the network stack; legacy
+  * @adapter: board private structure
+  *
+@@ -783,6 +867,186 @@ next_desc:
+ }
+ 
+ /**
++ * e1000_consume_page - helper function
++ **/
++static void e1000_consume_page(struct e1000_buffer *bi, struct sk_buff *skb,
++                               u16 length)
++{
++	bi->page = NULL;
++	skb->len += length;
++	skb->data_len += length;
++	skb->truesize += length;
++}
++
++/**
++ * e1000_clean_jumbo_rx_irq - Send received data up the network stack; legacy
++ * @adapter: board private structure
++ *
++ * the return value indicates whether actual cleaning was done, there
++ * is no guarantee that everything was cleaned
++ **/
++
++static bool e1000_clean_jumbo_rx_irq(struct e1000_adapter *adapter,
++                                     int *work_done, int work_to_do)
++{
++	struct net_device *netdev = adapter->netdev;
++	struct pci_dev *pdev = adapter->pdev;
++	struct e1000_ring *rx_ring = adapter->rx_ring;
++	struct e1000_rx_desc *rx_desc, *next_rxd;
++	struct e1000_buffer *buffer_info, *next_buffer;
++	u32 length;
++	unsigned int i;
++	int cleaned_count = 0;
++	bool cleaned = false;
++	unsigned int total_rx_bytes=0, total_rx_packets=0;
++
++	i = rx_ring->next_to_clean;
++	rx_desc = E1000_RX_DESC(*rx_ring, i);
++	buffer_info = &rx_ring->buffer_info[i];
++
++	while (rx_desc->status & E1000_RXD_STAT_DD) {
++		struct sk_buff *skb;
++		u8 status;
++
++		if (*work_done >= work_to_do)
++			break;
++		(*work_done)++;
++
++		status = rx_desc->status;
++		skb = buffer_info->skb;
++		buffer_info->skb = NULL;
++
++		++i;
++		if (i == rx_ring->count)
++			i = 0;
++		next_rxd = E1000_RX_DESC(*rx_ring, i);
++		prefetch(next_rxd);
++
++		next_buffer = &rx_ring->buffer_info[i];
++
++		cleaned = true;
++		cleaned_count++;
++		pci_unmap_page(pdev, buffer_info->dma, PAGE_SIZE,
++		               PCI_DMA_FROMDEVICE);
++		buffer_info->dma = 0;
++
++		length = le16_to_cpu(rx_desc->length);
++
++		/* errors is only valid for DD + EOP descriptors */
++		if (unlikely((status & E1000_RXD_STAT_EOP) &&
++		    (rx_desc->errors & E1000_RXD_ERR_FRAME_ERR_MASK))) {
++				/* recycle both page and skb */
++				buffer_info->skb = skb;
++				/* an error means any chain goes out the window
++				 * too */
++				if (rx_ring->rx_skb_top)
++					dev_kfree_skb(rx_ring->rx_skb_top);
++				rx_ring->rx_skb_top = NULL;
++				goto next_desc;
++		}
++
++#define rxtop rx_ring->rx_skb_top
++		if (!(status & E1000_RXD_STAT_EOP)) {
++			/* this descriptor is only the beginning (or middle) */
++			if (!rxtop) {
++				/* this is the beginning of a chain */
++				rxtop = skb;
++				skb_fill_page_desc(rxtop, 0, buffer_info->page,
++				                   0, length);
++			} else {
++				/* this is the middle of a chain */
++				skb_fill_page_desc(rxtop,
++				    skb_shinfo(rxtop)->nr_frags,
++				    buffer_info->page, 0, length);
++				/* re-use the skb, only consumed the page */
++				buffer_info->skb = skb;
++			}
++			e1000_consume_page(buffer_info, rxtop, length);
++			goto next_desc;
++		} else {
++			if (rxtop) {
++				/* end of the chain */
++				skb_fill_page_desc(rxtop,
++				    skb_shinfo(rxtop)->nr_frags,
++				    buffer_info->page, 0, length);
++				/* re-use the current skb, we only consumed the
++				 * page */
++				buffer_info->skb = skb;
++				skb = rxtop;
++				rxtop = NULL;
++				e1000_consume_page(buffer_info, skb, length);
++			} else {
++				/* no chain, got EOP, this buf is the packet
++				 * copybreak to save the put_page/alloc_page */
++				if (length <= copybreak &&
++				    skb_tailroom(skb) >= length) {
++					u8 *vaddr;
++					vaddr = kmap_atomic(buffer_info->page,
++					                   KM_SKB_DATA_SOFTIRQ);
++					memcpy(skb_tail_pointer(skb), vaddr,
++					       length);
++					kunmap_atomic(vaddr,
++					              KM_SKB_DATA_SOFTIRQ);
++					/* re-use the page, so don't erase
++					 * buffer_info->page */
++					skb_put(skb, length);
++				} else {
++					skb_fill_page_desc(skb, 0,
++					                   buffer_info->page, 0,
++				                           length);
++					e1000_consume_page(buffer_info, skb,
++					                   length);
++				}
++			}
++		}
++
++		/* Receive Checksum Offload XXX recompute due to CRC strip? */
++		e1000_rx_checksum(adapter,
++		                  (u32)(status) |
++		                  ((u32)(rx_desc->errors) << 24),
++		                  le16_to_cpu(rx_desc->csum), skb);
++
++		/* probably a little skewed due to removing CRC */
++		total_rx_bytes += skb->len;
++		total_rx_packets++;
++
++		/* eth type trans needs skb->data to point to something */
++		if (!pskb_may_pull(skb, ETH_HLEN)) {
++			ndev_err(netdev, "pskb_may_pull failed.\n");
++			dev_kfree_skb(skb);
++			goto next_desc;
++		}
++
++		e1000_receive_skb(adapter, netdev, skb, status,
++		                  rx_desc->special);
++
++next_desc:
++		rx_desc->status = 0;
++
++		/* return some buffers to hardware, one at a time is too slow */
++		if (unlikely(cleaned_count >= E1000_RX_BUFFER_WRITE)) {
++			adapter->alloc_rx_buf(adapter, cleaned_count);
++			cleaned_count = 0;
++		}
++
++		/* use prefetched values */
++		rx_desc = next_rxd;
++		buffer_info = next_buffer;
++	}
++	rx_ring->next_to_clean = i;
++
++	cleaned_count = e1000_desc_unused(rx_ring);
++	if (cleaned_count)
++		adapter->alloc_rx_buf(adapter, cleaned_count);
++
++	adapter->total_rx_bytes += total_rx_bytes;
++	adapter->total_rx_packets += total_rx_packets;
++	adapter->net_stats.rx_bytes += total_rx_bytes;
++	adapter->net_stats.rx_packets += total_rx_packets;
++	return cleaned;
++}
++
++/**
+  * e1000_clean_rx_ring - Free Rx Buffers per Queue
+  * @adapter: board private structure
+  **/
+@@ -802,6 +1066,10 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter)
+ 				pci_unmap_single(pdev, buffer_info->dma,
+ 						 adapter->rx_buffer_len,
+ 						 PCI_DMA_FROMDEVICE);
++			else if (adapter->clean_rx == e1000_clean_jumbo_rx_irq)
++				pci_unmap_page(pdev, buffer_info->dma,
++				               PAGE_SIZE,
++				               PCI_DMA_FROMDEVICE);
+ 			else if (adapter->clean_rx == e1000_clean_rx_irq_ps)
+ 				pci_unmap_single(pdev, buffer_info->dma,
+ 						 adapter->rx_ps_bsize0,
+@@ -809,6 +1077,11 @@ static void e1000_clean_rx_ring(struct e1000_adapter *adapter)
+ 			buffer_info->dma = 0;
+ 		}
+ 
++		if (buffer_info->page) {
++			put_page(buffer_info->page);
++			buffer_info->page = NULL;
++		}
++
+ 		if (buffer_info->skb) {
+ 			dev_kfree_skb(buffer_info->skb);
+ 			buffer_info->skb = NULL;
+@@ -1755,10 +2028,12 @@ static void e1000_setup_rctl(struct e1000_adapter *adapter)
+ 	 * a lot of memory, since we allocate 3 pages at all times
+ 	 * per packet.
+ 	 */
+-	adapter->rx_ps_pages = 0;
+ 	pages = PAGE_USE_COUNT(adapter->netdev->mtu);
+-	if ((pages <= 3) && (PAGE_SIZE <= 16384) && (rctl & E1000_RCTL_LPE))
++	if (!(adapter->flags & FLAG_IS_ICH) && (pages <= 3) &&
++	    (PAGE_SIZE <= 16384) && (rctl & E1000_RCTL_LPE))
+ 		adapter->rx_ps_pages = pages;
++	else
++		adapter->rx_ps_pages = 0;
+ 
+ 	if (adapter->rx_ps_pages) {
+ 		/* Configure extra packet-split registers */
+@@ -1819,9 +2094,12 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
+ 			sizeof(union e1000_rx_desc_packet_split);
+ 		adapter->clean_rx = e1000_clean_rx_irq_ps;
+ 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers_ps;
++	} else if (adapter->netdev->mtu > ETH_FRAME_LEN + ETH_FCS_LEN) {
++		rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
++		adapter->clean_rx = e1000_clean_jumbo_rx_irq;
++		adapter->alloc_rx_buf = e1000_alloc_jumbo_rx_buffers;
+ 	} else {
+-		rdlen = rx_ring->count *
+-			sizeof(struct e1000_rx_desc);
++		rdlen = rx_ring->count * sizeof(struct e1000_rx_desc);
+ 		adapter->clean_rx = e1000_clean_rx_irq;
+ 		adapter->alloc_rx_buf = e1000_alloc_rx_buffers;
+ 	}
+@@ -1885,8 +2163,21 @@ static void e1000_configure_rx(struct e1000_adapter *adapter)
+ 	 * units), e.g. using jumbo frames when setting to E1000_ERT_2048
+ 	 */
+ 	if ((adapter->flags & FLAG_HAS_ERT) &&
+-	    (adapter->netdev->mtu > ETH_DATA_LEN))
+-		ew32(ERT, E1000_ERT_2048);
++	    (adapter->netdev->mtu > ETH_DATA_LEN)) {
++		u32 rxdctl = er32(RXDCTL(0));
++		ew32(RXDCTL(0), rxdctl | 0x3);
++		ew32(ERT, E1000_ERT_2048 | (1 << 13));
++		/*
++		 * With jumbo frames and early-receive enabled, excessive
++		 * C4->C2 latencies result in dropped transactions.
++		 */
++		pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
++					  e1000e_driver_name, 55);
++	} else {
++		pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
++					  e1000e_driver_name,
++					  PM_QOS_DEFAULT_VALUE);
++	}
+ 
+ 	/* Enable Receives */
+ 	ew32(RCTL, rctl);
+@@ -2155,6 +2446,14 @@ void e1000e_reset(struct e1000_adapter *adapter)
+ 
+ 	/* Allow time for pending master requests to run */
+ 	mac->ops.reset_hw(hw);
++
++	/*
++	 * For parts with AMT enabled, let the firmware know
++	 * that the network interface is in control
++	 */
++	if ((adapter->flags & FLAG_HAS_AMT) && e1000e_check_mng_mode(hw))
++		e1000_get_hw_control(adapter);
++
+ 	ew32(WUC, 0);
+ 
+ 	if (mac->ops.init_hw(hw))
+@@ -3469,6 +3768,8 @@ static int e1000_change_mtu(struct net_device *netdev, int new_mtu)
+ 	 * means we reserve 2 more, this pushes us to allocate from the next
+ 	 * larger slab size.
+ 	 * i.e. RXBUFFER_2048 --> size-4096 slab
++	 * However with the new *_jumbo_rx* routines, jumbo receives will use
++	 * fragmented skbs
+ 	 */
+ 
+ 	if (max_frame <= 256)
+@@ -3626,6 +3927,9 @@ static int e1000_suspend(struct pci_dev *pdev, pm_message_t state)
+ 			ew32(CTRL_EXT, ctrl_ext);
+ 		}
+ 
++		if (adapter->flags & FLAG_IS_ICH)
++			e1000e_disable_gig_wol_ich8lan(&adapter->hw);
++
+ 		/* Allow time for pending master requests to run */
+ 		e1000e_disable_pcie_master(&adapter->hw);
+ 
+@@ -4292,6 +4596,13 @@ static struct pci_device_id e1000_pci_tbl[] = {
+ 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IFE_GT), board_ich9lan },
+ 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_AMT), board_ich9lan },
+ 	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_C), board_ich9lan },
++	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M), board_ich9lan },
++	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_AMT), board_ich9lan },
++	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH9_IGP_M_V), board_ich9lan },
++
++	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LM), board_ich9lan },
++	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_LF), board_ich9lan },
++	{ PCI_VDEVICE(INTEL, E1000_DEV_ID_ICH10_R_BM_V), board_ich9lan },
+ 
+ 	{ }	/* terminate list */
+ };
+@@ -4326,7 +4637,9 @@ static int __init e1000_init_module(void)
+ 	printk(KERN_INFO "%s: Copyright (c) 1999-2008 Intel Corporation.\n",
+ 	       e1000e_driver_name);
+ 	ret = pci_register_driver(&e1000_driver);
+-
++	pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name,
++			       PM_QOS_DEFAULT_VALUE);
++
+ 	return ret;
+ }
+ module_init(e1000_init_module);
+@@ -4340,6 +4653,7 @@ module_init(e1000_init_module);
+ static void __exit e1000_exit_module(void)
+ {
+ 	pci_unregister_driver(&e1000_driver);
++	pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name);
+ }
+ module_exit(e1000_exit_module);
+ 
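
The netdev.c hunks above do two independent things: they add a fragment-based receive path for jumbo frames (with a copybreak copy for small packets), and they register a CPU DMA latency requirement with the PM QoS layer so early-receive mode is not defeated by deep C-state exit latencies. As a minimal sketch of the PM QoS pattern, using the 2.6.25-era pm_qos API that the patch calls (error handling elided; not the driver's literal code):

#include <linux/pm_qos_params.h>

/* Register a no-op latency requirement at load time... */
static int __init example_init(void)
{
        pm_qos_add_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name,
                               PM_QOS_DEFAULT_VALUE);
        return 0;
}

/* ...tighten it to 55 usec while jumbo frames plus early receive are
 * active (and relax it back to the default otherwise)... */
static void example_configure_rx(void)
{
        pm_qos_update_requirement(PM_QOS_CPU_DMA_LATENCY,
                                  e1000e_driver_name, 55);
}

/* ...and drop the requirement again on unload. */
static void __exit example_exit(void)
{
        pm_qos_remove_requirement(PM_QOS_CPU_DMA_LATENCY, e1000e_driver_name);
}
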
+diff --git a/drivers/net/e1000e/phy.c b/drivers/net/e1000e/phy.c
+index e102332..b133dcf 100644
+--- a/drivers/net/e1000e/phy.c
++++ b/drivers/net/e1000e/phy.c
+@@ -34,6 +34,9 @@ static s32 e1000_get_phy_cfg_done(struct e1000_hw *hw);
+ static s32 e1000_phy_force_speed_duplex(struct e1000_hw *hw);
+ static s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active);
+ static s32 e1000_wait_autoneg(struct e1000_hw *hw);
++static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg);
++static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset,
++					  u16 *data, bool read);
+ 
+ /* Cable length tables */
+ static const u16 e1000_m88_cable_length_table[] =
+@@ -465,6 +468,10 @@ s32 e1000e_copper_link_setup_m88(struct e1000_hw *hw)
+ 	if (phy->disable_polarity_correction == 1)
+ 		phy_data |= M88E1000_PSCR_POLARITY_REVERSAL;
+ 
++	/* Enable downshift on BM (disabled by default) */
++	if (phy->type == e1000_phy_bm)
++		phy_data |= BME1000_PSCR_ENABLE_DOWNSHIFT;
++
+ 	ret_val = e1e_wphy(hw, M88E1000_PHY_SPEC_CTRL, phy_data);
+ 	if (ret_val)
+ 		return ret_val;
+@@ -1776,6 +1783,10 @@ enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id)
+ 	case IFE_C_E_PHY_ID:
+ 		phy_type = e1000_phy_ife;
+ 		break;
++	case BME1000_E_PHY_ID:
++	case BME1000_E_PHY_ID_R2:
++		phy_type = e1000_phy_bm;
++		break;
+ 	default:
+ 		phy_type = e1000_phy_unknown;
+ 		break;
+@@ -1784,6 +1795,273 @@ enum e1000_phy_type e1000e_get_phy_type_from_id(u32 phy_id)
+ }
+ 
+ /**
++ *  e1000e_determine_phy_address - Determines PHY address.
++ *  @hw: pointer to the HW structure
++ *
++ *  This uses a trial and error method to loop through possible PHY
++ *  addresses. It tests each by reading the PHY ID registers and
++ *  checking for a match.
++ **/
++s32 e1000e_determine_phy_address(struct e1000_hw *hw)
++{
++	s32 ret_val = -E1000_ERR_PHY_TYPE;
++	u32 phy_addr = 0;
++	u32 i = 0;
++	enum e1000_phy_type phy_type = e1000_phy_unknown;
++
++	do {
++		for (phy_addr = 0; phy_addr < 4; phy_addr++) {
++			hw->phy.addr = phy_addr;
++			e1000e_get_phy_id(hw);
++			phy_type = e1000e_get_phy_type_from_id(hw->phy.id);
++
++			/*
++			 * If phy_type is valid, break - we found our
++			 * PHY address
++			 */
++			if (phy_type != e1000_phy_unknown) {
++				ret_val = 0;
++				break;
++			}
++		}
++		i++;
++	} while ((ret_val != 0) && (i < 100));
++
++	return ret_val;
++}
++
++/**
++ *  e1000_get_phy_addr_for_bm_page - Retrieve PHY page address
++ *  @page: page to access
++ *  @reg: register to access
++ *
++ *  Returns the phy address for the page requested.
++ **/
++static u32 e1000_get_phy_addr_for_bm_page(u32 page, u32 reg)
++{
++	u32 phy_addr = 2;
++
++	if ((page >= 768) || (page == 0 && reg == 25) || (reg == 31))
++		phy_addr = 1;
++
++	return phy_addr;
++}
++
++/**
++ *  e1000e_write_phy_reg_bm - Write BM PHY register
++ *  @hw: pointer to the HW structure
++ *  @offset: register offset to write to
++ *  @data: data to write at register offset
++ *
++ *  Acquires the semaphore, if necessary, then writes the data to the PHY
++ *  register at the offset.  Releases any acquired semaphores before exiting.
++ **/
++s32 e1000e_write_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 data)
++{
++	s32 ret_val;
++	u32 page_select = 0;
++	u32 page = offset >> IGP_PAGE_SHIFT;
++	u32 page_shift = 0;
++
++	/* Page 800 works differently than the rest so it has its own func */
++	if (page == BM_WUC_PAGE) {
++		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, &data,
++							 false);
++		goto out;
++	}
++
++	ret_val = hw->phy.ops.acquire_phy(hw);
++	if (ret_val)
++		goto out;
++
++	hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
++
++	if (offset > MAX_PHY_MULTI_PAGE_REG) {
++		/*
++		 * Page select is register 31 for phy address 1 and 22 for
++		 * phy address 2 and 3. Page select is shifted only for
++		 * phy address 1.
++		 */
++		if (hw->phy.addr == 1) {
++			page_shift = IGP_PAGE_SHIFT;
++			page_select = IGP01E1000_PHY_PAGE_SELECT;
++		} else {
++			page_shift = 0;
++			page_select = BM_PHY_PAGE_SELECT;
++		}
++
++		/* Page is shifted left, PHY expects (page x 32) */
++		ret_val = e1000e_write_phy_reg_mdic(hw, page_select,
++		                                    (page << page_shift));
++		if (ret_val) {
++			hw->phy.ops.release_phy(hw);
++			goto out;
++		}
++	}
++
++	ret_val = e1000e_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
++	                                    data);
++
++	hw->phy.ops.release_phy(hw);
++
++out:
++	return ret_val;
++}
++
++/**
++ *  e1000e_read_phy_reg_bm - Read BM PHY register
++ *  @hw: pointer to the HW structure
++ *  @offset: register offset to be read
++ *  @data: pointer to the read data
++ *
++ *  Acquires the semaphore, if necessary, then reads the PHY register at
++ *  offset and stores the retrieved value in data.  Releases any acquired
++ *  semaphores before exiting.
++ **/
++s32 e1000e_read_phy_reg_bm(struct e1000_hw *hw, u32 offset, u16 *data)
++{
++	s32 ret_val;
++	u32 page_select = 0;
++	u32 page = offset >> IGP_PAGE_SHIFT;
++	u32 page_shift = 0;
++
++	/* Page 800 works differently than the rest so it has its own func */
++	if (page == BM_WUC_PAGE) {
++		ret_val = e1000_access_phy_wakeup_reg_bm(hw, offset, data,
++							 true);
++		goto out;
++	}
++
++	ret_val = hw->phy.ops.acquire_phy(hw);
++	if (ret_val)
++		goto out;
++
++	hw->phy.addr = e1000_get_phy_addr_for_bm_page(page, offset);
++
++	if (offset > MAX_PHY_MULTI_PAGE_REG) {
++		/*
++		 * Page select is register 31 for phy address 1 and 22 for
++		 * phy address 2 and 3. Page select is shifted only for
++		 * phy address 1.
++		 */
++		if (hw->phy.addr == 1) {
++			page_shift = IGP_PAGE_SHIFT;
++			page_select = IGP01E1000_PHY_PAGE_SELECT;
++		} else {
++			page_shift = 0;
++			page_select = BM_PHY_PAGE_SELECT;
++		}
++
++		/* Page is shifted left, PHY expects (page x 32) */
++		ret_val = e1000e_write_phy_reg_mdic(hw, page_select,
++		                                    (page << page_shift));
++		if (ret_val) {
++			hw->phy.ops.release_phy(hw);
++			goto out;
++		}
++	}
++
++	ret_val = e1000e_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset,
++	                                   data);
++	hw->phy.ops.release_phy(hw);
++
++out:
++	return ret_val;
++}
++
++/**
++ *  e1000_access_phy_wakeup_reg_bm - Read/write BM PHY wakeup register
++ *  @hw: pointer to the HW structure
++ *  @offset: register offset to be read or written
++ *  @data: pointer to the data to read or write
++ *  @read: determines if operation is read or write
++ *
++ *  Acquires the semaphore, if necessary, then reads or writes the PHY
++ *  register at offset, with data as the source or destination.  Releases
++ *  any acquired semaphores before exiting.  Note that the procedure for
++ *  accessing the wakeup registers is different.  It works as follows:
++ *  1) Set page 769, register 17, bit 2 = 1
++ *  2) Set page to 800 for host (801 if we were manageability)
++ *  3) Write the address using the address opcode (0x11)
++ *  4) Read or write the data using the data opcode (0x12)
++ *  5) Restore 769_17.2 to its original value
++ **/
++static s32 e1000_access_phy_wakeup_reg_bm(struct e1000_hw *hw, u32 offset,
++					  u16 *data, bool read)
++{
++	s32 ret_val;
++	u16 reg = ((u16)offset) & PHY_REG_MASK;
++	u16 phy_reg = 0;
++	u8  phy_acquired = 1;
++
++	ret_val = hw->phy.ops.acquire_phy(hw);
++	if (ret_val) {
++		phy_acquired = 0;
++		goto out;
++	}
++
++	/* All operations in this function use phy address 1 */
++	hw->phy.addr = 1;
++
++	/* Set page 769 */
++	e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
++	                          (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
++
++	ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, &phy_reg);
++	if (ret_val)
++		goto out;
++
++	/* First clear bit 4 to avoid a power state change */
++	phy_reg &= ~(BM_WUC_HOST_WU_BIT);
++	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, phy_reg);
++	if (ret_val)
++		goto out;
++
++	/* Write 769_17 with bit 2 set and bit 4 still cleared */
++	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG,
++	                                    phy_reg | BM_WUC_ENABLE_BIT);
++	if (ret_val)
++		goto out;
++
++	/* Select page 800 */
++	ret_val = e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
++	                                    (BM_WUC_PAGE << IGP_PAGE_SHIFT));
++
++	/* Write the page 800 offset value using opcode 0x11 */
++	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ADDRESS_OPCODE, reg);
++	if (ret_val)
++		goto out;
++
++	if (read) {
++		/* Read the page 800 value using opcode 0x12 */
++		ret_val = e1000e_read_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE,
++		                                   data);
++	} else {
++		/* Write the page 800 value using opcode 0x12 */
++		ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_DATA_OPCODE,
++						    *data);
++	}
++
++	if (ret_val)
++		goto out;
++
++	/*
++	 * Restore 769_17.2 to its original value
++	 * Set page 769
++	 */
++	e1000e_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT,
++	                          (BM_WUC_ENABLE_PAGE << IGP_PAGE_SHIFT));
++
++	/* Clear 769_17.2 */
++	ret_val = e1000e_write_phy_reg_mdic(hw, BM_WUC_ENABLE_REG, phy_reg);
++
++out:
++	if (phy_acquired == 1)
++		hw->phy.ops.release_phy(hw);
++	return ret_val;
++}
++
++/**
+  *  e1000e_commit_phy - Soft PHY reset
+  *  @hw: pointer to the HW structure
+  *
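
A hypothetical caller of the new BM accessors might look like the sketch below. The page number rides in the upper bits of the offset (page = offset >> IGP_PAGE_SHIFT), so the page-800 wakeup registers are reached through the same entry points and routed to e1000_access_phy_wakeup_reg_bm internally; the offset composition here is an assumption based on that decoding, not code from the patch.

static s32 example_bm_reads(struct e1000_hw *hw)
{
        u16 wuc_reg1, bmcr;
        s32 ret_val;

        /* page 800, register 1: dispatched to the wakeup-reg helper */
        ret_val = e1000e_read_phy_reg_bm(hw,
                        (BM_WUC_PAGE << IGP_PAGE_SHIFT) | 1, &wuc_reg1);
        if (ret_val)
                return ret_val;

        /* a low offset stays below MAX_PHY_MULTI_PAGE_REG: no page select */
        return e1000e_read_phy_reg_bm(hw, PHY_CONTROL, &bmcr);
}
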
+diff --git a/drivers/net/eexpress.c b/drivers/net/eexpress.c
+index 2eb82ab..795c594 100644
+--- a/drivers/net/eexpress.c
++++ b/drivers/net/eexpress.c
+@@ -202,7 +202,7 @@ static unsigned short start_code[] = {
+ 	0x0000,Cmd_MCast,
+ 	0x0076,                 /* link to next command */
+ #define CONF_NR_MULTICAST 0x44
+-	0x0000,                 /* number of multicast addresses */
++	0x0000,                 /* number of bytes in multicast address(es) */
+ #define CONF_MULTICAST 0x46
+ 	0x0000, 0x0000, 0x0000, /* some addresses */
+ 	0x0000, 0x0000, 0x0000,
+@@ -1569,7 +1569,7 @@ static void eexp_hw_init586(struct net_device *dev)
+ 
+ static void eexp_setup_filter(struct net_device *dev)
+ {
+-	struct dev_mc_list *dmi = dev->mc_list;
++	struct dev_mc_list *dmi;
+ 	unsigned short ioaddr = dev->base_addr;
+ 	int count = dev->mc_count;
+ 	int i;
+@@ -1580,9 +1580,9 @@ static void eexp_setup_filter(struct net_device *dev)
+ 	}
+ 
+ 	outw(CONF_NR_MULTICAST & ~31, ioaddr+SM_PTR);
+-	outw(count, ioaddr+SHADOW(CONF_NR_MULTICAST));
+-	for (i = 0; i < count; i++) {
+-		unsigned short *data = (unsigned short *)dmi->dmi_addr;
++	outw(6*count, ioaddr+SHADOW(CONF_NR_MULTICAST));
++	for (i = 0, dmi = dev->mc_list; i < count; i++, dmi = dmi->next) {
++		unsigned short *data;
+ 		if (!dmi) {
+ 			printk(KERN_INFO "%s: too few multicast addresses\n", dev->name);
+ 			break;
+@@ -1591,6 +1591,7 @@ static void eexp_setup_filter(struct net_device *dev)
+ 			printk(KERN_INFO "%s: invalid multicast address length given.\n", dev->name);
+ 			continue;
+ 		}
++		data = (unsigned short *)dmi->dmi_addr;
+ 		outw((CONF_MULTICAST+(6*i)) & ~31, ioaddr+SM_PTR);
+ 		outw(data[0], ioaddr+SHADOW(CONF_MULTICAST+(6*i)));
+ 		outw((CONF_MULTICAST+(6*i)+2) & ~31, ioaddr+SM_PTR);
+diff --git a/drivers/net/fs_enet/mii-fec.c b/drivers/net/fs_enet/mii-fec.c
+index ba75efc..f0014cf 100644
+--- a/drivers/net/fs_enet/mii-fec.c
++++ b/drivers/net/fs_enet/mii-fec.c
+@@ -194,7 +194,7 @@ static int __devinit fs_enet_mdio_probe(struct of_device *ofdev,
+ 
+ 	ret = of_address_to_resource(ofdev->node, 0, &res);
+ 	if (ret)
+-		return ret;
++		goto out_res;
+ 
+ 	snprintf(new_bus->id, MII_BUS_ID_SIZE, "%x", res.start);
+ 
+@@ -236,6 +236,7 @@ out_free_irqs:
+ 	kfree(new_bus->irq);
+ out_unmap_regs:
+ 	iounmap(fec->fecp);
++out_res:
+ out_fec:
+ 	kfree(fec);
+ out_mii:
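
The mii-fec change is the classic ordered-unwind idiom: each failure jumps to the label that releases exactly what has been acquired so far, so a failure in of_address_to_resource() must branch past the iounmap(). A minimal sketch of the shape, with hypothetical names:

#include <linux/slab.h>
#include <linux/io.h>

struct example_ctx {                    /* hypothetical context */
        void __iomem *regs;
        unsigned long start, len;
};

static int example_probe(void)
{
        struct example_ctx *ctx;
        int ret = -ENOMEM;

        ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
        if (!ctx)
                goto out;

        ret = acquire_resource(ctx);    /* hypothetical: fills start/len */
        if (ret)
                goto out_free;          /* nothing mapped yet: skip iounmap */

        ctx->regs = ioremap(ctx->start, ctx->len);
        if (!ctx->regs) {
                ret = -ENOMEM;
                goto out_free;
        }
        return 0;

out_free:
        kfree(ctx);
out:
        return ret;
}
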
+diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c
+index 587afe7..6f22f06 100644
+--- a/drivers/net/gianfar.c
++++ b/drivers/net/gianfar.c
+@@ -138,6 +138,7 @@ static int gfar_poll(struct napi_struct *napi, int budget);
+ static void gfar_netpoll(struct net_device *dev);
+ #endif
+ int gfar_clean_rx_ring(struct net_device *dev, int rx_work_limit);
++static int gfar_clean_tx_ring(struct net_device *dev);
+ static int gfar_process_frame(struct net_device *dev, struct sk_buff *skb, int length);
+ static void gfar_vlan_rx_register(struct net_device *netdev,
+ 		                struct vlan_group *grp);
+@@ -1141,7 +1142,7 @@ static int gfar_close(struct net_device *dev)
+ }
+ 
+ /* Changes the mac address if the controller is not running. */
+-int gfar_set_mac_address(struct net_device *dev)
++static int gfar_set_mac_address(struct net_device *dev)
+ {
+ 	gfar_set_mac_for_addr(dev, 0, dev->dev_addr);
+ 
+@@ -1260,7 +1261,7 @@ static void gfar_timeout(struct net_device *dev)
+ }
+ 
+ /* Interrupt Handler for Transmit complete */
+-int gfar_clean_tx_ring(struct net_device *dev)
++static int gfar_clean_tx_ring(struct net_device *dev)
+ {
+ 	struct txbd8 *bdp;
+ 	struct gfar_private *priv = netdev_priv(dev);
+diff --git a/drivers/net/gianfar.h b/drivers/net/gianfar.h
+index fd487be..27f37c8 100644
+--- a/drivers/net/gianfar.h
++++ b/drivers/net/gianfar.h
+@@ -782,5 +782,8 @@ extern void gfar_halt(struct net_device *dev);
+ extern void gfar_phy_test(struct mii_bus *bus, struct phy_device *phydev,
+ 		int enable, u32 regnum, u32 read);
+ void gfar_init_sysfs(struct net_device *dev);
++int gfar_local_mdio_write(struct gfar_mii __iomem *regs, int mii_id,
++			  int regnum, u16 value);
++int gfar_local_mdio_read(struct gfar_mii __iomem *regs, int mii_id, int regnum);
+ 
+ #endif /* __GIANFAR_H */
+diff --git a/drivers/net/gianfar_sysfs.c b/drivers/net/gianfar_sysfs.c
+index 230878b..5116f68 100644
+--- a/drivers/net/gianfar_sysfs.c
++++ b/drivers/net/gianfar_sysfs.c
+@@ -103,10 +103,10 @@ static ssize_t gfar_set_rx_stash_size(struct device *dev,
+ 
+ 	spin_lock_irqsave(&priv->rxlock, flags);
+ 	if (length > priv->rx_buffer_size)
+-		return count;
++		goto out;
+ 
+ 	if (length == priv->rx_stash_size)
+-		return count;
++		goto out;
+ 
+ 	priv->rx_stash_size = length;
+ 
+@@ -125,6 +125,7 @@ static ssize_t gfar_set_rx_stash_size(struct device *dev,
+ 
+ 	gfar_write(&priv->regs->attr, temp);
+ 
++out:
+ 	spin_unlock_irqrestore(&priv->rxlock, flags);
+ 
+ 	return count;
+@@ -154,10 +155,10 @@ static ssize_t gfar_set_rx_stash_index(struct device *dev,
+ 
+ 	spin_lock_irqsave(&priv->rxlock, flags);
+ 	if (index > priv->rx_stash_size)
+-		return count;
++		goto out;
+ 
+ 	if (index == priv->rx_stash_index)
+-		return count;
++		goto out;
+ 
+ 	priv->rx_stash_index = index;
+ 
+@@ -166,6 +167,7 @@ static ssize_t gfar_set_rx_stash_index(struct device *dev,
+ 	temp |= ATTRELI_EI(index);
+ 	gfar_write(&priv->regs->attreli, flags);
+ 
++out:
+ 	spin_unlock_irqrestore(&priv->rxlock, flags);
+ 
+ 	return count;
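
Both store hooks above previously returned with priv->rxlock still held; the fix funnels every exit through a label that drops it. The shape, with hypothetical names:

#include <linux/spinlock.h>

struct example_priv {                   /* hypothetical private data */
        spinlock_t lock;
        unsigned int limit, value;
};

static ssize_t example_store(struct example_priv *priv, unsigned int val,
                             size_t count)
{
        unsigned long flags;

        spin_lock_irqsave(&priv->lock, flags);
        if (val > priv->limit)
                goto out;               /* early exit still drops the lock */

        priv->value = val;
        /* ... program the hardware under the lock ... */
out:
        spin_unlock_irqrestore(&priv->lock, flags);
        return count;
}
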
+diff --git a/drivers/net/irda/nsc-ircc.c b/drivers/net/irda/nsc-ircc.c
+index a873d2b..a7714da 100644
+--- a/drivers/net/irda/nsc-ircc.c
++++ b/drivers/net/irda/nsc-ircc.c
+@@ -100,7 +100,9 @@ static int nsc_ircc_probe_39x(nsc_chip_t *chip, chipio_t *info);
+ static int nsc_ircc_init_108(nsc_chip_t *chip, chipio_t *info);
+ static int nsc_ircc_init_338(nsc_chip_t *chip, chipio_t *info);
+ static int nsc_ircc_init_39x(nsc_chip_t *chip, chipio_t *info);
++#ifdef CONFIG_PNP
+ static int nsc_ircc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id);
++#endif
+ 
+ /* These are the known NSC chips */
+ static nsc_chip_t chips[] = {
+@@ -156,9 +158,11 @@ static const struct pnp_device_id nsc_ircc_pnp_table[] = {
+ MODULE_DEVICE_TABLE(pnp, nsc_ircc_pnp_table);
+ 
+ static struct pnp_driver nsc_ircc_pnp_driver = {
++#ifdef CONFIG_PNP
+ 	.name = "nsc-ircc",
+ 	.id_table = nsc_ircc_pnp_table,
+ 	.probe = nsc_ircc_pnp_probe,
++#endif
+ };
+ 
+ /* Some prototypes */
+@@ -916,6 +920,7 @@ static int nsc_ircc_probe_39x(nsc_chip_t *chip, chipio_t *info)
+ 	return 0;
+ }
+ 
++#ifdef CONFIG_PNP
+ /* PNP probing */
+ static int nsc_ircc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *id)
+ {
+@@ -952,6 +957,7 @@ static int nsc_ircc_pnp_probe(struct pnp_dev *dev, const struct pnp_device_id *i
+ 
+ 	return 0;
+ }
++#endif
+ 
+ /*
+  * Function nsc_ircc_setup (info)
+diff --git a/drivers/net/irda/smsc-ircc2.c b/drivers/net/irda/smsc-ircc2.c
+index 1f26da7..cfe0194 100644
+--- a/drivers/net/irda/smsc-ircc2.c
++++ b/drivers/net/irda/smsc-ircc2.c
+@@ -376,6 +376,7 @@ MODULE_DEVICE_TABLE(pnp, smsc_ircc_pnp_table);
+ 
+ static int pnp_driver_registered;
+ 
++#ifdef CONFIG_PNP
+ static int __init smsc_ircc_pnp_probe(struct pnp_dev *dev,
+ 				      const struct pnp_device_id *dev_id)
+ {
+@@ -402,7 +403,9 @@ static struct pnp_driver smsc_ircc_pnp_driver = {
+ 	.id_table	= smsc_ircc_pnp_table,
+ 	.probe		= smsc_ircc_pnp_probe,
+ };
+-
++#else /* CONFIG_PNP */
++static struct pnp_driver smsc_ircc_pnp_driver;
++#endif
+ 
+ /*******************************************************************************
+  *
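
Both IrDA drivers get the same treatment: without CONFIG_PNP the probe routine is compiled out, and smsc-ircc2 substitutes an empty pnp_driver so the later registration calls still have a symbol to hand over. Roughly (example_pnp_table assumed defined elsewhere):

#include <linux/pnp.h>

#ifdef CONFIG_PNP
static int example_pnp_probe(struct pnp_dev *dev,
                             const struct pnp_device_id *id);

static struct pnp_driver example_pnp_driver = {
        .name     = "example",
        .id_table = example_pnp_table,
        .probe    = example_pnp_probe,
};
#else /* CONFIG_PNP */
/* empty: registration becomes a harmless no-op without PNP support */
static struct pnp_driver example_pnp_driver;
#endif
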
+diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
+index 2056cfc..c36a03a 100644
+--- a/drivers/net/macvlan.c
++++ b/drivers/net/macvlan.c
+@@ -450,7 +450,7 @@ static void macvlan_dellink(struct net_device *dev)
+ 	unregister_netdevice(dev);
+ 
+ 	if (list_empty(&port->vlans))
+-		macvlan_port_destroy(dev);
++		macvlan_port_destroy(port->dev);
+ }
+ 
+ static struct rtnl_link_ops macvlan_link_ops __read_mostly = {
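
The one-line macvlan fix matters because unregister_netdevice(dev) may begin tearing dev down, so the port cleanup must go through the saved lower device rather than back through dev. The general pattern is to capture what you need before releasing the object; a sketch with hypothetical names:

struct example_port {                   /* hypothetical, mirrors macvlan_port */
        struct net_device *dev;         /* the lower device */
        struct list_head vlans;
};

static void example_dellink(struct net_device *dev)
{
        struct example_port *port = example_port_get(dev); /* assumed lookup */
        struct net_device *lowerdev = port->dev;           /* save first */

        unregister_netdevice(dev);      /* 'dev' may be going away from here */

        if (list_empty(&port->vlans))
                example_port_destroy(lowerdev);            /* saved pointer */
}
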
+diff --git a/drivers/net/mlx4/mr.c b/drivers/net/mlx4/mr.c
+index cb46446..03a9abc 100644
+--- a/drivers/net/mlx4/mr.c
++++ b/drivers/net/mlx4/mr.c
+@@ -551,7 +551,7 @@ int mlx4_fmr_alloc(struct mlx4_dev *dev, u32 pd, u32 access, int max_pages,
+ 	u64 mtt_seg;
+ 	int err = -ENOMEM;
+ 
+-	if (page_shift < 12 || page_shift >= 32)
++	if (page_shift < (ffs(dev->caps.page_size_cap) - 1) || page_shift >= 32)
+ 		return -EINVAL;
+ 
+ 	/* All MTTs must fit in the same page */
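
The mlx4 check now derives the minimum page shift from the device capability mask instead of hard-coding 12: page_size_cap has one bit set per supported page size, so ffs(cap) - 1 is the smallest supported shift. For instance, a cap of 0xfffff000 (4 KB and up) gives ffs() == 13, hence a minimum shift of 12. A standalone illustration:

#include <stdio.h>
#include <strings.h>    /* ffs() */

int main(void)
{
        unsigned int page_size_cap = 0xfffff000; /* example: 4 KB and up */
        int min_shift = ffs(page_size_cap) - 1;  /* index of lowest set bit */

        printf("minimum supported page shift: %d\n", min_shift); /* 12 */
        return 0;
}
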
+diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c
+index 381b36e..b7915cd 100644
+--- a/drivers/net/mv643xx_eth.c
++++ b/drivers/net/mv643xx_eth.c
+@@ -91,6 +91,11 @@
+  */
+ #define PHY_ADDR_REG				0x0000
+ #define SMI_REG					0x0004
++#define WINDOW_BASE(i)				(0x0200 + ((i) << 3))
++#define WINDOW_SIZE(i)				(0x0204 + ((i) << 3))
++#define WINDOW_REMAP_HIGH(i)			(0x0280 + ((i) << 2))
++#define WINDOW_BAR_ENABLE			0x0290
++#define WINDOW_PROTECT(i)			(0x0294 + ((i) << 4))
+ 
+ /*
+  * Per-port registers.
+@@ -507,9 +512,23 @@ struct mv643xx_mib_counters {
+ 	u32 late_collision;
+ };
+ 
++struct mv643xx_shared_private {
++	void __iomem *eth_base;
++
++	/* used to protect SMI_REG, which is shared across ports */
++	spinlock_t phy_lock;
++
++	u32 win_protect;
++
++	unsigned int t_clk;
++};
++
+ struct mv643xx_private {
++	struct mv643xx_shared_private *shared;
+ 	int port_num;			/* User Ethernet port number	*/
+ 
++	struct mv643xx_shared_private *shared_smi;
++
+ 	u32 rx_sram_addr;		/* Base address of rx sram area */
+ 	u32 rx_sram_size;		/* Size of rx sram area		*/
+ 	u32 tx_sram_addr;		/* Base address of tx sram area */
+@@ -614,19 +633,14 @@ static const struct ethtool_ops mv643xx_ethtool_ops;
+ static char mv643xx_driver_name[] = "mv643xx_eth";
+ static char mv643xx_driver_version[] = "1.0";
+ 
+-static void __iomem *mv643xx_eth_base;
+-
+-/* used to protect SMI_REG, which is shared across ports */
+-static DEFINE_SPINLOCK(mv643xx_eth_phy_lock);
+-
+ static inline u32 rdl(struct mv643xx_private *mp, int offset)
+ {
+-	return readl(mv643xx_eth_base + offset);
++	return readl(mp->shared->eth_base + offset);
+ }
+ 
+ static inline void wrl(struct mv643xx_private *mp, int offset, u32 data)
+ {
+-	writel(data, mv643xx_eth_base + offset);
++	writel(data, mp->shared->eth_base + offset);
+ }
+ 
+ /*
+@@ -1119,7 +1133,6 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id)
+  *
+  * INPUT:
+  *	struct mv643xx_private *mp	Ethernet port
+- *	unsigned int t_clk		t_clk of the MV-643xx chip in HZ units
+  *	unsigned int delay		Delay in usec
+  *
+  * OUTPUT:
+@@ -1130,10 +1143,10 @@ static irqreturn_t mv643xx_eth_int_handler(int irq, void *dev_id)
+  *
+  */
+ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp,
+-					unsigned int t_clk, unsigned int delay)
++					unsigned int delay)
+ {
+ 	unsigned int port_num = mp->port_num;
+-	unsigned int coal = ((t_clk / 1000000) * delay) / 64;
++	unsigned int coal = ((mp->shared->t_clk / 1000000) * delay) / 64;
+ 
+ 	/* Set RX Coalescing mechanism */
+ 	wrl(mp, SDMA_CONFIG_REG(port_num),
+@@ -1158,7 +1171,6 @@ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp,
+  *
+  * INPUT:
+  *	struct mv643xx_private *mp	Ethernet port
+- *	unsigned int t_clk		t_clk of the MV-643xx chip in HZ units
+  *	unsigned int delay		Delay in uSeconds
+  *
+  * OUTPUT:
+@@ -1169,9 +1181,9 @@ static unsigned int eth_port_set_rx_coal(struct mv643xx_private *mp,
+  *
+  */
+ static unsigned int eth_port_set_tx_coal(struct mv643xx_private *mp,
+-					unsigned int t_clk, unsigned int delay)
++					unsigned int delay)
+ {
+-	unsigned int coal = ((t_clk / 1000000) * delay) / 64;
++	unsigned int coal = ((mp->shared->t_clk / 1000000) * delay) / 64;
+ 
+ 	/* Set TX Coalescing mechanism */
+ 	wrl(mp, TX_FIFO_URGENT_THRESHOLD_REG(mp->port_num), coal << 4);
+@@ -1413,11 +1425,11 @@ static int mv643xx_eth_open(struct net_device *dev)
+ 
+ #ifdef MV643XX_COAL
+ 	mp->rx_int_coal =
+-		eth_port_set_rx_coal(mp, 133000000, MV643XX_RX_COAL);
++		eth_port_set_rx_coal(mp, MV643XX_RX_COAL);
+ #endif
+ 
+ 	mp->tx_int_coal =
+-		eth_port_set_tx_coal(mp, 133000000, MV643XX_TX_COAL);
++		eth_port_set_tx_coal(mp, MV643XX_TX_COAL);
+ 
+ 	/* Unmask phy and link status changes interrupts */
+ 	wrl(mp, INTERRUPT_EXTEND_MASK_REG(port_num), ETH_INT_UNMASK_ALL_EXT);
+@@ -1827,6 +1839,11 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
+ 		return -ENODEV;
+ 	}
+ 
++	if (pd->shared == NULL) {
++		printk(KERN_ERR "No mv643xx_eth_platform_data->shared\n");
++		return -ENODEV;
++	}
++
+ 	dev = alloc_etherdev(sizeof(struct mv643xx_private));
+ 	if (!dev)
+ 		return -ENOMEM;
+@@ -1877,8 +1894,16 @@ static int mv643xx_eth_probe(struct platform_device *pdev)
+ 
+ 	spin_lock_init(&mp->lock);
+ 
++	mp->shared = platform_get_drvdata(pd->shared);
+ 	port_num = mp->port_num = pd->port_number;
+ 
++	if (mp->shared->win_protect)
++		wrl(mp, WINDOW_PROTECT(port_num), mp->shared->win_protect);
++
++	mp->shared_smi = mp->shared;
++	if (pd->shared_smi != NULL)
++		mp->shared_smi = platform_get_drvdata(pd->shared_smi);
++
+ 	/* set default config values */
+ 	eth_port_uc_addr_get(mp, dev->dev_addr);
+ 	mp->rx_ring_size = PORT_DEFAULT_RECEIVE_QUEUE_SIZE;
+@@ -1983,30 +2008,91 @@ static int mv643xx_eth_remove(struct platform_device *pdev)
+ 	return 0;
+ }
+ 
++static void mv643xx_eth_conf_mbus_windows(struct mv643xx_shared_private *msp,
++					  struct mbus_dram_target_info *dram)
++{
++	void __iomem *base = msp->eth_base;
++	u32 win_enable;
++	u32 win_protect;
++	int i;
++
++	for (i = 0; i < 6; i++) {
++		writel(0, base + WINDOW_BASE(i));
++		writel(0, base + WINDOW_SIZE(i));
++		if (i < 4)
++			writel(0, base + WINDOW_REMAP_HIGH(i));
++	}
++
++	win_enable = 0x3f;
++	win_protect = 0;
++
++	for (i = 0; i < dram->num_cs; i++) {
++		struct mbus_dram_window *cs = dram->cs + i;
++
++		writel((cs->base & 0xffff0000) |
++			(cs->mbus_attr << 8) |
++			dram->mbus_dram_target_id, base + WINDOW_BASE(i));
++		writel((cs->size - 1) & 0xffff0000, base + WINDOW_SIZE(i));
++
++		win_enable &= ~(1 << i);
++		win_protect |= 3 << (2 * i);
++	}
++
++	writel(win_enable, base + WINDOW_BAR_ENABLE);
++	msp->win_protect = win_protect;
++}
++
+ static int mv643xx_eth_shared_probe(struct platform_device *pdev)
+ {
+ 	static int mv643xx_version_printed = 0;
++	struct mv643xx_eth_shared_platform_data *pd = pdev->dev.platform_data;
++	struct mv643xx_shared_private *msp;
+ 	struct resource *res;
++	int ret;
+ 
+ 	if (!mv643xx_version_printed++)
+ 		printk(KERN_NOTICE "MV-643xx 10/100/1000 Ethernet Driver\n");
+ 
++	ret = -EINVAL;
+ 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ 	if (res == NULL)
+-		return -ENODEV;
++		goto out;
+ 
+-	mv643xx_eth_base = ioremap(res->start, res->end - res->start + 1);
+-	if (mv643xx_eth_base == NULL)
+-		return -ENOMEM;
++	ret = -ENOMEM;
++	msp = kmalloc(sizeof(*msp), GFP_KERNEL);
++	if (msp == NULL)
++		goto out;
++	memset(msp, 0, sizeof(*msp));
++
++	msp->eth_base = ioremap(res->start, res->end - res->start + 1);
++	if (msp->eth_base == NULL)
++		goto out_free;
++
++	spin_lock_init(&msp->phy_lock);
++	msp->t_clk = (pd != NULL && pd->t_clk != 0) ? pd->t_clk : 133000000;
++
++	platform_set_drvdata(pdev, msp);
++
++	/*
++	 * (Re-)program MBUS remapping windows if we are asked to.
++	 */
++	if (pd != NULL && pd->dram != NULL)
++		mv643xx_eth_conf_mbus_windows(msp, pd->dram);
+ 
+ 	return 0;
+ 
++out_free:
++	kfree(msp);
++out:
++	return ret;
+ }
+ 
+ static int mv643xx_eth_shared_remove(struct platform_device *pdev)
+ {
+-	iounmap(mv643xx_eth_base);
+-	mv643xx_eth_base = NULL;
++	struct mv643xx_shared_private *msp = platform_get_drvdata(pdev);
++
++	iounmap(msp->eth_base);
++	kfree(msp);
+ 
+ 	return 0;
+ }
+@@ -2906,15 +2992,16 @@ static void eth_port_reset(struct mv643xx_private *mp)
+ static void eth_port_read_smi_reg(struct mv643xx_private *mp,
+ 				unsigned int phy_reg, unsigned int *value)
+ {
++	void __iomem *smi_reg = mp->shared_smi->eth_base + SMI_REG;
+ 	int phy_addr = ethernet_phy_get(mp);
+ 	unsigned long flags;
+ 	int i;
+ 
+ 	/* the SMI register is a shared resource */
+-	spin_lock_irqsave(&mv643xx_eth_phy_lock, flags);
++	spin_lock_irqsave(&mp->shared_smi->phy_lock, flags);
+ 
+ 	/* wait for the SMI register to become available */
+-	for (i = 0; rdl(mp, SMI_REG) & ETH_SMI_BUSY; i++) {
++	for (i = 0; readl(smi_reg) & ETH_SMI_BUSY; i++) {
+ 		if (i == PHY_WAIT_ITERATIONS) {
+ 			printk("%s: PHY busy timeout\n", mp->dev->name);
+ 			goto out;
+@@ -2922,11 +3009,11 @@ static void eth_port_read_smi_reg(struct mv643xx_private *mp,
+ 		udelay(PHY_WAIT_MICRO_SECONDS);
+ 	}
+ 
+-	wrl(mp, SMI_REG,
+-		(phy_addr << 16) | (phy_reg << 21) | ETH_SMI_OPCODE_READ);
++	writel((phy_addr << 16) | (phy_reg << 21) | ETH_SMI_OPCODE_READ,
++		smi_reg);
+ 
+ 	/* now wait for the data to be valid */
+-	for (i = 0; !(rdl(mp, SMI_REG) & ETH_SMI_READ_VALID); i++) {
++	for (i = 0; !(readl(smi_reg) & ETH_SMI_READ_VALID); i++) {
+ 		if (i == PHY_WAIT_ITERATIONS) {
+ 			printk("%s: PHY read timeout\n", mp->dev->name);
+ 			goto out;
+@@ -2934,9 +3021,9 @@ static void eth_port_read_smi_reg(struct mv643xx_private *mp,
+ 		udelay(PHY_WAIT_MICRO_SECONDS);
+ 	}
+ 
+-	*value = rdl(mp, SMI_REG) & 0xffff;
++	*value = readl(smi_reg) & 0xffff;
+ out:
+-	spin_unlock_irqrestore(&mv643xx_eth_phy_lock, flags);
++	spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags);
+ }
+ 
+ /*
+@@ -2962,17 +3049,16 @@ out:
+ static void eth_port_write_smi_reg(struct mv643xx_private *mp,
+ 				   unsigned int phy_reg, unsigned int value)
+ {
+-	int phy_addr;
+-	int i;
++	void __iomem *smi_reg = mp->shared_smi->eth_base + SMI_REG;
++	int phy_addr = ethernet_phy_get(mp);
+ 	unsigned long flags;
+-
+-	phy_addr = ethernet_phy_get(mp);
++	int i;
+ 
+ 	/* the SMI register is a shared resource */
+-	spin_lock_irqsave(&mv643xx_eth_phy_lock, flags);
++	spin_lock_irqsave(&mp->shared_smi->phy_lock, flags);
+ 
+ 	/* wait for the SMI register to become available */
+-	for (i = 0; rdl(mp, SMI_REG) & ETH_SMI_BUSY; i++) {
++	for (i = 0; readl(smi_reg) & ETH_SMI_BUSY; i++) {
+ 		if (i == PHY_WAIT_ITERATIONS) {
+ 			printk("%s: PHY busy timeout\n", mp->dev->name);
+ 			goto out;
+@@ -2980,10 +3066,10 @@ static void eth_port_write_smi_reg(struct mv643xx_private *mp,
+ 		udelay(PHY_WAIT_MICRO_SECONDS);
+ 	}
+ 
+-	wrl(mp, SMI_REG, (phy_addr << 16) | (phy_reg << 21) |
+-				ETH_SMI_OPCODE_WRITE | (value & 0xffff));
++	writel((phy_addr << 16) | (phy_reg << 21) |
++		ETH_SMI_OPCODE_WRITE | (value & 0xffff), smi_reg);
+ out:
+-	spin_unlock_irqrestore(&mv643xx_eth_phy_lock, flags);
++	spin_unlock_irqrestore(&mp->shared_smi->phy_lock, flags);
+ }
+ 
+ /*
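
The mv643xx rework replaces the file-scope eth_base pointer and SMI spinlock with a per-shared-device private struct handed between the two platform drivers via drvdata, which is what makes multiple controller instances (and a separate SMI master) possible. In outline (a sketch, not the literal probe code):

static int example_shared_probe(struct platform_device *pdev)
{
        struct mv643xx_shared_private *msp;

        msp = kzalloc(sizeof(*msp), GFP_KERNEL);
        if (msp == NULL)
                return -ENOMEM;

        spin_lock_init(&msp->phy_lock);
        /* ... ioremap the register window into msp->eth_base ... */
        platform_set_drvdata(pdev, msp);
        return 0;
}

/* Each port device then resolves its shared block through the drvdata: */
static void example_port_attach(struct mv643xx_private *mp,
                                struct mv643xx_eth_platform_data *pd)
{
        mp->shared = platform_get_drvdata(pd->shared);
        mp->shared_smi = mp->shared;
        if (pd->shared_smi != NULL)
                mp->shared_smi = platform_get_drvdata(pd->shared_smi);
}
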
+diff --git a/drivers/net/niu.c b/drivers/net/niu.c
+index 4009c4c..57cfd72 100644
+--- a/drivers/net/niu.c
++++ b/drivers/net/niu.c
+@@ -1,6 +1,6 @@
+ /* niu.c: Neptune ethernet driver.
+  *
+- * Copyright (C) 2007 David S. Miller (davem at davemloft.net)
++ * Copyright (C) 2007, 2008 David S. Miller (davem at davemloft.net)
+  */
+ 
+ #include <linux/module.h>
+@@ -33,8 +33,8 @@
+ 
+ #define DRV_MODULE_NAME		"niu"
+ #define PFX DRV_MODULE_NAME	": "
+-#define DRV_MODULE_VERSION	"0.8"
+-#define DRV_MODULE_RELDATE	"April 24, 2008"
++#define DRV_MODULE_VERSION	"0.9"
++#define DRV_MODULE_RELDATE	"May 4, 2008"
+ 
+ static char version[] __devinitdata =
+ 	DRV_MODULE_NAME ".c:v" DRV_MODULE_VERSION " (" DRV_MODULE_RELDATE ")\n";
+@@ -7264,8 +7264,11 @@ static int __devinit niu_get_and_validate_port(struct niu *np)
+ 				parent->num_ports = nr64(ESPC_NUM_PORTS_MACS) &
+ 					ESPC_NUM_PORTS_MACS_VAL;
+ 
++				/* All of the current probing methods fail on
++				 * Maramba on-board parts.
++				 */
+ 				if (!parent->num_ports)
+-					return -ENODEV;
++					parent->num_ports = 4;
+ 			}
+ 		}
+ 	}
+diff --git a/drivers/net/pcnet32.c b/drivers/net/pcnet32.c
+index 4eb322e..a1c454d 100644
+--- a/drivers/net/pcnet32.c
++++ b/drivers/net/pcnet32.c
+@@ -22,12 +22,8 @@
+  *************************************************************************/
+ 
+ #define DRV_NAME	"pcnet32"
+-#ifdef CONFIG_PCNET32_NAPI
+-#define DRV_VERSION	"1.34-NAPI"
+-#else
+-#define DRV_VERSION	"1.34"
+-#endif
+-#define DRV_RELDATE	"14.Aug.2007"
++#define DRV_VERSION	"1.35"
++#define DRV_RELDATE	"21.Apr.2008"
+ #define PFX		DRV_NAME ": "
+ 
+ static const char *const version =
+@@ -445,30 +441,24 @@ static struct pcnet32_access pcnet32_dwio = {
+ 
+ static void pcnet32_netif_stop(struct net_device *dev)
+ {
+-#ifdef CONFIG_PCNET32_NAPI
+ 	struct pcnet32_private *lp = netdev_priv(dev);
+-#endif
++
+ 	dev->trans_start = jiffies;
+-#ifdef CONFIG_PCNET32_NAPI
+ 	napi_disable(&lp->napi);
+-#endif
+ 	netif_tx_disable(dev);
+ }
+ 
+ static void pcnet32_netif_start(struct net_device *dev)
+ {
+-#ifdef CONFIG_PCNET32_NAPI
+ 	struct pcnet32_private *lp = netdev_priv(dev);
+ 	ulong ioaddr = dev->base_addr;
+ 	u16 val;
+-#endif
++
+ 	netif_wake_queue(dev);
+-#ifdef CONFIG_PCNET32_NAPI
+ 	val = lp->a.read_csr(ioaddr, CSR3);
+ 	val &= 0x00ff;
+ 	lp->a.write_csr(ioaddr, CSR3, val);
+ 	napi_enable(&lp->napi);
+-#endif
+ }
+ 
+ /*
+@@ -911,11 +901,7 @@ static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1)
+ 	rc = 1;			/* default to fail */
+ 
+ 	if (netif_running(dev))
+-#ifdef CONFIG_PCNET32_NAPI
+ 		pcnet32_netif_stop(dev);
+-#else
+-		pcnet32_close(dev);
+-#endif
+ 
+ 	spin_lock_irqsave(&lp->lock, flags);
+ 	lp->a.write_csr(ioaddr, CSR0, CSR0_STOP);	/* stop the chip */
+@@ -1046,7 +1032,6 @@ static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1)
+ 	x = a->read_bcr(ioaddr, 32);	/* reset internal loopback */
+ 	a->write_bcr(ioaddr, 32, (x & ~0x0002));
+ 
+-#ifdef CONFIG_PCNET32_NAPI
+ 	if (netif_running(dev)) {
+ 		pcnet32_netif_start(dev);
+ 		pcnet32_restart(dev, CSR0_NORMAL);
+@@ -1055,16 +1040,6 @@ static int pcnet32_loopback_test(struct net_device *dev, uint64_t * data1)
+ 		lp->a.write_bcr(ioaddr, 20, 4);	/* return to 16bit mode */
+ 	}
+ 	spin_unlock_irqrestore(&lp->lock, flags);
+-#else
+-	if (netif_running(dev)) {
+-		spin_unlock_irqrestore(&lp->lock, flags);
+-		pcnet32_open(dev);
+-	} else {
+-		pcnet32_purge_rx_ring(dev);
+-		lp->a.write_bcr(ioaddr, 20, 4);	/* return to 16bit mode */
+-		spin_unlock_irqrestore(&lp->lock, flags);
+-	}
+-#endif
+ 
+ 	return (rc);
+ }				/* end pcnet32_loopback_test  */
+@@ -1270,11 +1245,7 @@ static void pcnet32_rx_entry(struct net_device *dev,
+ 	}
+ 	dev->stats.rx_bytes += skb->len;
+ 	skb->protocol = eth_type_trans(skb, dev);
+-#ifdef CONFIG_PCNET32_NAPI
+ 	netif_receive_skb(skb);
+-#else
+-	netif_rx(skb);
+-#endif
+ 	dev->last_rx = jiffies;
+ 	dev->stats.rx_packets++;
+ 	return;
+@@ -1403,7 +1374,6 @@ static int pcnet32_tx(struct net_device *dev)
+ 	return must_restart;
+ }
+ 
+-#ifdef CONFIG_PCNET32_NAPI
+ static int pcnet32_poll(struct napi_struct *napi, int budget)
+ {
+ 	struct pcnet32_private *lp = container_of(napi, struct pcnet32_private, napi);
+@@ -1442,7 +1412,6 @@ static int pcnet32_poll(struct napi_struct *napi, int budget)
+ 	}
+ 	return work_done;
+ }
+-#endif
+ 
+ #define PCNET32_REGS_PER_PHY	32
+ #define PCNET32_MAX_PHYS	32
+@@ -1864,9 +1833,7 @@ pcnet32_probe1(unsigned long ioaddr, int shared, struct pci_dev *pdev)
+ 	/* napi.weight is used in both the napi and non-napi cases */
+ 	lp->napi.weight = lp->rx_ring_size / 2;
+ 
+-#ifdef CONFIG_PCNET32_NAPI
+ 	netif_napi_add(dev, &lp->napi, pcnet32_poll, lp->rx_ring_size / 2);
+-#endif
+ 
+ 	if (fdx && !(lp->options & PCNET32_PORT_ASEL) &&
+ 	    ((cards_found >= MAX_UNITS) || full_duplex[cards_found]))
+@@ -2297,9 +2264,7 @@ static int pcnet32_open(struct net_device *dev)
+ 		goto err_free_ring;
+ 	}
+ 
+-#ifdef CONFIG_PCNET32_NAPI
+ 	napi_enable(&lp->napi);
+-#endif
+ 
+ 	/* Re-initialize the PCNET32, and start it when done. */
+ 	lp->a.write_csr(ioaddr, 1, (lp->init_dma_addr & 0xffff));
+@@ -2623,7 +2588,6 @@ pcnet32_interrupt(int irq, void *dev_id)
+ 				       dev->name, csr0);
+ 			/* unlike for the lance, there is no restart needed */
+ 		}
+-#ifdef CONFIG_PCNET32_NAPI
+ 		if (netif_rx_schedule_prep(dev, &lp->napi)) {
+ 			u16 val;
+ 			/* set interrupt masks */
+@@ -2634,24 +2598,9 @@ pcnet32_interrupt(int irq, void *dev_id)
+ 			__netif_rx_schedule(dev, &lp->napi);
+ 			break;
+ 		}
+-#else
+-		pcnet32_rx(dev, lp->napi.weight);
+-		if (pcnet32_tx(dev)) {
+-			/* reset the chip to clear the error condition, then restart */
+-			lp->a.reset(ioaddr);
+-			lp->a.write_csr(ioaddr, CSR4, 0x0915); /* auto tx pad */
+-			pcnet32_restart(dev, CSR0_START);
+-			netif_wake_queue(dev);
+-		}
+-#endif
+ 		csr0 = lp->a.read_csr(ioaddr, CSR0);
+ 	}
+ 
+-#ifndef CONFIG_PCNET32_NAPI
+-	/* Set interrupt enable. */
+-	lp->a.write_csr(ioaddr, CSR0, CSR0_INTEN);
+-#endif
+-
+ 	if (netif_msg_intr(lp))
+ 		printk(KERN_DEBUG "%s: exiting interrupt, csr0=%#4.4x.\n",
+ 		       dev->name, lp->a.read_csr(ioaddr, CSR0));
+@@ -2670,9 +2619,7 @@ static int pcnet32_close(struct net_device *dev)
+ 	del_timer_sync(&lp->watchdog_timer);
+ 
+ 	netif_stop_queue(dev);
+-#ifdef CONFIG_PCNET32_NAPI
+ 	napi_disable(&lp->napi);
+-#endif
+ 
+ 	spin_lock_irqsave(&lp->lock, flags);
+ 
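
With CONFIG_PCNET32_NAPI removed, pcnet32 always follows the standard NAPI flow of this kernel generation: netif_napi_add() at probe, napi_enable()/napi_disable() around open/close, schedule from the interrupt handler, and complete from the poll routine when under budget. A skeleton of that flow (two-argument 2.6.25-era signatures as used above; example_rx is a hypothetical receive handler):

struct example_priv {                   /* hypothetical private data */
        struct napi_struct napi;
        struct net_device *dev;
};

static int example_poll(struct napi_struct *napi, int budget)
{
        struct example_priv *lp = container_of(napi, struct example_priv, napi);
        int work_done = example_rx(lp->dev, budget);

        if (work_done < budget) {
                netif_rx_complete(lp->dev, napi);  /* done: leave poll mode */
                /* unmask the device's rx/tx interrupts here */
        }
        return work_done;
}

static irqreturn_t example_interrupt(int irq, void *dev_id)
{
        struct net_device *dev = dev_id;
        struct example_priv *lp = netdev_priv(dev);

        if (netif_rx_schedule_prep(dev, &lp->napi)) {
                /* mask the device's rx/tx interrupts, then: */
                __netif_rx_schedule(dev, &lp->napi);
        }
        return IRQ_HANDLED;
}
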
+diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
+index 3c18bb5..45cc291 100644
+--- a/drivers/net/phy/phy.c
++++ b/drivers/net/phy/phy.c
+@@ -547,7 +547,7 @@ static void phy_force_reduction(struct phy_device *phydev)
+  * Must not be called from interrupt context, or while the
+  * phydev->lock is held.
+  */
+-void phy_error(struct phy_device *phydev)
++static void phy_error(struct phy_device *phydev)
+ {
+ 	mutex_lock(&phydev->lock);
+ 	phydev->state = PHY_HALTED;
+diff --git a/drivers/net/tulip/uli526x.c b/drivers/net/tulip/uli526x.c
+index a59c1f2..2511ca7 100644
+--- a/drivers/net/tulip/uli526x.c
++++ b/drivers/net/tulip/uli526x.c
+@@ -434,10 +434,6 @@ static int uli526x_open(struct net_device *dev)
+ 
+ 	ULI526X_DBUG(0, "uli526x_open", 0);
+ 
+-	ret = request_irq(dev->irq, &uli526x_interrupt, IRQF_SHARED, dev->name, dev);
+-	if (ret)
+-		return ret;
+-
+ 	/* system variable init */
+ 	db->cr6_data = CR6_DEFAULT | uli526x_cr6_user_set;
+ 	db->tx_packet_cnt = 0;
+@@ -456,6 +452,10 @@ static int uli526x_open(struct net_device *dev)
+ 	/* Initialize ULI526X board */
+ 	uli526x_init(dev);
+ 
++	ret = request_irq(dev->irq, &uli526x_interrupt, IRQF_SHARED, dev->name, dev);
++	if (ret)
++		return ret;
++
+ 	/* Active System Interface */
+ 	netif_wake_queue(dev);
+ 
+@@ -1368,6 +1368,12 @@ static void update_cr6(u32 cr6_data, unsigned long ioaddr)
+  *	This setup frame initialize ULI526X address filter mode
+  */
+ 
++#ifdef __BIG_ENDIAN
++#define FLT_SHIFT 16
++#else
++#define FLT_SHIFT 0
++#endif
++
+ static void send_filter_frame(struct net_device *dev, int mc_cnt)
+ {
+ 	struct uli526x_board_info *db = netdev_priv(dev);
+@@ -1384,27 +1390,27 @@ static void send_filter_frame(struct net_device *dev, int mc_cnt)
+ 
+ 	/* Node address */
+ 	addrptr = (u16 *) dev->dev_addr;
+-	*suptr++ = addrptr[0];
+-	*suptr++ = addrptr[1];
+-	*suptr++ = addrptr[2];
++	*suptr++ = addrptr[0] << FLT_SHIFT;
++	*suptr++ = addrptr[1] << FLT_SHIFT;
++	*suptr++ = addrptr[2] << FLT_SHIFT;
+ 
+ 	/* broadcast address */
+-	*suptr++ = 0xffff;
+-	*suptr++ = 0xffff;
+-	*suptr++ = 0xffff;
++	*suptr++ = 0xffff << FLT_SHIFT;
++	*suptr++ = 0xffff << FLT_SHIFT;
++	*suptr++ = 0xffff << FLT_SHIFT;
+ 
+ 	/* fit the multicast address */
+ 	for (mcptr = dev->mc_list, i = 0; i < mc_cnt; i++, mcptr = mcptr->next) {
+ 		addrptr = (u16 *) mcptr->dmi_addr;
+-		*suptr++ = addrptr[0];
+-		*suptr++ = addrptr[1];
+-		*suptr++ = addrptr[2];
++		*suptr++ = addrptr[0] << FLT_SHIFT;
++		*suptr++ = addrptr[1] << FLT_SHIFT;
++		*suptr++ = addrptr[2] << FLT_SHIFT;
+ 	}
+ 
+ 	for (; i<14; i++) {
+-		*suptr++ = 0xffff;
+-		*suptr++ = 0xffff;
+-		*suptr++ = 0xffff;
++		*suptr++ = 0xffff << FLT_SHIFT;
++		*suptr++ = 0xffff << FLT_SHIFT;
++		*suptr++ = 0xffff << FLT_SHIFT;
+ 	}
+ 
+ 	/* prepare the setup frame */
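
The uli526x fix is an endianness detail: the adapter reads each 32-bit setup-frame word as little-endian and takes the filter bytes from the low half, while a big-endian CPU storing a 16-bit value into a u32 leaves them in the high half, so FLT_SHIFT (16 on big-endian, 0 on little-endian) moves each halfword to where the device will look. Condensed:

#ifdef __BIG_ENDIAN
#define FLT_SHIFT 16
#else
#define FLT_SHIFT 0
#endif

/* Place a 16-bit filter halfword where the little-endian device
 * reads it inside a CPU-written 32-bit setup-frame word. */
static inline u32 filter_word(u16 halfword)
{
        return (u32)halfword << FLT_SHIFT;
}

The setup routine then stores filter_word(addrptr[n]) for each node, broadcast, and multicast halfword, which is exactly what the shifted assignments in the hunks above do inline.
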
+diff --git a/drivers/net/ucc_geth.c b/drivers/net/ucc_geth.c
+index 281ce3d..ca0bdac 100644
+--- a/drivers/net/ucc_geth.c
++++ b/drivers/net/ucc_geth.c
+@@ -62,7 +62,6 @@
+ #endif				/* UGETH_VERBOSE_DEBUG */
+ #define UGETH_MSG_DEFAULT	(NETIF_MSG_IFUP << 1 ) - 1
+ 
+-void uec_set_ethtool_ops(struct net_device *netdev);
+ 
+ static DEFINE_SPINLOCK(ugeth_lock);
+ 
+@@ -216,7 +215,8 @@ static struct list_head *dequeue(struct list_head *lh)
+ 	}
+ }
+ 
+-static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth, u8 *bd)
++static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth,
++		u8 __iomem *bd)
+ {
+ 	struct sk_buff *skb = NULL;
+ 
+@@ -236,21 +236,22 @@ static struct sk_buff *get_new_skb(struct ucc_geth_private *ugeth, u8 *bd)
+ 
+ 	skb->dev = ugeth->dev;
+ 
+-	out_be32(&((struct qe_bd *)bd)->buf,
++	out_be32(&((struct qe_bd __iomem *)bd)->buf,
+ 		      dma_map_single(NULL,
+ 				     skb->data,
+ 				     ugeth->ug_info->uf_info.max_rx_buf_length +
+ 				     UCC_GETH_RX_DATA_BUF_ALIGNMENT,
+ 				     DMA_FROM_DEVICE));
+ 
+-	out_be32((u32 *)bd, (R_E | R_I | (in_be32((u32 *)bd) & R_W)));
++	out_be32((u32 __iomem *)bd,
++			(R_E | R_I | (in_be32((u32 __iomem*)bd) & R_W)));
+ 
+ 	return skb;
+ }
+ 
+ static int rx_bd_buffer_set(struct ucc_geth_private *ugeth, u8 rxQ)
+ {
+-	u8 *bd;
++	u8 __iomem *bd;
+ 	u32 bd_status;
+ 	struct sk_buff *skb;
+ 	int i;
+@@ -259,7 +260,7 @@ static int rx_bd_buffer_set(struct ucc_geth_private *ugeth, u8 rxQ)
+ 	i = 0;
+ 
+ 	do {
+-		bd_status = in_be32((u32*)bd);
++		bd_status = in_be32((u32 __iomem *)bd);
+ 		skb = get_new_skb(ugeth, bd);
+ 
+ 		if (!skb)	/* If can not allocate data buffer,
+@@ -277,7 +278,7 @@ static int rx_bd_buffer_set(struct ucc_geth_private *ugeth, u8 rxQ)
+ }
+ 
+ static int fill_init_enet_entries(struct ucc_geth_private *ugeth,
+-				  volatile u32 *p_start,
++				  u32 *p_start,
+ 				  u8 num_entries,
+ 				  u32 thread_size,
+ 				  u32 thread_alignment,
+@@ -316,7 +317,7 @@ static int fill_init_enet_entries(struct ucc_geth_private *ugeth,
+ }
+ 
+ static int return_init_enet_entries(struct ucc_geth_private *ugeth,
+-				    volatile u32 *p_start,
++				    u32 *p_start,
+ 				    u8 num_entries,
+ 				    enum qe_risc_allocation risc,
+ 				    int skip_page_for_first_entry)
+@@ -326,21 +327,22 @@ static int return_init_enet_entries(struct ucc_geth_private *ugeth,
+ 	int snum;
+ 
+ 	for (i = 0; i < num_entries; i++) {
++		u32 val = *p_start;
++
+ 		/* Check that this entry was actually valid --
+ 		needed in case failed in allocations */
+-		if ((*p_start & ENET_INIT_PARAM_RISC_MASK) == risc) {
++		if ((val & ENET_INIT_PARAM_RISC_MASK) == risc) {
+ 			snum =
+-			    (u32) (*p_start & ENET_INIT_PARAM_SNUM_MASK) >>
++			    (u32) (val & ENET_INIT_PARAM_SNUM_MASK) >>
+ 			    ENET_INIT_PARAM_SNUM_SHIFT;
+ 			qe_put_snum((u8) snum);
+ 			if (!((i == 0) && skip_page_for_first_entry)) {
+ 			/* First entry of Rx does not have page */
+ 				init_enet_offset =
+-				    (in_be32(p_start) &
+-				     ENET_INIT_PARAM_PTR_MASK);
++				    (val & ENET_INIT_PARAM_PTR_MASK);
+ 				qe_muram_free(init_enet_offset);
+ 			}
+-			*(p_start++) = 0;	/* Just for cosmetics */
++			*p_start++ = 0;
+ 		}
+ 	}
+ 
+@@ -349,7 +351,7 @@ static int return_init_enet_entries(struct ucc_geth_private *ugeth,
+ 
+ #ifdef DEBUG
+ static int dump_init_enet_entries(struct ucc_geth_private *ugeth,
+-				  volatile u32 *p_start,
++				  u32 __iomem *p_start,
+ 				  u8 num_entries,
+ 				  u32 thread_size,
+ 				  enum qe_risc_allocation risc,
+@@ -360,11 +362,13 @@ static int dump_init_enet_entries(struct ucc_geth_private *ugeth,
+ 	int snum;
+ 
+ 	for (i = 0; i < num_entries; i++) {
++		u32 val = in_be32(p_start);
++
+ 		/* Check that this entry was actually valid --
+ 		needed in case failed in allocations */
+-		if ((*p_start & ENET_INIT_PARAM_RISC_MASK) == risc) {
++		if ((val & ENET_INIT_PARAM_RISC_MASK) == risc) {
+ 			snum =
+-			    (u32) (*p_start & ENET_INIT_PARAM_SNUM_MASK) >>
++			    (u32) (val & ENET_INIT_PARAM_SNUM_MASK) >>
+ 			    ENET_INIT_PARAM_SNUM_SHIFT;
+ 			qe_put_snum((u8) snum);
+ 			if (!((i == 0) && skip_page_for_first_entry)) {
+@@ -440,7 +444,7 @@ static int hw_add_addr_in_paddr(struct ucc_geth_private *ugeth,
+ 
+ static int hw_clear_addr_in_paddr(struct ucc_geth_private *ugeth, u8 paddr_num)
+ {
+-	struct ucc_geth_82xx_address_filtering_pram *p_82xx_addr_filt;
++	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
+ 
+ 	if (!(paddr_num < NUM_OF_PADDRS)) {
+ 		ugeth_warn("%s: Illagel paddr_num.", __FUNCTION__);
+@@ -448,7 +452,7 @@ static int hw_clear_addr_in_paddr(struct ucc_geth_private *ugeth, u8 paddr_num)
+ 	}
+ 
+ 	p_82xx_addr_filt =
+-	    (struct ucc_geth_82xx_address_filtering_pram *) ugeth->p_rx_glbl_pram->
++	    (struct ucc_geth_82xx_address_filtering_pram __iomem *) ugeth->p_rx_glbl_pram->
+ 	    addressfiltering;
+ 
+ 	/* Writing address ff.ff.ff.ff.ff.ff disables address
+@@ -463,11 +467,11 @@ static int hw_clear_addr_in_paddr(struct ucc_geth_private *ugeth, u8 paddr_num)
+ static void hw_add_addr_in_hash(struct ucc_geth_private *ugeth,
+                                 u8 *p_enet_addr)
+ {
+-	struct ucc_geth_82xx_address_filtering_pram *p_82xx_addr_filt;
++	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
+ 	u32 cecr_subblock;
+ 
+ 	p_82xx_addr_filt =
+-	    (struct ucc_geth_82xx_address_filtering_pram *) ugeth->p_rx_glbl_pram->
++	    (struct ucc_geth_82xx_address_filtering_pram __iomem *) ugeth->p_rx_glbl_pram->
+ 	    addressfiltering;
+ 
+ 	cecr_subblock =
+@@ -487,7 +491,7 @@ static void hw_add_addr_in_hash(struct ucc_geth_private *ugeth,
+ static void magic_packet_detection_enable(struct ucc_geth_private *ugeth)
+ {
+ 	struct ucc_fast_private *uccf;
+-	struct ucc_geth *ug_regs;
++	struct ucc_geth __iomem *ug_regs;
+ 	u32 maccfg2, uccm;
+ 
+ 	uccf = ugeth->uccf;
+@@ -507,7 +511,7 @@ static void magic_packet_detection_enable(struct ucc_geth_private *ugeth)
+ static void magic_packet_detection_disable(struct ucc_geth_private *ugeth)
+ {
+ 	struct ucc_fast_private *uccf;
+-	struct ucc_geth *ug_regs;
++	struct ucc_geth __iomem *ug_regs;
+ 	u32 maccfg2, uccm;
+ 
+ 	uccf = ugeth->uccf;
+@@ -538,13 +542,13 @@ static void get_statistics(struct ucc_geth_private *ugeth,
+ 			   rx_firmware_statistics,
+ 			   struct ucc_geth_hardware_statistics *hardware_statistics)
+ {
+-	struct ucc_fast *uf_regs;
+-	struct ucc_geth *ug_regs;
++	struct ucc_fast __iomem *uf_regs;
++	struct ucc_geth __iomem *ug_regs;
+ 	struct ucc_geth_tx_firmware_statistics_pram *p_tx_fw_statistics_pram;
+ 	struct ucc_geth_rx_firmware_statistics_pram *p_rx_fw_statistics_pram;
+ 
+ 	ug_regs = ugeth->ug_regs;
+-	uf_regs = (struct ucc_fast *) ug_regs;
++	uf_regs = (struct ucc_fast __iomem *) ug_regs;
+ 	p_tx_fw_statistics_pram = ugeth->p_tx_fw_statistics_pram;
+ 	p_rx_fw_statistics_pram = ugeth->p_rx_fw_statistics_pram;
+ 
+@@ -1132,9 +1136,9 @@ static void dump_regs(struct ucc_geth_private *ugeth)
+ }
+ #endif /* DEBUG */
+ 
+-static void init_default_reg_vals(volatile u32 *upsmr_register,
+-				  volatile u32 *maccfg1_register,
+-				  volatile u32 *maccfg2_register)
++static void init_default_reg_vals(u32 __iomem *upsmr_register,
++				  u32 __iomem *maccfg1_register,
++				  u32 __iomem *maccfg2_register)
+ {
+ 	out_be32(upsmr_register, UCC_GETH_UPSMR_INIT);
+ 	out_be32(maccfg1_register, UCC_GETH_MACCFG1_INIT);
+@@ -1148,7 +1152,7 @@ static int init_half_duplex_params(int alt_beb,
+ 				   u8 alt_beb_truncation,
+ 				   u8 max_retransmissions,
+ 				   u8 collision_window,
+-				   volatile u32 *hafdup_register)
++				   u32 __iomem *hafdup_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1180,7 +1184,7 @@ static int init_inter_frame_gap_params(u8 non_btb_cs_ipg,
+ 				       u8 non_btb_ipg,
+ 				       u8 min_ifg,
+ 				       u8 btb_ipg,
+-				       volatile u32 *ipgifg_register)
++				       u32 __iomem *ipgifg_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1215,9 +1219,9 @@ int init_flow_control_params(u32 automatic_flow_control_mode,
+ 				    int tx_flow_control_enable,
+ 				    u16 pause_period,
+ 				    u16 extension_field,
+-				    volatile u32 *upsmr_register,
+-				    volatile u32 *uempr_register,
+-				    volatile u32 *maccfg1_register)
++				    u32 __iomem *upsmr_register,
++				    u32 __iomem *uempr_register,
++				    u32 __iomem *maccfg1_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1243,8 +1247,8 @@ int init_flow_control_params(u32 automatic_flow_control_mode,
+ 
+ static int init_hw_statistics_gathering_mode(int enable_hardware_statistics,
+ 					     int auto_zero_hardware_statistics,
+-					     volatile u32 *upsmr_register,
+-					     volatile u16 *uescr_register)
++					     u32 __iomem *upsmr_register,
++					     u16 __iomem *uescr_register)
+ {
+ 	u32 upsmr_value = 0;
+ 	u16 uescr_value = 0;
+@@ -1270,12 +1274,12 @@ static int init_hw_statistics_gathering_mode(int enable_hardware_statistics,
+ static int init_firmware_statistics_gathering_mode(int
+ 		enable_tx_firmware_statistics,
+ 		int enable_rx_firmware_statistics,
+-		volatile u32 *tx_rmon_base_ptr,
++		u32 __iomem *tx_rmon_base_ptr,
+ 		u32 tx_firmware_statistics_structure_address,
+-		volatile u32 *rx_rmon_base_ptr,
++		u32 __iomem *rx_rmon_base_ptr,
+ 		u32 rx_firmware_statistics_structure_address,
+-		volatile u16 *temoder_register,
+-		volatile u32 *remoder_register)
++		u16 __iomem *temoder_register,
++		u32 __iomem *remoder_register)
+ {
+ 	/* Note: this function does not check if */
+ 	/* the parameters it receives are NULL   */
+@@ -1307,8 +1311,8 @@ static int init_mac_station_addr_regs(u8 address_byte_0,
+ 				      u8 address_byte_3,
+ 				      u8 address_byte_4,
+ 				      u8 address_byte_5,
+-				      volatile u32 *macstnaddr1_register,
+-				      volatile u32 *macstnaddr2_register)
++				      u32 __iomem *macstnaddr1_register,
++				      u32 __iomem *macstnaddr2_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1344,7 +1348,7 @@ static int init_mac_station_addr_regs(u8 address_byte_0,
+ }
+ 
+ static int init_check_frame_length_mode(int length_check,
+-					volatile u32 *maccfg2_register)
++					u32 __iomem *maccfg2_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1360,7 +1364,7 @@ static int init_check_frame_length_mode(int length_check,
+ }
+ 
+ static int init_preamble_length(u8 preamble_length,
+-				volatile u32 *maccfg2_register)
++				u32 __iomem *maccfg2_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1376,7 +1380,7 @@ static int init_preamble_length(u8 preamble_length,
+ 
+ static int init_rx_parameters(int reject_broadcast,
+ 			      int receive_short_frames,
+-			      int promiscuous, volatile u32 *upsmr_register)
++			      int promiscuous, u32 __iomem *upsmr_register)
+ {
+ 	u32 value = 0;
+ 
+@@ -1403,7 +1407,7 @@ static int init_rx_parameters(int reject_broadcast,
+ }
+ 
+ static int init_max_rx_buff_len(u16 max_rx_buf_len,
+-				volatile u16 *mrblr_register)
++				u16 __iomem *mrblr_register)
+ {
+ 	/* max_rx_buf_len value must be a multiple of 128 */
+ 	if ((max_rx_buf_len == 0)
+@@ -1415,8 +1419,8 @@ static int init_max_rx_buff_len(u16 max_rx_buf_len,
+ }
+ 
+ static int init_min_frame_len(u16 min_frame_length,
+-			      volatile u16 *minflr_register,
+-			      volatile u16 *mrblr_register)
++			      u16 __iomem *minflr_register,
++			      u16 __iomem *mrblr_register)
+ {
+ 	u16 mrblr_value = 0;
+ 
+@@ -1431,8 +1435,8 @@ static int init_min_frame_len(u16 min_frame_length,
+ static int adjust_enet_interface(struct ucc_geth_private *ugeth)
+ {
+ 	struct ucc_geth_info *ug_info;
+-	struct ucc_geth *ug_regs;
+-	struct ucc_fast *uf_regs;
++	struct ucc_geth __iomem *ug_regs;
++	struct ucc_fast __iomem *uf_regs;
+ 	int ret_val;
+ 	u32 upsmr, maccfg2, tbiBaseAddress;
+ 	u16 value;
+@@ -1517,8 +1521,8 @@ static int adjust_enet_interface(struct ucc_geth_private *ugeth)
+ static void adjust_link(struct net_device *dev)
+ {
+ 	struct ucc_geth_private *ugeth = netdev_priv(dev);
+-	struct ucc_geth *ug_regs;
+-	struct ucc_fast *uf_regs;
++	struct ucc_geth __iomem *ug_regs;
++	struct ucc_fast __iomem *uf_regs;
+ 	struct phy_device *phydev = ugeth->phydev;
+ 	unsigned long flags;
+ 	int new_state = 0;
+@@ -1678,9 +1682,9 @@ static int ugeth_graceful_stop_rx(struct ucc_geth_private * ugeth)
+ 	uccf = ugeth->uccf;
+ 
+ 	/* Clear acknowledge bit */
+-	temp = ugeth->p_rx_glbl_pram->rxgstpack;
++	temp = in_8(&ugeth->p_rx_glbl_pram->rxgstpack);
+ 	temp &= ~GRACEFUL_STOP_ACKNOWLEDGE_RX;
+-	ugeth->p_rx_glbl_pram->rxgstpack = temp;
++	out_8(&ugeth->p_rx_glbl_pram->rxgstpack, temp);
+ 
+ 	/* Keep issuing command and checking acknowledge bit until
+ 	it is asserted, according to spec */
+@@ -1692,7 +1696,7 @@ static int ugeth_graceful_stop_rx(struct ucc_geth_private * ugeth)
+ 		qe_issue_cmd(QE_GRACEFUL_STOP_RX, cecr_subblock,
+ 			     QE_CR_PROTOCOL_ETHERNET, 0);
+ 
+-		temp = ugeth->p_rx_glbl_pram->rxgstpack;
++		temp = in_8(&ugeth->p_rx_glbl_pram->rxgstpack);
+ 	} while (!(temp & GRACEFUL_STOP_ACKNOWLEDGE_RX));
+ 
+ 	uccf->stopped_rx = 1;
+@@ -1991,19 +1995,20 @@ static int ugeth_82xx_filtering_clear_all_addr_in_hash(struct ucc_geth_private *
+ 						       enum enet_addr_type
+ 						       enet_addr_type)
+ {
+-	struct ucc_geth_82xx_address_filtering_pram *p_82xx_addr_filt;
++	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
+ 	struct ucc_fast_private *uccf;
+ 	enum comm_dir comm_dir;
+ 	struct list_head *p_lh;
+ 	u16 i, num;
+-	u32 *addr_h, *addr_l;
++	u32 __iomem *addr_h;
++	u32 __iomem *addr_l;
+ 	u8 *p_counter;
+ 
+ 	uccf = ugeth->uccf;
+ 
+ 	p_82xx_addr_filt =
+-	    (struct ucc_geth_82xx_address_filtering_pram *) ugeth->p_rx_glbl_pram->
+-	    addressfiltering;
++	    (struct ucc_geth_82xx_address_filtering_pram __iomem *)
++	    ugeth->p_rx_glbl_pram->addressfiltering;
+ 
+ 	if (enet_addr_type == ENET_ADDR_TYPE_GROUP) {
+ 		addr_h = &(p_82xx_addr_filt->gaddr_h);
+@@ -2079,7 +2084,7 @@ static int ugeth_82xx_filtering_clear_addr_in_paddr(struct ucc_geth_private *uge
+ static void ucc_geth_memclean(struct ucc_geth_private *ugeth)
+ {
+ 	u16 i, j;
+-	u8 *bd;
++	u8 __iomem *bd;
+ 
+ 	if (!ugeth)
+ 		return;
+@@ -2154,8 +2159,8 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth)
+ 		for (j = 0; j < ugeth->ug_info->bdRingLenTx[i]; j++) {
+ 			if (ugeth->tx_skbuff[i][j]) {
+ 				dma_unmap_single(NULL,
+-						 ((struct qe_bd *)bd)->buf,
+-						 (in_be32((u32 *)bd) &
++						 in_be32(&((struct qe_bd __iomem *)bd)->buf),
++						 (in_be32((u32 __iomem *)bd) &
+ 						  BD_LENGTH_MASK),
+ 						 DMA_TO_DEVICE);
+ 				dev_kfree_skb_any(ugeth->tx_skbuff[i][j]);
+@@ -2182,7 +2187,7 @@ static void ucc_geth_memclean(struct ucc_geth_private *ugeth)
+ 			for (j = 0; j < ugeth->ug_info->bdRingLenRx[i]; j++) {
+ 				if (ugeth->rx_skbuff[i][j]) {
+ 					dma_unmap_single(NULL,
+-						((struct qe_bd *)bd)->buf,
++						in_be32(&((struct qe_bd __iomem *)bd)->buf),
+ 						ugeth->ug_info->
+ 						uf_info.max_rx_buf_length +
+ 						UCC_GETH_RX_DATA_BUF_ALIGNMENT,
+@@ -2218,8 +2223,8 @@ static void ucc_geth_set_multi(struct net_device *dev)
+ {
+ 	struct ucc_geth_private *ugeth;
+ 	struct dev_mc_list *dmi;
+-	struct ucc_fast *uf_regs;
+-	struct ucc_geth_82xx_address_filtering_pram *p_82xx_addr_filt;
++	struct ucc_fast __iomem *uf_regs;
++	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
+ 	int i;
+ 
+ 	ugeth = netdev_priv(dev);
+@@ -2228,14 +2233,14 @@ static void ucc_geth_set_multi(struct net_device *dev)
+ 
+ 	if (dev->flags & IFF_PROMISC) {
+ 
+-		uf_regs->upsmr |= UPSMR_PRO;
++		out_be32(&uf_regs->upsmr, in_be32(&uf_regs->upsmr) | UPSMR_PRO);
+ 
+ 	} else {
+ 
+-		uf_regs->upsmr &= ~UPSMR_PRO;
++		out_be32(&uf_regs->upsmr, in_be32(&uf_regs->upsmr)&~UPSMR_PRO);
+ 
+ 		p_82xx_addr_filt =
+-		    (struct ucc_geth_82xx_address_filtering_pram *) ugeth->
++		    (struct ucc_geth_82xx_address_filtering_pram __iomem *) ugeth->
+ 		    p_rx_glbl_pram->addressfiltering;
+ 
+ 		if (dev->flags & IFF_ALLMULTI) {
+@@ -2270,7 +2275,7 @@ static void ucc_geth_set_multi(struct net_device *dev)
+ 
+ static void ucc_geth_stop(struct ucc_geth_private *ugeth)
+ {
+-	struct ucc_geth *ug_regs = ugeth->ug_regs;
++	struct ucc_geth __iomem *ug_regs = ugeth->ug_regs;
+ 	struct phy_device *phydev = ugeth->phydev;
+ 	u32 tempval;
+ 
+@@ -2419,20 +2424,20 @@ static int ucc_struct_init(struct ucc_geth_private *ugeth)
+ 		return -ENOMEM;
+ 	}
+ 
+-	ugeth->ug_regs = (struct ucc_geth *) ioremap(uf_info->regs, sizeof(struct ucc_geth));
++	ugeth->ug_regs = (struct ucc_geth __iomem *) ioremap(uf_info->regs, sizeof(struct ucc_geth));
+ 
+ 	return 0;
+ }
+ 
+ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ {
+-	struct ucc_geth_82xx_address_filtering_pram *p_82xx_addr_filt;
+-	struct ucc_geth_init_pram *p_init_enet_pram;
++	struct ucc_geth_82xx_address_filtering_pram __iomem *p_82xx_addr_filt;
++	struct ucc_geth_init_pram __iomem *p_init_enet_pram;
+ 	struct ucc_fast_private *uccf;
+ 	struct ucc_geth_info *ug_info;
+ 	struct ucc_fast_info *uf_info;
+-	struct ucc_fast *uf_regs;
+-	struct ucc_geth *ug_regs;
++	struct ucc_fast __iomem *uf_regs;
++	struct ucc_geth __iomem *ug_regs;
+ 	int ret_val = -EINVAL;
+ 	u32 remoder = UCC_GETH_REMODER_INIT;
+ 	u32 init_enet_pram_offset, cecr_subblock, command, maccfg1;
+@@ -2440,7 +2445,8 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 	u16 temoder = UCC_GETH_TEMODER_INIT;
+ 	u16 test;
+ 	u8 function_code = 0;
+-	u8 *bd, *endOfRing;
++	u8 __iomem *bd;
++	u8 __iomem *endOfRing;
+ 	u8 numThreadsRxNumerical, numThreadsTxNumerical;
+ 
+ 	ugeth_vdbg("%s: IN", __FUNCTION__);
+@@ -2602,11 +2608,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 			if (UCC_GETH_TX_BD_RING_ALIGNMENT > 4)
+ 				align = UCC_GETH_TX_BD_RING_ALIGNMENT;
+ 			ugeth->tx_bd_ring_offset[j] =
+-				kmalloc((u32) (length + align), GFP_KERNEL);
++				(u32) kmalloc((u32) (length + align), GFP_KERNEL);
+ 
+ 			if (ugeth->tx_bd_ring_offset[j] != 0)
+ 				ugeth->p_tx_bd_ring[j] =
+-					(void*)((ugeth->tx_bd_ring_offset[j] +
++					(u8 __iomem *)((ugeth->tx_bd_ring_offset[j] +
+ 					align) & ~(align - 1));
+ 		} else if (uf_info->bd_mem_part == MEM_PART_MURAM) {
+ 			ugeth->tx_bd_ring_offset[j] =
+@@ -2614,7 +2620,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 					   UCC_GETH_TX_BD_RING_ALIGNMENT);
+ 			if (!IS_ERR_VALUE(ugeth->tx_bd_ring_offset[j]))
+ 				ugeth->p_tx_bd_ring[j] =
+-				    (u8 *) qe_muram_addr(ugeth->
++				    (u8 __iomem *) qe_muram_addr(ugeth->
+ 							 tx_bd_ring_offset[j]);
+ 		}
+ 		if (!ugeth->p_tx_bd_ring[j]) {
+@@ -2626,8 +2632,8 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 			return -ENOMEM;
+ 		}
+ 		/* Zero unused end of bd ring, according to spec */
+-		memset(ugeth->p_tx_bd_ring[j] +
+-		       ug_info->bdRingLenTx[j] * sizeof(struct qe_bd), 0,
++		memset_io((void __iomem *)(ugeth->p_tx_bd_ring[j] +
++		       ug_info->bdRingLenTx[j] * sizeof(struct qe_bd)), 0,
+ 		       length - ug_info->bdRingLenTx[j] * sizeof(struct qe_bd));
+ 	}
+ 
+@@ -2639,10 +2645,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 			if (UCC_GETH_RX_BD_RING_ALIGNMENT > 4)
+ 				align = UCC_GETH_RX_BD_RING_ALIGNMENT;
+ 			ugeth->rx_bd_ring_offset[j] =
+-				kmalloc((u32) (length + align), GFP_KERNEL);
++				(u32) kmalloc((u32) (length + align), GFP_KERNEL);
+ 			if (ugeth->rx_bd_ring_offset[j] != 0)
+ 				ugeth->p_rx_bd_ring[j] =
+-					(void*)((ugeth->rx_bd_ring_offset[j] +
++					(u8 __iomem *)((ugeth->rx_bd_ring_offset[j] +
+ 					align) & ~(align - 1));
+ 		} else if (uf_info->bd_mem_part == MEM_PART_MURAM) {
+ 			ugeth->rx_bd_ring_offset[j] =
+@@ -2650,7 +2656,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 					   UCC_GETH_RX_BD_RING_ALIGNMENT);
+ 			if (!IS_ERR_VALUE(ugeth->rx_bd_ring_offset[j]))
+ 				ugeth->p_rx_bd_ring[j] =
+-				    (u8 *) qe_muram_addr(ugeth->
++				    (u8 __iomem *) qe_muram_addr(ugeth->
+ 							 rx_bd_ring_offset[j]);
+ 		}
+ 		if (!ugeth->p_rx_bd_ring[j]) {
+@@ -2685,14 +2691,14 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		bd = ugeth->confBd[j] = ugeth->txBd[j] = ugeth->p_tx_bd_ring[j];
+ 		for (i = 0; i < ug_info->bdRingLenTx[j]; i++) {
+ 			/* clear bd buffer */
+-			out_be32(&((struct qe_bd *)bd)->buf, 0);
++			out_be32(&((struct qe_bd __iomem *)bd)->buf, 0);
+ 			/* set bd status and length */
+-			out_be32((u32 *)bd, 0);
++			out_be32((u32 __iomem *)bd, 0);
+ 			bd += sizeof(struct qe_bd);
+ 		}
+ 		bd -= sizeof(struct qe_bd);
+ 		/* set bd status and length */
+-		out_be32((u32 *)bd, T_W);	/* for last BD set Wrap bit */
++		out_be32((u32 __iomem *)bd, T_W); /* for last BD set Wrap bit */
+ 	}
+ 
+ 	/* Init Rx bds */
+@@ -2717,14 +2723,14 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		bd = ugeth->rxBd[j] = ugeth->p_rx_bd_ring[j];
+ 		for (i = 0; i < ug_info->bdRingLenRx[j]; i++) {
+ 			/* set bd status and length */
+-			out_be32((u32 *)bd, R_I);
++			out_be32((u32 __iomem *)bd, R_I);
+ 			/* clear bd buffer */
+-			out_be32(&((struct qe_bd *)bd)->buf, 0);
++			out_be32(&((struct qe_bd __iomem *)bd)->buf, 0);
+ 			bd += sizeof(struct qe_bd);
+ 		}
+ 		bd -= sizeof(struct qe_bd);
+ 		/* set bd status and length */
+-		out_be32((u32 *)bd, R_W); /* for last BD set Wrap bit */
++		out_be32((u32 __iomem *)bd, R_W); /* for last BD set Wrap bit */
+ 	}
+ 
+ 	/*
+@@ -2744,10 +2750,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		return -ENOMEM;
+ 	}
+ 	ugeth->p_tx_glbl_pram =
+-	    (struct ucc_geth_tx_global_pram *) qe_muram_addr(ugeth->
++	    (struct ucc_geth_tx_global_pram __iomem *) qe_muram_addr(ugeth->
+ 							tx_glbl_pram_offset);
+ 	/* Zero out p_tx_glbl_pram */
+-	memset(ugeth->p_tx_glbl_pram, 0, sizeof(struct ucc_geth_tx_global_pram));
++	memset_io((void __iomem *)ugeth->p_tx_glbl_pram, 0, sizeof(struct ucc_geth_tx_global_pram));
+ 
+ 	/* Fill global PRAM */
+ 
+@@ -2768,7 +2774,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 	}
+ 
+ 	ugeth->p_thread_data_tx =
+-	    (struct ucc_geth_thread_data_tx *) qe_muram_addr(ugeth->
++	    (struct ucc_geth_thread_data_tx __iomem *) qe_muram_addr(ugeth->
+ 							thread_dat_tx_offset);
+ 	out_be32(&ugeth->p_tx_glbl_pram->tqptr, ugeth->thread_dat_tx_offset);
+ 
+@@ -2779,7 +2785,8 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 
+ 	/* iphoffset */
+ 	for (i = 0; i < TX_IP_OFFSET_ENTRY_MAX; i++)
+-		ugeth->p_tx_glbl_pram->iphoffset[i] = ug_info->iphoffset[i];
++		out_8(&ugeth->p_tx_glbl_pram->iphoffset[i],
++				ug_info->iphoffset[i]);
+ 
+ 	/* SQPTR */
+ 	/* Size varies with number of Tx queues */
+@@ -2797,7 +2804,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 	}
+ 
+ 	ugeth->p_send_q_mem_reg =
+-	    (struct ucc_geth_send_queue_mem_region *) qe_muram_addr(ugeth->
++	    (struct ucc_geth_send_queue_mem_region __iomem *) qe_muram_addr(ugeth->
+ 			send_q_mem_reg_offset);
+ 	out_be32(&ugeth->p_tx_glbl_pram->sqptr, ugeth->send_q_mem_reg_offset);
+ 
+@@ -2841,25 +2848,26 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		}
+ 
+ 		ugeth->p_scheduler =
+-		    (struct ucc_geth_scheduler *) qe_muram_addr(ugeth->
++		    (struct ucc_geth_scheduler __iomem *) qe_muram_addr(ugeth->
+ 							   scheduler_offset);
+ 		out_be32(&ugeth->p_tx_glbl_pram->schedulerbasepointer,
+ 			 ugeth->scheduler_offset);
+ 		/* Zero out p_scheduler */
+-		memset(ugeth->p_scheduler, 0, sizeof(struct ucc_geth_scheduler));
++		memset_io((void __iomem *)ugeth->p_scheduler, 0, sizeof(struct ucc_geth_scheduler));
+ 
+ 		/* Set values in scheduler */
+ 		out_be32(&ugeth->p_scheduler->mblinterval,
+ 			 ug_info->mblinterval);
+ 		out_be16(&ugeth->p_scheduler->nortsrbytetime,
+ 			 ug_info->nortsrbytetime);
+-		ugeth->p_scheduler->fracsiz = ug_info->fracsiz;
+-		ugeth->p_scheduler->strictpriorityq = ug_info->strictpriorityq;
+-		ugeth->p_scheduler->txasap = ug_info->txasap;
+-		ugeth->p_scheduler->extrabw = ug_info->extrabw;
++		out_8(&ugeth->p_scheduler->fracsiz, ug_info->fracsiz);
++		out_8(&ugeth->p_scheduler->strictpriorityq,
++				ug_info->strictpriorityq);
++		out_8(&ugeth->p_scheduler->txasap, ug_info->txasap);
++		out_8(&ugeth->p_scheduler->extrabw, ug_info->extrabw);
+ 		for (i = 0; i < NUM_TX_QUEUES; i++)
+-			ugeth->p_scheduler->weightfactor[i] =
+-			    ug_info->weightfactor[i];
++			out_8(&ugeth->p_scheduler->weightfactor[i],
++			    ug_info->weightfactor[i]);
+ 
+ 		/* Set pointers to cpucount registers in scheduler */
+ 		ugeth->p_cpucount[0] = &(ugeth->p_scheduler->cpucount0);
+@@ -2890,10 +2898,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 			return -ENOMEM;
+ 		}
+ 		ugeth->p_tx_fw_statistics_pram =
+-		    (struct ucc_geth_tx_firmware_statistics_pram *)
++		    (struct ucc_geth_tx_firmware_statistics_pram __iomem *)
+ 		    qe_muram_addr(ugeth->tx_fw_statistics_pram_offset);
+ 		/* Zero out p_tx_fw_statistics_pram */
+-		memset(ugeth->p_tx_fw_statistics_pram,
++		memset_io((void __iomem *)ugeth->p_tx_fw_statistics_pram,
+ 		       0, sizeof(struct ucc_geth_tx_firmware_statistics_pram));
+ 	}
+ 
+@@ -2930,10 +2938,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		return -ENOMEM;
+ 	}
+ 	ugeth->p_rx_glbl_pram =
+-	    (struct ucc_geth_rx_global_pram *) qe_muram_addr(ugeth->
++	    (struct ucc_geth_rx_global_pram __iomem *) qe_muram_addr(ugeth->
+ 							rx_glbl_pram_offset);
+ 	/* Zero out p_rx_glbl_pram */
+-	memset(ugeth->p_rx_glbl_pram, 0, sizeof(struct ucc_geth_rx_global_pram));
++	memset_io((void __iomem *)ugeth->p_rx_glbl_pram, 0, sizeof(struct ucc_geth_rx_global_pram));
+ 
+ 	/* Fill global PRAM */
+ 
+@@ -2953,7 +2961,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 	}
+ 
+ 	ugeth->p_thread_data_rx =
+-	    (struct ucc_geth_thread_data_rx *) qe_muram_addr(ugeth->
++	    (struct ucc_geth_thread_data_rx __iomem *) qe_muram_addr(ugeth->
+ 							thread_dat_rx_offset);
+ 	out_be32(&ugeth->p_rx_glbl_pram->rqptr, ugeth->thread_dat_rx_offset);
+ 
+@@ -2976,10 +2984,10 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 			return -ENOMEM;
+ 		}
+ 		ugeth->p_rx_fw_statistics_pram =
+-		    (struct ucc_geth_rx_firmware_statistics_pram *)
++		    (struct ucc_geth_rx_firmware_statistics_pram __iomem *)
+ 		    qe_muram_addr(ugeth->rx_fw_statistics_pram_offset);
+ 		/* Zero out p_rx_fw_statistics_pram */
+-		memset(ugeth->p_rx_fw_statistics_pram, 0,
++		memset_io((void __iomem *)ugeth->p_rx_fw_statistics_pram, 0,
+ 		       sizeof(struct ucc_geth_rx_firmware_statistics_pram));
+ 	}
+ 
+@@ -3000,7 +3008,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 	}
+ 
+ 	ugeth->p_rx_irq_coalescing_tbl =
+-	    (struct ucc_geth_rx_interrupt_coalescing_table *)
++	    (struct ucc_geth_rx_interrupt_coalescing_table __iomem *)
+ 	    qe_muram_addr(ugeth->rx_irq_coalescing_tbl_offset);
+ 	out_be32(&ugeth->p_rx_glbl_pram->intcoalescingptr,
+ 		 ugeth->rx_irq_coalescing_tbl_offset);
+@@ -3069,11 +3077,11 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 	}
+ 
+ 	ugeth->p_rx_bd_qs_tbl =
+-	    (struct ucc_geth_rx_bd_queues_entry *) qe_muram_addr(ugeth->
++	    (struct ucc_geth_rx_bd_queues_entry __iomem *) qe_muram_addr(ugeth->
+ 				    rx_bd_qs_tbl_offset);
+ 	out_be32(&ugeth->p_rx_glbl_pram->rbdqptr, ugeth->rx_bd_qs_tbl_offset);
+ 	/* Zero out p_rx_bd_qs_tbl */
+-	memset(ugeth->p_rx_bd_qs_tbl,
++	memset_io((void __iomem *)ugeth->p_rx_bd_qs_tbl,
+ 	       0,
+ 	       ug_info->numQueuesRx * (sizeof(struct ucc_geth_rx_bd_queues_entry) +
+ 				       sizeof(struct ucc_geth_rx_prefetched_bds)));
+@@ -3133,7 +3141,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		&ugeth->p_rx_glbl_pram->remoder);
+ 
+ 	/* function code register */
+-	ugeth->p_rx_glbl_pram->rstate = function_code;
++	out_8(&ugeth->p_rx_glbl_pram->rstate, function_code);
+ 
+ 	/* initialize extended filtering */
+ 	if (ug_info->rxExtendedFiltering) {
+@@ -3160,7 +3168,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		}
+ 
+ 		ugeth->p_exf_glbl_param =
+-		    (struct ucc_geth_exf_global_pram *) qe_muram_addr(ugeth->
++		    (struct ucc_geth_exf_global_pram __iomem *) qe_muram_addr(ugeth->
+ 				 exf_glbl_param_offset);
+ 		out_be32(&ugeth->p_rx_glbl_pram->exfGlobalParam,
+ 			 ugeth->exf_glbl_param_offset);
+@@ -3175,7 +3183,7 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 			ugeth_82xx_filtering_clear_addr_in_paddr(ugeth, (u8) j);
+ 
+ 		p_82xx_addr_filt =
+-		    (struct ucc_geth_82xx_address_filtering_pram *) ugeth->
++		    (struct ucc_geth_82xx_address_filtering_pram __iomem *) ugeth->
+ 		    p_rx_glbl_pram->addressfiltering;
+ 
+ 		ugeth_82xx_filtering_clear_all_addr_in_hash(ugeth,
+@@ -3307,17 +3315,21 @@ static int ucc_geth_startup(struct ucc_geth_private *ugeth)
+ 		return -ENOMEM;
+ 	}
+ 	p_init_enet_pram =
+-	    (struct ucc_geth_init_pram *) qe_muram_addr(init_enet_pram_offset);
++	    (struct ucc_geth_init_pram __iomem *) qe_muram_addr(init_enet_pram_offset);
+ 
+ 	/* Copy shadow InitEnet command parameter structure into PRAM */
+-	p_init_enet_pram->resinit1 = ugeth->p_init_enet_param_shadow->resinit1;
+-	p_init_enet_pram->resinit2 = ugeth->p_init_enet_param_shadow->resinit2;
+-	p_init_enet_pram->resinit3 = ugeth->p_init_enet_param_shadow->resinit3;
+-	p_init_enet_pram->resinit4 = ugeth->p_init_enet_param_shadow->resinit4;
++	out_8(&p_init_enet_pram->resinit1,
++			ugeth->p_init_enet_param_shadow->resinit1);
++	out_8(&p_init_enet_pram->resinit2,
++			ugeth->p_init_enet_param_shadow->resinit2);
++	out_8(&p_init_enet_pram->resinit3,
++			ugeth->p_init_enet_param_shadow->resinit3);
++	out_8(&p_init_enet_pram->resinit4,
++			ugeth->p_init_enet_param_shadow->resinit4);
+ 	out_be16(&p_init_enet_pram->resinit5,
+ 		 ugeth->p_init_enet_param_shadow->resinit5);
+-	p_init_enet_pram->largestexternallookupkeysize =
+-	    ugeth->p_init_enet_param_shadow->largestexternallookupkeysize;
++	out_8(&p_init_enet_pram->largestexternallookupkeysize,
++	    ugeth->p_init_enet_param_shadow->largestexternallookupkeysize);
+ 	out_be32(&p_init_enet_pram->rgftgfrxglobal,
+ 		 ugeth->p_init_enet_param_shadow->rgftgfrxglobal);
+ 	for (i = 0; i < ENET_INIT_PARAM_MAX_ENTRIES_RX; i++)
+@@ -3371,7 +3383,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ #ifdef CONFIG_UGETH_TX_ON_DEMAND
+ 	struct ucc_fast_private *uccf;
+ #endif
+-	u8 *bd;			/* BD pointer */
++	u8 __iomem *bd;			/* BD pointer */
+ 	u32 bd_status;
+ 	u8 txQ = 0;
+ 
+@@ -3383,7 +3395,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 
+ 	/* Start from the next BD that should be filled */
+ 	bd = ugeth->txBd[txQ];
+-	bd_status = in_be32((u32 *)bd);
++	bd_status = in_be32((u32 __iomem *)bd);
+ 	/* Save the skb pointer so we can free it later */
+ 	ugeth->tx_skbuff[txQ][ugeth->skb_curtx[txQ]] = skb;
+ 
+@@ -3393,7 +3405,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	     1) & TX_RING_MOD_MASK(ugeth->ug_info->bdRingLenTx[txQ]);
+ 
+ 	/* set up the buffer descriptor */
+-	out_be32(&((struct qe_bd *)bd)->buf,
++	out_be32(&((struct qe_bd __iomem *)bd)->buf,
+ 		      dma_map_single(NULL, skb->data, skb->len, DMA_TO_DEVICE));
+ 
+ 	/* printk(KERN_DEBUG"skb->data is 0x%x\n",skb->data); */
+@@ -3401,7 +3413,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ 	bd_status = (bd_status & T_W) | T_R | T_I | T_L | skb->len;
+ 
+ 	/* set bd status and length */
+-	out_be32((u32 *)bd, bd_status);
++	out_be32((u32 __iomem *)bd, bd_status);
+ 
+ 	dev->trans_start = jiffies;
+ 
+@@ -3441,7 +3453,7 @@ static int ucc_geth_start_xmit(struct sk_buff *skb, struct net_device *dev)
+ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit)
+ {
+ 	struct sk_buff *skb;
+-	u8 *bd;
++	u8 __iomem *bd;
+ 	u16 length, howmany = 0;
+ 	u32 bd_status;
+ 	u8 *bdBuffer;
+@@ -3454,11 +3466,11 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit
+ 	/* collect received buffers */
+ 	bd = ugeth->rxBd[rxQ];
+ 
+-	bd_status = in_be32((u32 *)bd);
++	bd_status = in_be32((u32 __iomem *)bd);
+ 
+ 	/* while there are received buffers and BD is full (~R_E) */
+ 	while (!((bd_status & (R_E)) || (--rx_work_limit < 0))) {
+-		bdBuffer = (u8 *) in_be32(&((struct qe_bd *)bd)->buf);
++		bdBuffer = (u8 *) in_be32(&((struct qe_bd __iomem *)bd)->buf);
+ 		length = (u16) ((bd_status & BD_LENGTH_MASK) - 4);
+ 		skb = ugeth->rx_skbuff[rxQ][ugeth->skb_currx[rxQ]];
+ 
+@@ -3516,7 +3528,7 @@ static int ucc_geth_rx(struct ucc_geth_private *ugeth, u8 rxQ, int rx_work_limit
+ 		else
+ 			bd += sizeof(struct qe_bd);
+ 
+-		bd_status = in_be32((u32 *)bd);
++		bd_status = in_be32((u32 __iomem *)bd);
+ 	}
+ 
+ 	ugeth->rxBd[rxQ] = bd;
+@@ -3527,11 +3539,11 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
+ {
+ 	/* Start from the next BD that should be filled */
+ 	struct ucc_geth_private *ugeth = netdev_priv(dev);
+-	u8 *bd;			/* BD pointer */
++	u8 __iomem *bd;		/* BD pointer */
+ 	u32 bd_status;
+ 
+ 	bd = ugeth->confBd[txQ];
+-	bd_status = in_be32((u32 *)bd);
++	bd_status = in_be32((u32 __iomem *)bd);
+ 
+ 	/* Normal processing. */
+ 	while ((bd_status & T_R) == 0) {
+@@ -3561,7 +3573,7 @@ static int ucc_geth_tx(struct net_device *dev, u8 txQ)
+ 			bd += sizeof(struct qe_bd);
+ 		else
+ 			bd = ugeth->p_tx_bd_ring[txQ];
+-		bd_status = in_be32((u32 *)bd);
++		bd_status = in_be32((u32 __iomem *)bd);
+ 	}
+ 	ugeth->confBd[txQ] = bd;
+ 	return 0;
+@@ -3910,7 +3922,7 @@ static int ucc_geth_probe(struct of_device* ofdev, const struct of_device_id *ma
+ 			return -EINVAL;
+ 		}
+ 	} else {
+-		prop = of_get_property(np, "rx-clock", NULL);
++		prop = of_get_property(np, "tx-clock", NULL);
+ 		if (!prop) {
+ 			printk(KERN_ERR
+ 				"ucc_geth: missing tx-clock-name property\n");
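
The bulk of the ucc_geth.c changes above is a conversion of register
pointers from volatile u32 * to u32 __iomem * with all accesses routed
through the powerpc I/O accessors (in_8/out_8, in_be16/out_be16,
in_be32/out_be32). That lets sparse (make C=1) warn whenever device
memory is dereferenced directly. A minimal sketch of the pattern, with
a hypothetical register pointer and bit mask for illustration:

	#include <asm/io.h>	/* powerpc in_be32()/out_be32() */

	/* Sketch only: set a mode bit with a read-modify-write through
	 * the accessors instead of *reg |= bit on a volatile pointer.
	 */
	static void example_set_bit(u32 __iomem *reg, u32 bit)
	{
		out_be32(reg, in_be32(reg) | bit);
	}

This is exactly the shape the upsmr handling in ucc_geth_set_multi()
above takes after the conversion.
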
+diff --git a/drivers/net/ucc_geth.h b/drivers/net/ucc_geth.h
+index 9f8b758..abc0e22 100644
+--- a/drivers/net/ucc_geth.h
++++ b/drivers/net/ucc_geth.h
+@@ -700,8 +700,8 @@ struct ucc_geth_82xx_address_filtering_pram {
+ 	u32 iaddr_l;		/* individual address filter, low */
+ 	u32 gaddr_h;		/* group address filter, high */
+ 	u32 gaddr_l;		/* group address filter, low */
+-	struct ucc_geth_82xx_enet_address taddr;
+-	struct ucc_geth_82xx_enet_address paddr[NUM_OF_PADDRS];
++	struct ucc_geth_82xx_enet_address __iomem taddr;
++	struct ucc_geth_82xx_enet_address __iomem paddr[NUM_OF_PADDRS];
+ 	u8 res0[0x40 - 0x38];
+ } __attribute__ ((packed));
+ 
+@@ -1186,40 +1186,40 @@ struct ucc_geth_private {
+ 	struct ucc_fast_private *uccf;
+ 	struct net_device *dev;
+ 	struct napi_struct napi;
+-	struct ucc_geth *ug_regs;
++	struct ucc_geth __iomem *ug_regs;
+ 	struct ucc_geth_init_pram *p_init_enet_param_shadow;
+-	struct ucc_geth_exf_global_pram *p_exf_glbl_param;
++	struct ucc_geth_exf_global_pram __iomem *p_exf_glbl_param;
+ 	u32 exf_glbl_param_offset;
+-	struct ucc_geth_rx_global_pram *p_rx_glbl_pram;
++	struct ucc_geth_rx_global_pram __iomem *p_rx_glbl_pram;
+ 	u32 rx_glbl_pram_offset;
+-	struct ucc_geth_tx_global_pram *p_tx_glbl_pram;
++	struct ucc_geth_tx_global_pram __iomem *p_tx_glbl_pram;
+ 	u32 tx_glbl_pram_offset;
+-	struct ucc_geth_send_queue_mem_region *p_send_q_mem_reg;
++	struct ucc_geth_send_queue_mem_region __iomem *p_send_q_mem_reg;
+ 	u32 send_q_mem_reg_offset;
+-	struct ucc_geth_thread_data_tx *p_thread_data_tx;
++	struct ucc_geth_thread_data_tx __iomem *p_thread_data_tx;
+ 	u32 thread_dat_tx_offset;
+-	struct ucc_geth_thread_data_rx *p_thread_data_rx;
++	struct ucc_geth_thread_data_rx __iomem *p_thread_data_rx;
+ 	u32 thread_dat_rx_offset;
+-	struct ucc_geth_scheduler *p_scheduler;
++	struct ucc_geth_scheduler __iomem *p_scheduler;
+ 	u32 scheduler_offset;
+-	struct ucc_geth_tx_firmware_statistics_pram *p_tx_fw_statistics_pram;
++	struct ucc_geth_tx_firmware_statistics_pram __iomem *p_tx_fw_statistics_pram;
+ 	u32 tx_fw_statistics_pram_offset;
+-	struct ucc_geth_rx_firmware_statistics_pram *p_rx_fw_statistics_pram;
++	struct ucc_geth_rx_firmware_statistics_pram __iomem *p_rx_fw_statistics_pram;
+ 	u32 rx_fw_statistics_pram_offset;
+-	struct ucc_geth_rx_interrupt_coalescing_table *p_rx_irq_coalescing_tbl;
++	struct ucc_geth_rx_interrupt_coalescing_table __iomem *p_rx_irq_coalescing_tbl;
+ 	u32 rx_irq_coalescing_tbl_offset;
+-	struct ucc_geth_rx_bd_queues_entry *p_rx_bd_qs_tbl;
++	struct ucc_geth_rx_bd_queues_entry __iomem *p_rx_bd_qs_tbl;
+ 	u32 rx_bd_qs_tbl_offset;
+-	u8 *p_tx_bd_ring[NUM_TX_QUEUES];
++	u8 __iomem *p_tx_bd_ring[NUM_TX_QUEUES];
+ 	u32 tx_bd_ring_offset[NUM_TX_QUEUES];
+-	u8 *p_rx_bd_ring[NUM_RX_QUEUES];
++	u8 __iomem *p_rx_bd_ring[NUM_RX_QUEUES];
+ 	u32 rx_bd_ring_offset[NUM_RX_QUEUES];
+-	u8 *confBd[NUM_TX_QUEUES];
+-	u8 *txBd[NUM_TX_QUEUES];
+-	u8 *rxBd[NUM_RX_QUEUES];
++	u8 __iomem *confBd[NUM_TX_QUEUES];
++	u8 __iomem *txBd[NUM_TX_QUEUES];
++	u8 __iomem *rxBd[NUM_RX_QUEUES];
+ 	int badFrame[NUM_RX_QUEUES];
+ 	u16 cpucount[NUM_TX_QUEUES];
+-	volatile u16 *p_cpucount[NUM_TX_QUEUES];
++	u16 __iomem *p_cpucount[NUM_TX_QUEUES];
+ 	int indAddrRegUsed[NUM_OF_PADDRS];
+ 	u8 paddr[NUM_OF_PADDRS][ENET_NUM_OCTETS_PER_ADDRESS];	/* ethernet address */
+ 	u8 numGroupAddrInHash;
+@@ -1251,4 +1251,12 @@ struct ucc_geth_private {
+ 	int oldlink;
+ };
+ 
++void uec_set_ethtool_ops(struct net_device *netdev);
++int init_flow_control_params(u32 automatic_flow_control_mode,
++		int rx_flow_control_enable, int tx_flow_control_enable,
++		u16 pause_period, u16 extension_field,
++		u32 __iomem *upsmr_register, u32 __iomem *uempr_register,
++		u32 __iomem *maccfg1_register);
++
++
+ #endif				/* __UCC_GETH_H__ */
+diff --git a/drivers/net/ucc_geth_ethtool.c b/drivers/net/ucc_geth_ethtool.c
+index 9a9622c..299b7f1 100644
+--- a/drivers/net/ucc_geth_ethtool.c
++++ b/drivers/net/ucc_geth_ethtool.c
+@@ -108,12 +108,6 @@ static char rx_fw_stat_gstrings[][ETH_GSTRING_LEN] = {
+ #define UEC_TX_FW_STATS_LEN ARRAY_SIZE(tx_fw_stat_gstrings)
+ #define UEC_RX_FW_STATS_LEN ARRAY_SIZE(rx_fw_stat_gstrings)
+ 
+-extern int init_flow_control_params(u32 automatic_flow_control_mode,
+-		int rx_flow_control_enable,
+-		int tx_flow_control_enable, u16 pause_period,
+-		u16 extension_field, volatile u32 *upsmr_register,
+-		volatile u32 *uempr_register, volatile u32 *maccfg1_register);
+-
+ static int
+ uec_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd)
+ {
+diff --git a/drivers/net/ucc_geth_mii.c b/drivers/net/ucc_geth_mii.c
+index 2af4907..9404747 100644
+--- a/drivers/net/ucc_geth_mii.c
++++ b/drivers/net/ucc_geth_mii.c
+@@ -104,7 +104,7 @@ int uec_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+ }
+ 
+ /* Reset the MIIM registers, and wait for the bus to free */
+-int uec_mdio_reset(struct mii_bus *bus)
++static int uec_mdio_reset(struct mii_bus *bus)
+ {
+ 	struct ucc_mii_mng __iomem *regs = (void __iomem *)bus->priv;
+ 	unsigned int timeout = PHY_INIT_TIMEOUT;
+@@ -240,7 +240,7 @@ reg_map_fail:
+ 	return err;
+ }
+ 
+-int uec_mdio_remove(struct of_device *ofdev)
++static int uec_mdio_remove(struct of_device *ofdev)
+ {
+ 	struct device *device = &ofdev->dev;
+ 	struct mii_bus *bus = dev_get_drvdata(device);
+diff --git a/drivers/net/usb/asix.c b/drivers/net/usb/asix.c
+index 6f245cf..dc6f097 100644
+--- a/drivers/net/usb/asix.c
++++ b/drivers/net/usb/asix.c
+@@ -1381,6 +1381,10 @@ static const struct usb_device_id	products [] = {
+ 	USB_DEVICE (0x0411, 0x003d),
+ 	.driver_info =  (unsigned long) &ax8817x_info,
+ }, {
++	// Buffalo LUA-U2-GT 10/100/1000
++	USB_DEVICE (0x0411, 0x006e),
++	.driver_info =  (unsigned long) &ax88178_info,
++}, {
+ 	// Sitecom LN-029 "USB 2.0 10/100 Ethernet adapter"
+ 	USB_DEVICE (0x6189, 0x182d),
+ 	.driver_info =  (unsigned long) &ax8817x_info,
+diff --git a/drivers/net/wan/lapbether.c b/drivers/net/wan/lapbether.c
+index b5860b9..24fd613 100644
+--- a/drivers/net/wan/lapbether.c
++++ b/drivers/net/wan/lapbether.c
+@@ -459,6 +459,7 @@ static void __exit lapbeth_cleanup_driver(void)
+ 	list_for_each_safe(entry, tmp, &lapbeth_devices) {
+ 		lapbeth = list_entry(entry, struct lapbethdev, node);
+ 
++		dev_put(lapbeth->ethdev);
+ 		unregister_netdevice(lapbeth->axdev);
+ 	}
+ 	rtnl_unlock();
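
The lapbether fix pairs the reference taken on the underlying ethernet
device at creation time with a dev_put() on module unload; without it
the refcount never reaches zero and the netdevice can never be freed.
The general hold/put discipline, sketched here with dev_get_by_name()
(the device name is illustrative only):

	struct net_device *ethdev;

	ethdev = dev_get_by_name(&init_net, "eth0"); /* takes a reference */
	if (!ethdev)
		return -ENODEV;
	/* ... use ethdev ... */
	dev_put(ethdev);	/* release it, or the device leaks */
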
+diff --git a/drivers/net/wireless/iwlwifi/Kconfig b/drivers/net/wireless/iwlwifi/Kconfig
+index d5b7a76..62fb89d 100644
+--- a/drivers/net/wireless/iwlwifi/Kconfig
++++ b/drivers/net/wireless/iwlwifi/Kconfig
+@@ -1,6 +1,5 @@
+ config IWLWIFI
+-	bool
+-	default n
++	tristate
+ 
+ config IWLCORE
+ 	tristate "Intel Wireless Wifi Core"
+diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
+index 4a55bf3..3706ce7 100644
+--- a/drivers/pci/probe.c
++++ b/drivers/pci/probe.c
+@@ -842,13 +842,25 @@ static void set_pcie_port_type(struct pci_dev *pdev)
+  * reading the dword at 0x100 which must either be 0 or a valid extended
+  * capability header.
+  */
+-int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
++int pci_cfg_space_size_ext(struct pci_dev *dev)
+ {
+-	int pos;
+ 	u32 status;
+ 
+-	if (!check_exp_pcix)
+-		goto skip;
++	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
++		goto fail;
++	if (status == 0xffffffff)
++		goto fail;
++
++	return PCI_CFG_SPACE_EXP_SIZE;
++
++ fail:
++	return PCI_CFG_SPACE_SIZE;
++}
++
++int pci_cfg_space_size(struct pci_dev *dev)
++{
++	int pos;
++	u32 status;
+ 
+ 	pos = pci_find_capability(dev, PCI_CAP_ID_EXP);
+ 	if (!pos) {
+@@ -861,23 +873,12 @@ int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix)
+ 			goto fail;
+ 	}
+ 
+- skip:
+-	if (pci_read_config_dword(dev, 256, &status) != PCIBIOS_SUCCESSFUL)
+-		goto fail;
+-	if (status == 0xffffffff)
+-		goto fail;
+-
+-	return PCI_CFG_SPACE_EXP_SIZE;
++	return pci_cfg_space_size_ext(dev);
+ 
+  fail:
+ 	return PCI_CFG_SPACE_SIZE;
+ }
+ 
+-int pci_cfg_space_size(struct pci_dev *dev)
+-{
+-	return pci_cfg_space_size_ext(dev, 1);
+-}
+-
+ static void pci_release_bus_bridge_dev(struct device *dev)
+ {
+ 	kfree(dev);
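
The probe.c rework splits config-space sizing into two entry points:
pci_cfg_space_size_ext() now performs only the extended-space probe
(read the dword at offset 256 and treat a failed read or 0xffffffff as
"no extended space"), while pci_cfg_space_size() keeps the PCI Express
and PCI-X capability checks and then falls through to the _ext variant.
A caller that already knows the device is express-capable can therefore
skip the capability walk (sketch, assuming dev is a valid struct
pci_dev pointer):

	int size = pci_cfg_space_size_ext(dev);
	/* size is PCI_CFG_SPACE_SIZE (256) or PCI_CFG_SPACE_EXP_SIZE (4096) */
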
+diff --git a/drivers/rtc/rtc-ds1511.c b/drivers/rtc/rtc-ds1511.c
+index a83a40b..0f0d27d 100644
+--- a/drivers/rtc/rtc-ds1511.c
++++ b/drivers/rtc/rtc-ds1511.c
+@@ -184,7 +184,7 @@ ds1511_wdog_disable(void)
+ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
+ {
+ 	u8 mon, day, dow, hrs, min, sec, yrs, cen;
+-	unsigned int flags;
++	unsigned long flags;
+ 
+ 	/*
+ 	 * won't have to change this for a while
+@@ -247,7 +247,7 @@ static int ds1511_rtc_set_time(struct device *dev, struct rtc_time *rtc_tm)
+ static int ds1511_rtc_read_time(struct device *dev, struct rtc_time *rtc_tm)
+ {
+ 	unsigned int century;
+-	unsigned int flags;
++	unsigned long flags;
+ 
+ 	spin_lock_irqsave(&ds1511_lock, flags);
+ 	rtc_disable_update();
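
Both rtc-ds1511 hunks fix the type of the variable handed to
spin_lock_irqsave(): the saved interrupt state must be an unsigned
long, and an unsigned int silently truncates it on 64-bit targets. The
canonical shape is:

	unsigned long flags;	/* never int or unsigned int */

	spin_lock_irqsave(&ds1511_lock, flags);
	/* ... critical section ... */
	spin_unlock_irqrestore(&ds1511_lock, flags);
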
+diff --git a/drivers/s390/char/tty3270.c b/drivers/s390/char/tty3270.c
+index c1f2ade..5043150 100644
+--- a/drivers/s390/char/tty3270.c
++++ b/drivers/s390/char/tty3270.c
+@@ -965,8 +965,7 @@ tty3270_write_room(struct tty_struct *tty)
+  * Insert character into the screen at the current position with the
+  * current color and highlight. This function does NOT do cursor movement.
+  */
+-static int
+-tty3270_put_character(struct tty3270 *tp, char ch)
++static void tty3270_put_character(struct tty3270 *tp, char ch)
+ {
+ 	struct tty3270_line *line;
+ 	struct tty3270_cell *cell;
+@@ -986,7 +985,6 @@ tty3270_put_character(struct tty3270 *tp, char ch)
+ 	cell->character = tp->view.ascebc[(unsigned int) ch];
+ 	cell->highlight = tp->highlight;
+ 	cell->f_color = tp->f_color;
+-	return 1;
+ }
+ 
+ /*
+@@ -1612,16 +1610,15 @@ tty3270_write(struct tty_struct * tty,
+ /*
+  * Put single characters to the ttys character buffer
+  */
+-static void
+-tty3270_put_char(struct tty_struct *tty, unsigned char ch)
++static int tty3270_put_char(struct tty_struct *tty, unsigned char ch)
+ {
+ 	struct tty3270 *tp;
+ 
+ 	tp = tty->driver_data;
+-	if (!tp)
+-		return;
+-	if (tp->char_count < TTY3270_CHAR_BUF_SIZE)
+-		tp->char_buf[tp->char_count++] = ch;
++	if (!tp || tp->char_count >= TTY3270_CHAR_BUF_SIZE)
++		return 0;
++	tp->char_buf[tp->char_count++] = ch;
++	return 1;
+ }
+ 
+ /*
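
The tty3270 hunks track the tty operations rework in this merge window:
put_char() now returns int, nonzero when the character was actually
queued, so the tty core can see when a driver's buffer is full rather
than having characters dropped silently (and the internal helper that
could never fail loses its return value). A sketch of the new contract;
the port structure and buffer size here are illustrative, not from the
patch:

	/* Return 1 if the character was buffered, 0 if there was no room. */
	static int example_put_char(struct tty_struct *tty, unsigned char ch)
	{
		struct example_port *port = tty->driver_data;	/* hypothetical */

		if (!port || port->count >= EXAMPLE_BUF_SIZE)
			return 0;
		port->buf[port->count++] = ch;
		return 1;
	}
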
+diff --git a/drivers/s390/cio/blacklist.c b/drivers/s390/cio/blacklist.c
+index 40ef948..9c21b8f 100644
+--- a/drivers/s390/cio/blacklist.c
++++ b/drivers/s390/cio/blacklist.c
+@@ -19,6 +19,7 @@
+ 
+ #include <asm/cio.h>
+ #include <asm/uaccess.h>
++#include <asm/cio.h>
+ 
+ #include "blacklist.h"
+ #include "cio.h"
+@@ -43,164 +44,169 @@ typedef enum {add, free} range_action;
+  * Function: blacklist_range
+  * (Un-)blacklist the devices from-to
+  */
+-static void
+-blacklist_range (range_action action, unsigned int from, unsigned int to,
+-		 unsigned int ssid)
++static int blacklist_range(range_action action, unsigned int from_ssid,
++			   unsigned int to_ssid, unsigned int from,
++			   unsigned int to, int msgtrigger)
+ {
+-	if (!to)
+-		to = from;
+-
+-	if (from > to || to > __MAX_SUBCHANNEL || ssid > __MAX_SSID) {
+-		printk (KERN_WARNING "cio: Invalid blacklist range "
+-			"0.%x.%04x to 0.%x.%04x, skipping\n",
+-			ssid, from, ssid, to);
+-		return;
++	if ((from_ssid > to_ssid) || ((from_ssid == to_ssid) && (from > to))) {
++		if (msgtrigger)
++			printk(KERN_WARNING "cio: Invalid cio_ignore range "
++			       "0.%x.%04x-0.%x.%04x\n", from_ssid, from,
++			       to_ssid, to);
++		return 1;
+ 	}
+-	for (; from <= to; from++) {
++
++	while ((from_ssid < to_ssid) || ((from_ssid == to_ssid) &&
++	       (from <= to))) {
+ 		if (action == add)
+-			set_bit (from, bl_dev[ssid]);
++			set_bit(from, bl_dev[from_ssid]);
+ 		else
+-			clear_bit (from, bl_dev[ssid]);
++			clear_bit(from, bl_dev[from_ssid]);
++		from++;
++		if (from > __MAX_SUBCHANNEL) {
++			from_ssid++;
++			from = 0;
++		}
+ 	}
++
++	return 0;
+ }
+ 
+-/*
+- * Function: blacklist_busid
+- * Get devno/busid from given string.
+- * Shamelessly grabbed from dasd_devmap.c.
+- */
+-static int
+-blacklist_busid(char **str, int *id0, int *ssid, int *devno)
++static int pure_hex(char **cp, unsigned int *val, int min_digit,
++		    int max_digit, int max_val)
+ {
+-	int val, old_style;
+-	char *sav;
++	int diff;
++	unsigned int value;
+ 
+-	sav = *str;
++	diff = 0;
++	*val = 0;
+ 
+-	/* check for leading '0x' */
+-	old_style = 0;
+-	if ((*str)[0] == '0' && (*str)[1] == 'x') {
+-		*str += 2;
+-		old_style = 1;
+-	}
+-	if (!isxdigit((*str)[0]))	/* We require at least one hex digit */
+-		goto confused;
+-	val = simple_strtoul(*str, str, 16);
+-	if (old_style || (*str)[0] != '.') {
+-		*id0 = *ssid = 0;
+-		if (val < 0 || val > 0xffff)
+-			goto confused;
+-		*devno = val;
+-		if ((*str)[0] != ',' && (*str)[0] != '-' &&
+-		    (*str)[0] != '\n' && (*str)[0] != '\0')
+-			goto confused;
+-		return 0;
++	while (isxdigit(**cp) && (diff <= max_digit)) {
++
++		if (isdigit(**cp))
++			value = **cp - '0';
++		else
++			value = tolower(**cp) - 'a' + 10;
++		*val = *val * 16 + value;
++		(*cp)++;
++		diff++;
+ 	}
+-	/* New style x.y.z busid */
+-	if (val < 0 || val > 0xff)
+-		goto confused;
+-	*id0 = val;
+-	(*str)++;
+-	if (!isxdigit((*str)[0]))	/* We require at least one hex digit */
+-		goto confused;
+-	val = simple_strtoul(*str, str, 16);
+-	if (val < 0 || val > 0xff || (*str)++[0] != '.')
+-		goto confused;
+-	*ssid = val;
+-	if (!isxdigit((*str)[0]))	/* We require at least one hex digit */
+-		goto confused;
+-	val = simple_strtoul(*str, str, 16);
+-	if (val < 0 || val > 0xffff)
+-		goto confused;
+-	*devno = val;
+-	if ((*str)[0] != ',' && (*str)[0] != '-' &&
+-	    (*str)[0] != '\n' && (*str)[0] != '\0')
+-		goto confused;
++
++	if ((diff < min_digit) || (diff > max_digit) || (*val > max_val))
++		return 1;
++
+ 	return 0;
+-confused:
+-	strsep(str, ",\n");
+-	printk(KERN_WARNING "cio: Invalid cio_ignore parameter '%s'\n", sav);
+-	return 1;
+ }
+ 
+-static int
+-blacklist_parse_parameters (char *str, range_action action)
++static int parse_busid(char *str, int *cssid, int *ssid, int *devno,
++		       int msgtrigger)
+ {
+-	int from, to, from_id0, to_id0, from_ssid, to_ssid;
+-
+-	while (*str != 0 && *str != '\n') {
+-		range_action ra = action;
+-		while(*str == ',')
+-			str++;
+-		if (*str == '!') {
+-			ra = !action;
+-			++str;
++	char *str_work;
++	int val, rc, ret;
++
++	rc = 1;
++
++	if (*str == '\0')
++		goto out;
++
++	/* old style */
++	str_work = str;
++	val = simple_strtoul(str, &str_work, 16);
++
++	if (*str_work == '\0') {
++		if (val <= __MAX_SUBCHANNEL) {
++			*devno = val;
++			*ssid = 0;
++			*cssid = 0;
++			rc = 0;
+ 		}
++		goto out;
++	}
+ 
+-		/*
+-		 * Since we have to parse the proc commands and the
+-		 * kernel arguments we have to check four cases
+-		 */
+-		if (strncmp(str,"all,",4) == 0 || strcmp(str,"all") == 0 ||
+-		    strncmp(str,"all\n",4) == 0 || strncmp(str,"all ",4) == 0) {
+-			int j;
+-
+-			str += 3;
+-			for (j=0; j <= __MAX_SSID; j++)
+-				blacklist_range(ra, 0, __MAX_SUBCHANNEL, j);
+-		} else {
+-			int rc;
++	/* new style */
++	str_work = str;
++	ret = pure_hex(&str_work, cssid, 1, 2, __MAX_CSSID);
++	if (ret || (str_work[0] != '.'))
++		goto out;
++	str_work++;
++	ret = pure_hex(&str_work, ssid, 1, 1, __MAX_SSID);
++	if (ret || (str_work[0] != '.'))
++		goto out;
++	str_work++;
++	ret = pure_hex(&str_work, devno, 4, 4, __MAX_SUBCHANNEL);
++	if (ret || (str_work[0] != '\0'))
++		goto out;
++
++	rc = 0;
++out:
++	if (rc && msgtrigger)
++		printk(KERN_WARNING "cio: Invalid cio_ignore device '%s'\n",
++		       str);
++
++	return rc;
++}
+ 
+-			rc = blacklist_busid(&str, &from_id0,
+-					     &from_ssid, &from);
+-			if (rc)
+-				continue;
+-			to = from;
+-			to_id0 = from_id0;
+-			to_ssid = from_ssid;
+-			if (*str == '-') {
+-				str++;
+-				rc = blacklist_busid(&str, &to_id0,
+-						     &to_ssid, &to);
+-				if (rc)
+-					continue;
+-			}
+-			if (*str == '-') {
+-				printk(KERN_WARNING "cio: invalid cio_ignore "
+-					"parameter '%s'\n",
+-					strsep(&str, ",\n"));
+-				continue;
+-			}
+-			if ((from_id0 != to_id0) ||
+-			    (from_ssid != to_ssid)) {
+-				printk(KERN_WARNING "cio: invalid cio_ignore "
+-				       "range %x.%x.%04x-%x.%x.%04x\n",
+-				       from_id0, from_ssid, from,
+-				       to_id0, to_ssid, to);
+-				continue;
++static int blacklist_parse_parameters(char *str, range_action action,
++				      int msgtrigger)
++{
++	int from_cssid, to_cssid, from_ssid, to_ssid, from, to;
++	int rc, totalrc;
++	char *parm;
++	range_action ra;
++
++	totalrc = 0;
++
++	while ((parm = strsep(&str, ","))) {
++		rc = 0;
++		ra = action;
++		if (*parm == '!') {
++			if (ra == add)
++				ra = free;
++			else
++				ra = add;
++			parm++;
++		}
++		if (strcmp(parm, "all") == 0) {
++			from_cssid = 0;
++			from_ssid = 0;
++			from = 0;
++			to_cssid = __MAX_CSSID;
++			to_ssid = __MAX_SSID;
++			to = __MAX_SUBCHANNEL;
++		} else {
++			rc = parse_busid(strsep(&parm, "-"), &from_cssid,
++					 &from_ssid, &from, msgtrigger);
++			if (!rc) {
++				if (parm != NULL)
++					rc = parse_busid(parm, &to_cssid,
++							 &to_ssid, &to,
++							 msgtrigger);
++				else {
++					to_cssid = from_cssid;
++					to_ssid = from_ssid;
++					to = from;
++				}
+ 			}
+-			blacklist_range (ra, from, to, to_ssid);
+ 		}
++		if (!rc) {
++			rc = blacklist_range(ra, from_ssid, to_ssid, from, to,
++					     msgtrigger);
++			if (rc)
++				totalrc = 1;
++		} else
++			totalrc = 1;
+ 	}
+-	return 1;
++
++	return totalrc;
+ }
+ 
+-/* Parsing the commandline for blacklist parameters, e.g. to blacklist
+- * bus ids 0.0.1234, 0.0.1235 and 0.0.1236, you could use any of:
+- * - cio_ignore=1234-1236
+- * - cio_ignore=0x1234-0x1235,1236
+- * - cio_ignore=0x1234,1235-1236
+- * - cio_ignore=1236 cio_ignore=1234-0x1236
+- * - cio_ignore=1234 cio_ignore=1236 cio_ignore=0x1235
+- * - cio_ignore=0.0.1234-0.0.1236
+- * - cio_ignore=0.0.1234,0x1235,1236
+- * - ...
+- */
+ static int __init
+ blacklist_setup (char *str)
+ {
+ 	CIO_MSG_EVENT(6, "Reading blacklist parameters\n");
+-	return blacklist_parse_parameters (str, add);
++	if (blacklist_parse_parameters(str, add, 1))
++		return 0;
++	return 1;
+ }
+ 
+ __setup ("cio_ignore=", blacklist_setup);
+@@ -224,27 +230,23 @@ is_blacklisted (int ssid, int devno)
+  * Function: blacklist_parse_proc_parameters
+  * parse the stuff which is piped to /proc/cio_ignore
+  */
+-static void
+-blacklist_parse_proc_parameters (char *buf)
++static int blacklist_parse_proc_parameters(char *buf)
+ {
+-	if (strncmp (buf, "free ", 5) == 0) {
+-		blacklist_parse_parameters (buf + 5, free);
+-	} else if (strncmp (buf, "add ", 4) == 0) {
+-		/* 
+-		 * We don't need to check for known devices since
+-		 * css_probe_device will handle this correctly. 
+-		 */
+-		blacklist_parse_parameters (buf + 4, add);
+-	} else {
+-		printk (KERN_WARNING "cio: cio_ignore: Parse error; \n"
+-			KERN_WARNING "try using 'free all|<devno-range>,"
+-				     "<devno-range>,...'\n"
+-			KERN_WARNING "or 'add <devno-range>,"
+-				     "<devno-range>,...'\n");
+-		return;
+-	}
++	int rc;
++	char *parm;
++
++	parm = strsep(&buf, " ");
++
++	if (strcmp("free", parm) == 0)
++		rc = blacklist_parse_parameters(buf, free, 0);
++	else if (strcmp("add", parm) == 0)
++		rc = blacklist_parse_parameters(buf, add, 0);
++	else
++		return 1;
+ 
+ 	css_schedule_reprobe();
++
++	return rc;
+ }
+ 
+ /* Iterator struct for all devices. */
+@@ -328,6 +330,8 @@ cio_ignore_write(struct file *file, const char __user *user_buf,
+ 		 size_t user_len, loff_t *offset)
+ {
+ 	char *buf;
++	size_t i;
++	ssize_t rc, ret;
+ 
+ 	if (*offset)
+ 		return -EINVAL;
+@@ -336,16 +340,27 @@ cio_ignore_write(struct file *file, const char __user *user_buf,
+ 	buf = vmalloc (user_len + 1); /* maybe better use the stack? */
+ 	if (buf == NULL)
+ 		return -ENOMEM;
++	memset(buf, 0, user_len + 1);
++
+ 	if (strncpy_from_user (buf, user_buf, user_len) < 0) {
+-		vfree (buf);
+-		return -EFAULT;
++		rc = -EFAULT;
++		goto out_free;
+ 	}
+-	buf[user_len] = '\0';
+ 
+-	blacklist_parse_proc_parameters (buf);
++	i = user_len - 1;
++	while ((i >= 0) && (isspace(buf[i]) || (buf[i] == 0))) {
++		buf[i] = '\0';
++		i--;
++	}
++	ret = blacklist_parse_proc_parameters(buf);
++	if (ret)
++		rc = -EINVAL;
++	else
++		rc = user_len;
+ 
++out_free:
+ 	vfree (buf);
+-	return user_len;
++	return rc;
+ }
+ 
+ static const struct seq_operations cio_ignore_proc_seq_ops = {
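
The rewritten cio_ignore parser reports errors back to the writer of
/proc/cio_ignore instead of silently skipping bad input, and its range
walk may now cross subchannel-set boundaries: blacklist_range()
increments the device number and rolls over into the next ssid once it
passes __MAX_SUBCHANNEL. The rollover in isolation, as a small
self-contained sketch (the constant and the sample range are
assumptions mirroring the kernel's):

	#include <stdio.h>

	#define MAX_SUBCHANNEL 0xffff	/* stand-in for __MAX_SUBCHANNEL */

	int main(void)
	{
		unsigned int ssid = 0, devno = 0xfffe;	/* from 0.0.fffe */
		unsigned int to_ssid = 1, to = 0x0001;	/* to   0.1.0001 */

		while (ssid < to_ssid || (ssid == to_ssid && devno <= to)) {
			printf("0.%x.%04x\n", ssid, devno);
			if (++devno > MAX_SUBCHANNEL) {
				ssid++;
				devno = 0;
			}
		}
		return 0;
	}

Run as-is this prints 0.0.fffe, 0.0.ffff, 0.1.0000 and 0.1.0001, a
range the old parser would have rejected outright because the ssids
differ.
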
+diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
+index 08a5781..82c6a2d 100644
+--- a/drivers/s390/cio/cio.c
++++ b/drivers/s390/cio/cio.c
+@@ -39,23 +39,6 @@ debug_info_t *cio_debug_msg_id;
+ debug_info_t *cio_debug_trace_id;
+ debug_info_t *cio_debug_crw_id;
+ 
+-int cio_show_msg;
+-
+-static int __init
+-cio_setup (char *parm)
+-{
+-	if (!strcmp (parm, "yes"))
+-		cio_show_msg = 1;
+-	else if (!strcmp (parm, "no"))
+-		cio_show_msg = 0;
+-	else
+-		printk(KERN_ERR "cio: cio_setup: "
+-		       "invalid cio_msg parameter '%s'", parm);
+-	return 1;
+-}
+-
+-__setup ("cio_msg=", cio_setup);
+-
+ /*
+  * Function: cio_debug_init
+  * Initializes three debug logs for common I/O:
+@@ -166,7 +149,7 @@ cio_start_handle_notoper(struct subchannel *sch, __u8 lpm)
+ 
+ 	stsch (sch->schid, &sch->schib);
+ 
+-	CIO_MSG_EVENT(0, "cio_start: 'not oper' status for "
++	CIO_MSG_EVENT(2, "cio_start: 'not oper' status for "
+ 		      "subchannel 0.%x.%04x!\n", sch->schid.ssid,
+ 		      sch->schid.sch_no);
+ 	sprintf(dbf_text, "no%s", sch->dev.bus_id);
+@@ -567,10 +550,9 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
+ 	 * ... just being curious we check for non I/O subchannels
+ 	 */
+ 	if (sch->st != 0) {
+-		CIO_DEBUG(KERN_INFO, 0,
+-			  "Subchannel 0.%x.%04x reports "
+-			  "non-I/O subchannel type %04X\n",
+-			  sch->schid.ssid, sch->schid.sch_no, sch->st);
++		CIO_MSG_EVENT(4, "Subchannel 0.%x.%04x reports "
++			      "non-I/O subchannel type %04X\n",
++			      sch->schid.ssid, sch->schid.sch_no, sch->st);
+ 		/* We stop here for non-io subchannels. */
+ 		err = sch->st;
+ 		goto out;
+@@ -588,7 +570,7 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
+ 		 * This device must not be known to Linux. So we simply
+ 		 * say that there is no device and return ENODEV.
+ 		 */
+-		CIO_MSG_EVENT(4, "Blacklisted device detected "
++		CIO_MSG_EVENT(6, "Blacklisted device detected "
+ 			      "at devno %04X, subchannel set %x\n",
+ 			      sch->schib.pmcw.dev, sch->schid.ssid);
+ 		err = -ENODEV;
+@@ -601,12 +583,11 @@ cio_validate_subchannel (struct subchannel *sch, struct subchannel_id schid)
+ 	sch->lpm = sch->schib.pmcw.pam & sch->opm;
+ 	sch->isc = 3;
+ 
+-	CIO_DEBUG(KERN_INFO, 0,
+-		  "Detected device %04x on subchannel 0.%x.%04X"
+-		  " - PIM = %02X, PAM = %02X, POM = %02X\n",
+-		  sch->schib.pmcw.dev, sch->schid.ssid,
+-		  sch->schid.sch_no, sch->schib.pmcw.pim,
+-		  sch->schib.pmcw.pam, sch->schib.pmcw.pom);
++	CIO_MSG_EVENT(6, "Detected device %04x on subchannel 0.%x.%04X "
++		      "- PIM = %02X, PAM = %02X, POM = %02X\n",
++		      sch->schib.pmcw.dev, sch->schid.ssid,
++		      sch->schid.sch_no, sch->schib.pmcw.pim,
++		      sch->schib.pmcw.pam, sch->schib.pmcw.pom);
+ 
+ 	/*
+ 	 * We now have to initially ...
+diff --git a/drivers/s390/cio/cio.h b/drivers/s390/cio/cio.h
+index 3c75412..6e933ae 100644
+--- a/drivers/s390/cio/cio.h
++++ b/drivers/s390/cio/cio.h
+@@ -118,6 +118,4 @@ extern void *cio_get_console_priv(void);
+ #define cio_get_console_priv() NULL
+ #endif
+ 
+-extern int cio_show_msg;
+-
+ #endif
+diff --git a/drivers/s390/cio/cio_debug.h b/drivers/s390/cio/cio_debug.h
+index d7429ef..e64e827 100644
+--- a/drivers/s390/cio/cio_debug.h
++++ b/drivers/s390/cio/cio_debug.h
+@@ -31,10 +31,4 @@ static inline void CIO_HEX_EVENT(int level, void *data, int length)
+ 	}
+ }
+ 
+-#define CIO_DEBUG(printk_level, event_level, msg...) do {	\
+-		if (cio_show_msg)				\
+-			printk(printk_level "cio: " msg);	\
+-		CIO_MSG_EVENT(event_level, msg);		\
+-	} while (0)
+-
+ #endif
+diff --git a/drivers/s390/cio/css.c b/drivers/s390/cio/css.c
+index 595e327..a769565 100644
+--- a/drivers/s390/cio/css.c
++++ b/drivers/s390/cio/css.c
+@@ -570,7 +570,7 @@ static void reprobe_all(struct work_struct *unused)
+ {
+ 	int ret;
+ 
+-	CIO_MSG_EVENT(2, "reprobe start\n");
++	CIO_MSG_EVENT(4, "reprobe start\n");
+ 
+ 	need_reprobe = 0;
+ 	/* Make sure initial subchannel scan is done. */
+@@ -578,7 +578,7 @@ static void reprobe_all(struct work_struct *unused)
+ 		   atomic_read(&ccw_device_init_count) == 0);
+ 	ret = for_each_subchannel_staged(NULL, reprobe_subchannel, NULL);
+ 
+-	CIO_MSG_EVENT(2, "reprobe done (rc=%d, need_reprobe=%d)\n", ret,
++	CIO_MSG_EVENT(4, "reprobe done (rc=%d, need_reprobe=%d)\n", ret,
+ 		      need_reprobe);
+ }
+ 
+diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c
+index abfd601..e22813d 100644
+--- a/drivers/s390/cio/device.c
++++ b/drivers/s390/cio/device.c
+@@ -341,7 +341,7 @@ ccw_device_remove_disconnected(struct ccw_device *cdev)
+ 		rc = device_schedule_callback(&cdev->dev,
+ 					      ccw_device_remove_orphan_cb);
+ 		if (rc)
+-			CIO_MSG_EVENT(2, "Couldn't unregister orphan "
++			CIO_MSG_EVENT(0, "Couldn't unregister orphan "
+ 				      "0.%x.%04x\n",
+ 				      cdev->private->dev_id.ssid,
+ 				      cdev->private->dev_id.devno);
+@@ -351,7 +351,7 @@ ccw_device_remove_disconnected(struct ccw_device *cdev)
+ 	rc = device_schedule_callback(cdev->dev.parent,
+ 				      ccw_device_remove_sch_cb);
+ 	if (rc)
+-		CIO_MSG_EVENT(2, "Couldn't unregister disconnected device "
++		CIO_MSG_EVENT(0, "Couldn't unregister disconnected device "
+ 			      "0.%x.%04x\n",
+ 			      cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno);
+@@ -397,7 +397,7 @@ int ccw_device_set_offline(struct ccw_device *cdev)
+ 	if (ret == 0)
+ 		wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+ 	else {
+-		CIO_MSG_EVENT(2, "ccw_device_offline returned %d, "
++		CIO_MSG_EVENT(0, "ccw_device_offline returned %d, "
+ 			      "device 0.%x.%04x\n",
+ 			      ret, cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno);
+@@ -433,7 +433,7 @@ int ccw_device_set_online(struct ccw_device *cdev)
+ 	if (ret == 0)
+ 		wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+ 	else {
+-		CIO_MSG_EVENT(2, "ccw_device_online returned %d, "
++		CIO_MSG_EVENT(0, "ccw_device_online returned %d, "
+ 			      "device 0.%x.%04x\n",
+ 			      ret, cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno);
+@@ -451,7 +451,7 @@ int ccw_device_set_online(struct ccw_device *cdev)
+ 	if (ret == 0)
+ 		wait_event(cdev->private->wait_q, dev_fsm_final_state(cdev));
+ 	else
+-		CIO_MSG_EVENT(2, "ccw_device_offline returned %d, "
++		CIO_MSG_EVENT(0, "ccw_device_offline returned %d, "
+ 			      "device 0.%x.%04x\n",
+ 			      ret, cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno);
+@@ -803,7 +803,7 @@ static void sch_attach_disconnected_device(struct subchannel *sch,
+ 	other_sch = to_subchannel(get_device(cdev->dev.parent));
+ 	ret = device_move(&cdev->dev, &sch->dev);
+ 	if (ret) {
+-		CIO_MSG_EVENT(2, "Moving disconnected device 0.%x.%04x failed "
++		CIO_MSG_EVENT(0, "Moving disconnected device 0.%x.%04x failed "
+ 			      "(ret=%d)!\n", cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno, ret);
+ 		put_device(&other_sch->dev);
+@@ -933,7 +933,7 @@ io_subchannel_register(struct work_struct *work)
+ 			ret = device_reprobe(&cdev->dev);
+ 			if (ret)
+ 				/* We can't do much here. */
+-				CIO_MSG_EVENT(2, "device_reprobe() returned"
++				CIO_MSG_EVENT(0, "device_reprobe() returned"
+ 					      " %d for 0.%x.%04x\n", ret,
+ 					      cdev->private->dev_id.ssid,
+ 					      cdev->private->dev_id.devno);
+@@ -1086,7 +1086,7 @@ static void ccw_device_move_to_sch(struct work_struct *work)
+ 	rc = device_move(&cdev->dev, &sch->dev);
+ 	mutex_unlock(&sch->reg_mutex);
+ 	if (rc) {
+-		CIO_MSG_EVENT(2, "Moving device 0.%x.%04x to subchannel "
++		CIO_MSG_EVENT(0, "Moving device 0.%x.%04x to subchannel "
+ 			      "0.%x.%04x failed (ret=%d)!\n",
+ 			      cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno, sch->schid.ssid,
+@@ -1446,8 +1446,7 @@ ccw_device_remove (struct device *dev)
+ 			wait_event(cdev->private->wait_q,
+ 				   dev_fsm_final_state(cdev));
+ 		else
+-			//FIXME: we can't fail!
+-			CIO_MSG_EVENT(2, "ccw_device_offline returned %d, "
++			CIO_MSG_EVENT(0, "ccw_device_offline returned %d, "
+ 				      "device 0.%x.%04x\n",
+ 				      ret, cdev->private->dev_id.ssid,
+ 				      cdev->private->dev_id.devno);
+@@ -1524,7 +1523,7 @@ static int recovery_check(struct device *dev, void *data)
+ 	spin_lock_irq(cdev->ccwlock);
+ 	switch (cdev->private->state) {
+ 	case DEV_STATE_DISCONNECTED:
+-		CIO_MSG_EVENT(3, "recovery: trigger 0.%x.%04x\n",
++		CIO_MSG_EVENT(4, "recovery: trigger 0.%x.%04x\n",
+ 			      cdev->private->dev_id.ssid,
+ 			      cdev->private->dev_id.devno);
+ 		dev_fsm_event(cdev, DEV_EVENT_VERIFY);
+@@ -1554,7 +1553,7 @@ static void recovery_work_func(struct work_struct *unused)
+ 		}
+ 		spin_unlock_irq(&recovery_lock);
+ 	} else
+-		CIO_MSG_EVENT(2, "recovery: end\n");
++		CIO_MSG_EVENT(4, "recovery: end\n");
+ }
+ 
+ static DECLARE_WORK(recovery_work, recovery_work_func);
+@@ -1572,7 +1571,7 @@ void ccw_device_schedule_recovery(void)
+ {
+ 	unsigned long flags;
+ 
+-	CIO_MSG_EVENT(2, "recovery: schedule\n");
++	CIO_MSG_EVENT(4, "recovery: schedule\n");
+ 	spin_lock_irqsave(&recovery_lock, flags);
+ 	if (!timer_pending(&recovery_timer) || (recovery_phase != 0)) {
+ 		recovery_phase = 0;
+diff --git a/drivers/s390/cio/device_fsm.c b/drivers/s390/cio/device_fsm.c
+index 99403b0..e268d5a 100644
+--- a/drivers/s390/cio/device_fsm.c
++++ b/drivers/s390/cio/device_fsm.c
+@@ -322,10 +322,10 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
+ 	same_dev = 0; /* Keep the compiler quiet... */
+ 	switch (state) {
+ 	case DEV_STATE_NOT_OPER:
+-		CIO_DEBUG(KERN_WARNING, 2,
+-			  "SenseID : unknown device %04x on subchannel "
+-			  "0.%x.%04x\n", cdev->private->dev_id.devno,
+-			  sch->schid.ssid, sch->schid.sch_no);
++		CIO_MSG_EVENT(2, "SenseID : unknown device %04x on "
++			      "subchannel 0.%x.%04x\n",
++			      cdev->private->dev_id.devno,
++			      sch->schid.ssid, sch->schid.sch_no);
+ 		break;
+ 	case DEV_STATE_OFFLINE:
+ 		if (cdev->private->state == DEV_STATE_DISCONNECTED_SENSE_ID) {
+@@ -348,20 +348,19 @@ ccw_device_recog_done(struct ccw_device *cdev, int state)
+ 			return;
+ 		}
+ 		/* Issue device info message. */
+-		CIO_DEBUG(KERN_INFO, 2,
+-			  "SenseID : device 0.%x.%04x reports: "
+-			  "CU  Type/Mod = %04X/%02X, Dev Type/Mod = "
+-			  "%04X/%02X\n",
+-			  cdev->private->dev_id.ssid,
+-			  cdev->private->dev_id.devno,
+-			  cdev->id.cu_type, cdev->id.cu_model,
+-			  cdev->id.dev_type, cdev->id.dev_model);
++		CIO_MSG_EVENT(4, "SenseID : device 0.%x.%04x reports: "
++			      "CU  Type/Mod = %04X/%02X, Dev Type/Mod = "
++			      "%04X/%02X\n",
++			      cdev->private->dev_id.ssid,
++			      cdev->private->dev_id.devno,
++			      cdev->id.cu_type, cdev->id.cu_model,
++			      cdev->id.dev_type, cdev->id.dev_model);
+ 		break;
+ 	case DEV_STATE_BOXED:
+-		CIO_DEBUG(KERN_WARNING, 2,
+-			  "SenseID : boxed device %04x on subchannel "
+-			  "0.%x.%04x\n", cdev->private->dev_id.devno,
+-			  sch->schid.ssid, sch->schid.sch_no);
++		CIO_MSG_EVENT(0, "SenseID : boxed device %04x on "
++			      "subchannel 0.%x.%04x\n",
++			      cdev->private->dev_id.devno,
++			      sch->schid.ssid, sch->schid.sch_no);
+ 		break;
+ 	}
+ 	cdev->private->state = state;
+@@ -443,9 +442,8 @@ ccw_device_done(struct ccw_device *cdev, int state)
+ 
+ 
+ 	if (state == DEV_STATE_BOXED)
+-		CIO_DEBUG(KERN_WARNING, 2,
+-			  "Boxed device %04x on subchannel %04x\n",
+-			  cdev->private->dev_id.devno, sch->schid.sch_no);
++		CIO_MSG_EVENT(0, "Boxed device %04x on subchannel %04x\n",
++			      cdev->private->dev_id.devno, sch->schid.sch_no);
+ 
+ 	if (cdev->private->flags.donotify) {
+ 		cdev->private->flags.donotify = 0;
+@@ -900,7 +898,7 @@ ccw_device_w4sense(struct ccw_device *cdev, enum dev_event dev_event)
+ 			/* Basic sense hasn't started. Try again. */
+ 			ccw_device_do_sense(cdev, irb);
+ 		else {
+-			CIO_MSG_EVENT(2, "Huh? 0.%x.%04x: unsolicited "
++			CIO_MSG_EVENT(0, "0.%x.%04x: unsolicited "
+ 				      "interrupt during w4sense...\n",
+ 				      cdev->private->dev_id.ssid,
+ 				      cdev->private->dev_id.devno);
+@@ -1169,8 +1167,10 @@ ccw_device_nop(struct ccw_device *cdev, enum dev_event dev_event)
+ static void
+ ccw_device_bug(struct ccw_device *cdev, enum dev_event dev_event)
+ {
+-	CIO_MSG_EVENT(0, "dev_jumptable[%i][%i] == NULL\n",
+-		      cdev->private->state, dev_event);
++	CIO_MSG_EVENT(0, "Internal state [%i][%i] not handled for device "
++		      "0.%x.%04x\n", cdev->private->state, dev_event,
++		      cdev->private->dev_id.ssid,
++		      cdev->private->dev_id.devno);
+ 	BUG();
+ }
+ 
+diff --git a/drivers/s390/cio/device_id.c b/drivers/s390/cio/device_id.c
+index dc4d87f..cba7020 100644
+--- a/drivers/s390/cio/device_id.c
++++ b/drivers/s390/cio/device_id.c
+@@ -214,7 +214,7 @@ ccw_device_check_sense_id(struct ccw_device *cdev)
+ 		 *     sense id information. So, for intervention required,
+ 		 *     we use the "whack it until it talks" strategy...
+ 		 */
+-		CIO_MSG_EVENT(2, "SenseID : device %04x on Subchannel "
++		CIO_MSG_EVENT(0, "SenseID : device %04x on Subchannel "
+ 			      "0.%x.%04x reports cmd reject\n",
+ 			      cdev->private->dev_id.devno, sch->schid.ssid,
+ 			      sch->schid.sch_no);
+@@ -239,7 +239,7 @@ ccw_device_check_sense_id(struct ccw_device *cdev)
+ 
+ 		lpm = to_io_private(sch)->orb.lpm;
+ 		if ((lpm & sch->schib.pmcw.pim & sch->schib.pmcw.pam) != 0)
+-			CIO_MSG_EVENT(2, "SenseID : path %02X for device %04x "
++			CIO_MSG_EVENT(4, "SenseID : path %02X for device %04x "
+ 				      "on subchannel 0.%x.%04x is "
+ 				      "'not operational'\n", lpm,
+ 				      cdev->private->dev_id.devno,
+diff --git a/drivers/s390/cio/device_pgid.c b/drivers/s390/cio/device_pgid.c
+index c52449a..ba55905 100644
+--- a/drivers/s390/cio/device_pgid.c
++++ b/drivers/s390/cio/device_pgid.c
+@@ -79,7 +79,7 @@ __ccw_device_sense_pgid_start(struct ccw_device *cdev)
+ 			/* ret is 0, -EBUSY, -EACCES or -ENODEV */
+ 			if (ret != -EACCES)
+ 				return ret;
+-			CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel "
++			CIO_MSG_EVENT(3, "SNID - Device %04x on Subchannel "
+ 				      "0.%x.%04x, lpm %02X, became 'not "
+ 				      "operational'\n",
+ 				      cdev->private->dev_id.devno,
+@@ -159,7 +159,7 @@ __ccw_device_check_sense_pgid(struct ccw_device *cdev)
+ 		u8 lpm;
+ 
+ 		lpm = to_io_private(sch)->orb.lpm;
+-		CIO_MSG_EVENT(2, "SNID - Device %04x on Subchannel 0.%x.%04x,"
++		CIO_MSG_EVENT(3, "SNID - Device %04x on Subchannel 0.%x.%04x,"
+ 			      " lpm %02X, became 'not operational'\n",
+ 			      cdev->private->dev_id.devno, sch->schid.ssid,
+ 			      sch->schid.sch_no, lpm);
+@@ -275,7 +275,7 @@ __ccw_device_do_pgid(struct ccw_device *cdev, __u8 func)
+ 			return ret;
+ 	}
+ 	/* PGID command failed on this path. */
+-	CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel "
++	CIO_MSG_EVENT(3, "SPID - Device %04x on Subchannel "
+ 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
+ 		      cdev->private->dev_id.devno, sch->schid.ssid,
+ 		      sch->schid.sch_no, cdev->private->imask);
+@@ -317,7 +317,7 @@ static int __ccw_device_do_nop(struct ccw_device *cdev)
+ 			return ret;
+ 	}
+ 	/* nop command failed on this path. */
+-	CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel "
++	CIO_MSG_EVENT(3, "NOP - Device %04x on Subchannel "
+ 		      "0.%x.%04x, lpm %02X, became 'not operational'\n",
+ 		      cdev->private->dev_id.devno, sch->schid.ssid,
+ 		      sch->schid.sch_no, cdev->private->imask);
+@@ -362,7 +362,7 @@ __ccw_device_check_pgid(struct ccw_device *cdev)
+ 		return -EAGAIN;
+ 	}
+ 	if (irb->scsw.cc == 3) {
+-		CIO_MSG_EVENT(2, "SPID - Device %04x on Subchannel 0.%x.%04x,"
++		CIO_MSG_EVENT(3, "SPID - Device %04x on Subchannel 0.%x.%04x,"
+ 			      " lpm %02X, became 'not operational'\n",
+ 			      cdev->private->dev_id.devno, sch->schid.ssid,
+ 			      sch->schid.sch_no, cdev->private->imask);
+@@ -391,7 +391,7 @@ static int __ccw_device_check_nop(struct ccw_device *cdev)
+ 		return -ETIME;
+ 	}
+ 	if (irb->scsw.cc == 3) {
+-		CIO_MSG_EVENT(2, "NOP - Device %04x on Subchannel 0.%x.%04x,"
++		CIO_MSG_EVENT(3, "NOP - Device %04x on Subchannel 0.%x.%04x,"
+ 			      " lpm %02X, became 'not operational'\n",
+ 			      cdev->private->dev_id.devno, sch->schid.ssid,
+ 			      sch->schid.sch_no, cdev->private->imask);
+diff --git a/drivers/s390/s390mach.c b/drivers/s390/s390mach.c
+index 4d4b542..5080f34 100644
+--- a/drivers/s390/s390mach.c
++++ b/drivers/s390/s390mach.c
+@@ -48,10 +48,11 @@ s390_collect_crw_info(void *param)
+ 	int ccode;
+ 	struct semaphore *sem;
+ 	unsigned int chain;
++	int ignore;
+ 
+ 	sem = (struct semaphore *)param;
+ repeat:
+-	down_interruptible(sem);
++	ignore = down_interruptible(sem);
+ 	chain = 0;
+ 	while (1) {
+ 		if (unlikely(chain > 1)) {
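
The s390mach.c hunk above exists only to silence gcc's warn_unused_result warning: down_interruptible() is declared __must_check, and assigning its return value to a throwaway variable acknowledges the result without changing behaviour. A minimal userspace sketch of the same pattern, with an invented function standing in for the semaphore call:

/* Sketch of the warn_unused_result workaround; try_acquire() is an
 * invented stand-in for down_interruptible(), not kernel API. */
#include <stdio.h>

__attribute__((warn_unused_result))
static int try_acquire(void)
{
	return 0;		/* pretend the semaphore was taken */
}

int main(void)
{
	int ignore;

	/* A bare try_acquire(); would trigger -Wunused-result here. */
	ignore = try_acquire();	/* the assignment consumes the result */
	(void)ignore;		/* ...and this quiets -Wunused-variable */
	return 0;
}
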
+diff --git a/drivers/sbus/char/bpp.c b/drivers/sbus/char/bpp.c
+index 4fab0c2..b87037e 100644
+--- a/drivers/sbus/char/bpp.c
++++ b/drivers/sbus/char/bpp.c
+@@ -41,7 +41,7 @@
+ #define BPP_DELAY 100
+ 
+ static const unsigned  BPP_MAJOR = LP_MAJOR;
+-static const char* dev_name = "bpp";
++static const char *bpp_dev_name = "bpp";
+ 
+ /* When switching from compatibility to a mode where I can read, try
+    the following mode first. */
+diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
+index 46d7e40..81ccbd7 100644
+--- a/drivers/scsi/Kconfig
++++ b/drivers/scsi/Kconfig
+@@ -1679,6 +1679,7 @@ config MAC_SCSI
+ config SCSI_MAC_ESP
+ 	tristate "Macintosh NCR53c9[46] SCSI"
+ 	depends on MAC && SCSI
++	select SCSI_SPI_ATTRS
+ 	help
+ 	  This is the NCR 53c9x SCSI controller found on most of the 68040
+ 	  based Macintoshes.
+diff --git a/drivers/scsi/dpt_i2o.c b/drivers/scsi/dpt_i2o.c
+index 0fb5bf4..8508816 100644
+--- a/drivers/scsi/dpt_i2o.c
++++ b/drivers/scsi/dpt_i2o.c
+@@ -1967,45 +1967,6 @@ cleanup:
+ 	return rcode;
+ }
+ 
+-
+-/*
+- * This routine returns information about the system.  This does not effect
+- * any logic and if the info is wrong - it doesn't matter.
+- */
+-
+-/* Get all the info we can not get from kernel services */
+-static int adpt_system_info(void __user *buffer)
+-{
+-	sysInfo_S si;
+-
+-	memset(&si, 0, sizeof(si));
+-
+-	si.osType = OS_LINUX;
+-	si.osMajorVersion = 0;
+-	si.osMinorVersion = 0;
+-	si.osRevision = 0;
+-	si.busType = SI_PCI_BUS;
+-	si.processorFamily = DPTI_sig.dsProcessorFamily;
+-
+-#if defined __i386__ 
+-	adpt_i386_info(&si);
+-#elif defined (__ia64__)
+-	adpt_ia64_info(&si);
+-#elif defined(__sparc__)
+-	adpt_sparc_info(&si);
+-#elif defined (__alpha__)
+-	adpt_alpha_info(&si);
+-#else
+-	si.processorType = 0xff ;
+-#endif
+-	if(copy_to_user(buffer, &si, sizeof(si))){
+-		printk(KERN_WARNING"dpti: Could not copy buffer TO user\n");
+-		return -EFAULT;
+-	}
+-
+-	return 0;
+-}
+-
+ #if defined __ia64__ 
+ static void adpt_ia64_info(sysInfo_S* si)
+ {
+@@ -2016,7 +1977,6 @@ static void adpt_ia64_info(sysInfo_S* si)
+ }
+ #endif
+ 
+-
+ #if defined __sparc__ 
+ static void adpt_sparc_info(sysInfo_S* si)
+ {
+@@ -2026,7 +1986,6 @@ static void adpt_sparc_info(sysInfo_S* si)
+ 	si->processorType = PROC_ULTRASPARC;
+ }
+ #endif
+-
+ #if defined __alpha__ 
+ static void adpt_alpha_info(sysInfo_S* si)
+ {
+@@ -2038,7 +1997,6 @@ static void adpt_alpha_info(sysInfo_S* si)
+ #endif
+ 
+ #if defined __i386__
+-
+ static void adpt_i386_info(sysInfo_S* si)
+ {
+ 	// This is all the info we need for now
+@@ -2059,9 +2017,45 @@ static void adpt_i386_info(sysInfo_S* si)
+ 		break;
+ 	}
+ }
++#endif
++
++/*
++ * This routine returns information about the system.  This does not affect
++ * any logic and if the info is wrong - it doesn't matter.
++ */
+ 
++/* Get all the info we can not get from kernel services */
++static int adpt_system_info(void __user *buffer)
++{
++	sysInfo_S si;
++
++	memset(&si, 0, sizeof(si));
++
++	si.osType = OS_LINUX;
++	si.osMajorVersion = 0;
++	si.osMinorVersion = 0;
++	si.osRevision = 0;
++	si.busType = SI_PCI_BUS;
++	si.processorFamily = DPTI_sig.dsProcessorFamily;
++
++#if defined __i386__
++	adpt_i386_info(&si);
++#elif defined (__ia64__)
++	adpt_ia64_info(&si);
++#elif defined(__sparc__)
++	adpt_sparc_info(&si);
++#elif defined (__alpha__)
++	adpt_alpha_info(&si);
++#else
++	si.processorType = 0xff;
+ #endif
++	if (copy_to_user(buffer, &si, sizeof(si))) {
++		printk(KERN_WARNING"dpti: Could not copy buffer TO user\n");
++		return -EFAULT;
++	}
+ 
++	return 0;
++}
+ 
+ static int adpt_ioctl(struct inode *inode, struct file *file, uint cmd,
+ 	      ulong arg)
+diff --git a/drivers/scsi/dpti.h b/drivers/scsi/dpti.h
+index 924cd5a..337746d 100644
+--- a/drivers/scsi/dpti.h
++++ b/drivers/scsi/dpti.h
+@@ -316,19 +316,6 @@ static int adpt_close(struct inode *inode, struct file *file);
+ static void adpt_delay(int millisec);
+ #endif
+ 
+-#if defined __ia64__ 
+-static void adpt_ia64_info(sysInfo_S* si);
+-#endif
+-#if defined __sparc__ 
+-static void adpt_sparc_info(sysInfo_S* si);
+-#endif
+-#if defined __alpha__ 
+-static void adpt_sparc_info(sysInfo_S* si);
+-#endif
+-#if defined __i386__
+-static void adpt_i386_info(sysInfo_S* si);
+-#endif
+-
+ #define PRINT_BUFFER_SIZE     512
+ 
+ #define HBA_FLAGS_DBG_FLAGS_MASK         0xffff0000	// Mask for debug flags
+diff --git a/drivers/serial/serial_core.c b/drivers/serial/serial_core.c
+index 1e2b9d8..eab0327 100644
+--- a/drivers/serial/serial_core.c
++++ b/drivers/serial/serial_core.c
+@@ -556,7 +556,7 @@ static int uart_chars_in_buffer(struct tty_struct *tty)
+ static void uart_flush_buffer(struct tty_struct *tty)
+ {
+ 	struct uart_state *state = tty->driver_data;
+-	struct uart_port *port = state->port;
++	struct uart_port *port;
+ 	unsigned long flags;
+ 
+ 	/*
+@@ -568,6 +568,7 @@ static void uart_flush_buffer(struct tty_struct *tty)
+ 		return;
+ 	}
+ 
++	port = state->port;
+ 	pr_debug("uart_flush_buffer(%d) called\n", tty->index);
+ 
+ 	spin_lock_irqsave(&port->lock, flags);
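
The serial_core.c fix above is an ordering bug: the old code dereferenced state->port in the declaration, before the sanity check that guards against a torn-down tty. Moving the assignment below the check means a possibly invalid pointer is never touched. A toy illustration of the shape of the bug (all names invented):

/* Illustrative only: the dereference must follow the validity check. */
#include <stdio.h>
#include <stddef.h>

struct port { int index; };
struct state { struct port *port; };

static void flush(struct state *state)
{
	struct port *port;

	if (!state || !state->port) {	/* check first... */
		fprintf(stderr, "flush: stale state, ignoring\n");
		return;
	}

	port = state->port;		/* ...dereference second */
	printf("flushing port %d\n", port->index);
}

int main(void)
{
	struct port p = { .index = 3 };
	struct state s = { .port = &p };

	flush(&s);	/* normal path */
	flush(NULL);	/* safe only because port is read after the check */
	return 0;
}
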
+diff --git a/drivers/usb/host/Kconfig b/drivers/usb/host/Kconfig
+index 33b467a..1ef6df3 100644
+--- a/drivers/usb/host/Kconfig
++++ b/drivers/usb/host/Kconfig
+@@ -129,7 +129,7 @@ config USB_ISP1760_PCI
+ 
+ config USB_ISP1760_OF
+ 	bool "Support for the OF platform bus"
+-	depends on USB_ISP1760_HCD && OF
++	depends on USB_ISP1760_HCD && PPC_OF
+ 	---help---
+ 	  Enables support for the device present on the PowerPC
+ 	  OpenFirmware platform bus.
+diff --git a/drivers/usb/serial/iuu_phoenix.c b/drivers/usb/serial/iuu_phoenix.c
+index 8a21764..a01e987 100644
+--- a/drivers/usb/serial/iuu_phoenix.c
++++ b/drivers/usb/serial/iuu_phoenix.c
+@@ -643,7 +643,7 @@ static void read_buf_callback(struct urb *urb)
+ static int iuu_bulk_write(struct usb_serial_port *port)
+ {
+ 	struct iuu_private *priv = usb_get_serial_port_data(port);
+-	unsigned int flags;
++	unsigned long flags;
+ 	int result;
+ 	int i;
+ 	char *buf_ptr = port->write_urb->transfer_buffer;
+@@ -694,7 +694,7 @@ static void iuu_uart_read_callback(struct urb *urb)
+ {
+ 	struct usb_serial_port *port = urb->context;
+ 	struct iuu_private *priv = usb_get_serial_port_data(port);
+-	unsigned int flags;
++	unsigned long flags;
+ 	int status;
+ 	int error = 0;
+ 	int len = 0;
+@@ -759,7 +759,7 @@ static int iuu_uart_write(struct usb_serial_port *port, const u8 *buf,
+ 			  int count)
+ {
+ 	struct iuu_private *priv = usb_get_serial_port_data(port);
+-	unsigned int flags;
++	unsigned long flags;
+ 	dbg("%s - enter", __func__);
+ 
+ 	if (count > 256)
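
The iuu_phoenix changes fix a classic type bug: spin_lock_irqsave() stores the saved interrupt state into an unsigned long, so declaring flags as unsigned int silently truncates the upper 32 bits on 64-bit machines. A standalone sketch of the truncation on an LP64 system (no kernel API involved):

/* Why the saved-IRQ "flags" must be unsigned long: storing it in a
 * 32-bit variable drops the upper half on an LP64 machine. */
#include <stdio.h>

int main(void)
{
	unsigned long saved = 0x100000001UL;	/* imagine saved IRQ state */
	unsigned int truncated = saved;		/* upper 32 bits are lost */

	printf("unsigned long: %#lx\n", saved);
	printf("unsigned int : %#x\n", truncated);
	return 0;
}
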
+diff --git a/drivers/video/bw2.c b/drivers/video/bw2.c
+index 275d9da..79f85dc 100644
+--- a/drivers/video/bw2.c
++++ b/drivers/video/bw2.c
+@@ -329,7 +329,7 @@ static int __devinit bw2_probe(struct of_device *op, const struct of_device_id *
+ 	if (!info->screen_base)
+ 		goto out_unmap_regs;
+ 
+-	bw2_blank(0, info);
++	bw2_blank(FB_BLANK_UNBLANK, info);
+ 
+ 	bw2_init_fix(info, linebytes);
+ 
+diff --git a/drivers/video/cg3.c b/drivers/video/cg3.c
+index 010ea53..e31e26a 100644
+--- a/drivers/video/cg3.c
++++ b/drivers/video/cg3.c
+@@ -398,7 +398,7 @@ static int __devinit cg3_probe(struct of_device *op,
+ 	if (!info->screen_base)
+ 		goto out_unmap_regs;
+ 
+-	cg3_blank(0, info);
++	cg3_blank(FB_BLANK_UNBLANK, info);
+ 
+ 	if (!of_find_property(dp, "width", NULL)) {
+ 		err = cg3_do_default_mode(par);
+diff --git a/drivers/video/cg6.c b/drivers/video/cg6.c
+index fc90db6..8000bcc 100644
+--- a/drivers/video/cg6.c
++++ b/drivers/video/cg6.c
+@@ -767,7 +767,7 @@ static int __devinit cg6_probe(struct of_device *op,
+ 
+ 	cg6_bt_init(par);
+ 	cg6_chip_init(info);
+-	cg6_blank(0, info);
++	cg6_blank(FB_BLANK_UNBLANK, info);
+ 
+ 	if (fb_alloc_cmap(&info->cmap, 256, 0))
+ 		goto out_unmap_regs;
+diff --git a/drivers/video/ffb.c b/drivers/video/ffb.c
+index 93dca3e..0f42a69 100644
+--- a/drivers/video/ffb.c
++++ b/drivers/video/ffb.c
+@@ -987,7 +987,7 @@ static int __devinit ffb_probe(struct of_device *op,
+ 	 * chosen console, it will have video outputs off in
+ 	 * the DAC.
+ 	 */
+-	ffb_blank(0, info);
++	ffb_blank(FB_BLANK_UNBLANK, info);
+ 
+ 	if (fb_alloc_cmap(&info->cmap, 256, 0))
+ 		goto out_unmap_dac;
+diff --git a/drivers/video/leo.c b/drivers/video/leo.c
+index f3160fc..fb12992 100644
+--- a/drivers/video/leo.c
++++ b/drivers/video/leo.c
+@@ -601,7 +601,7 @@ static int __devinit leo_probe(struct of_device *op, const struct of_device_id *
+ 	leo_init_wids(info);
+ 	leo_init_hw(info);
+ 
+-	leo_blank(0, info);
++	leo_blank(FB_BLANK_UNBLANK, info);
+ 
+ 	if (fb_alloc_cmap(&info->cmap, 256, 0))
+ 		goto out_unmap_regs;
+diff --git a/drivers/video/p9100.c b/drivers/video/p9100.c
+index c95874f..676ffb0 100644
+--- a/drivers/video/p9100.c
++++ b/drivers/video/p9100.c
+@@ -295,7 +295,7 @@ static int __devinit p9100_probe(struct of_device *op, const struct of_device_id
+ 	if (!info->screen_base)
+ 		goto out_unmap_regs;
+ 
+-	p9100_blank(0, info);
++	p9100_blank(FB_BLANK_UNBLANK, info);
+ 
+ 	if (fb_alloc_cmap(&info->cmap, 256, 0))
+ 		goto out_unmap_screen;
+diff --git a/drivers/video/tcx.c b/drivers/video/tcx.c
+index a717743..44e8c27 100644
+--- a/drivers/video/tcx.c
++++ b/drivers/video/tcx.c
+@@ -84,7 +84,7 @@ struct tcx_tec {
+ 
+ struct tcx_thc {
+ 	u32 thc_rev;
+-        u32 thc_pad0[511];
++	u32 thc_pad0[511];
+ 	u32 thc_hs;		/* hsync timing */
+ 	u32 thc_hsdvs;
+ 	u32 thc_hd;
+@@ -126,10 +126,10 @@ struct tcx_par {
+ };
+ 
+ /* Reset control plane so that WID is 8-bit plane. */
+-static void __tcx_set_control_plane (struct tcx_par *par)
++static void __tcx_set_control_plane(struct tcx_par *par)
+ {
+ 	u32 __iomem *p, *pend;
+-        
++
+ 	if (par->lowdepth)
+ 		return;
+ 
+@@ -143,8 +143,8 @@ static void __tcx_set_control_plane (struct tcx_par *par)
+ 		sbus_writel(tmp, p);
+ 	}
+ }
+-                                                
+-static void tcx_reset (struct fb_info *info)
++
++static void tcx_reset(struct fb_info *info)
+ {
+ 	struct tcx_par *par = (struct tcx_par *) info->par;
+ 	unsigned long flags;
+@@ -365,7 +365,8 @@ static void tcx_unmap_regs(struct of_device *op, struct fb_info *info,
+ 			   info->screen_base, par->fbsize);
+ }
+ 
+-static int __devinit tcx_init_one(struct of_device *op)
++static int __devinit tcx_probe(struct of_device *op,
++			       const struct of_device_id *match)
+ {
+ 	struct device_node *dp = op->node;
+ 	struct fb_info *info;
+@@ -488,13 +489,6 @@ out_err:
+ 	return err;
+ }
+ 
+-static int __devinit tcx_probe(struct of_device *dev, const struct of_device_id *match)
+-{
+-	struct of_device *op = to_of_device(&dev->dev);
+-
+-	return tcx_init_one(op);
+-}
+-
+ static int __devexit tcx_remove(struct of_device *op)
+ {
+ 	struct fb_info *info = dev_get_drvdata(&op->dev);
+diff --git a/fs/affs/affs.h b/fs/affs/affs.h
+index d5bd497..223b191 100644
+--- a/fs/affs/affs.h
++++ b/fs/affs/affs.h
+@@ -48,7 +48,7 @@ struct affs_ext_key {
+  * affs fs inode data in memory
+  */
+ struct affs_inode_info {
+-	u32	 i_opencnt;
++	atomic_t i_opencnt;
+ 	struct semaphore i_link_lock;		/* Protects internal inode access. */
+ 	struct semaphore i_ext_lock;		/* Protects internal inode access. */
+ #define i_hash_lock i_ext_lock
+@@ -170,8 +170,6 @@ extern int	affs_rename(struct inode *old_dir, struct dentry *old_dentry,
+ extern unsigned long		 affs_parent_ino(struct inode *dir);
+ extern struct inode		*affs_new_inode(struct inode *dir);
+ extern int			 affs_notify_change(struct dentry *dentry, struct iattr *attr);
+-extern void			 affs_put_inode(struct inode *inode);
+-extern void			 affs_drop_inode(struct inode *inode);
+ extern void			 affs_delete_inode(struct inode *inode);
+ extern void			 affs_clear_inode(struct inode *inode);
+ extern struct inode		*affs_iget(struct super_block *sb,
+diff --git a/fs/affs/file.c b/fs/affs/file.c
+index 1a4f092..6eac7bd 100644
+--- a/fs/affs/file.c
++++ b/fs/affs/file.c
+@@ -48,8 +48,9 @@ affs_file_open(struct inode *inode, struct file *filp)
+ {
+ 	if (atomic_read(&filp->f_count) != 1)
+ 		return 0;
+-	pr_debug("AFFS: open(%d)\n", AFFS_I(inode)->i_opencnt);
+-	AFFS_I(inode)->i_opencnt++;
++	pr_debug("AFFS: open(%lu,%d)\n",
++		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
++	atomic_inc(&AFFS_I(inode)->i_opencnt);
+ 	return 0;
+ }
+ 
+@@ -58,10 +59,16 @@ affs_file_release(struct inode *inode, struct file *filp)
+ {
+ 	if (atomic_read(&filp->f_count) != 0)
+ 		return 0;
+-	pr_debug("AFFS: release(%d)\n", AFFS_I(inode)->i_opencnt);
+-	AFFS_I(inode)->i_opencnt--;
+-	if (!AFFS_I(inode)->i_opencnt)
++	pr_debug("AFFS: release(%lu, %d)\n",
++		 inode->i_ino, atomic_read(&AFFS_I(inode)->i_opencnt));
++
++	if (atomic_dec_and_test(&AFFS_I(inode)->i_opencnt)) {
++		mutex_lock(&inode->i_mutex);
++		if (inode->i_size != AFFS_I(inode)->mmu_private)
++			affs_truncate(inode);
+ 		affs_free_prealloc(inode);
++		mutex_unlock(&inode->i_mutex);
++	}
+ 
+ 	return 0;
+ }
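
Making i_opencnt an atomic_t lets the final closer perform truncation and preallocation cleanup exactly once, without a lock just to maintain the count: atomic_dec_and_test() is true only for the transition to zero. A userspace sketch of the same dec-and-test shape using C11 atomics (the kernel's atomic_t API differs; this shows only the idea):

#include <stdatomic.h>
#include <stdio.h>

static atomic_int opencnt;

static void release(void)
{
	/* atomic_fetch_sub returns the old value, so exactly one caller
	 * observes the 1 -> 0 transition and runs the cleanup. */
	if (atomic_fetch_sub(&opencnt, 1) == 1)
		printf("last close: truncate and free preallocation\n");
}

int main(void)
{
	atomic_store(&opencnt, 2);	/* two concurrent opens */
	release();			/* not the last closer */
	release();			/* last closer: cleanup runs once */
	return 0;
}
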
+@@ -180,7 +187,7 @@ affs_get_extblock(struct inode *inode, u32 ext)
+ 	/* inline the simplest case: same extended block as last time */
+ 	struct buffer_head *bh = AFFS_I(inode)->i_ext_bh;
+ 	if (ext == AFFS_I(inode)->i_ext_last)
+-		atomic_inc(&bh->b_count);
++		get_bh(bh);
+ 	else
+ 		/* we have to do more (not inlined) */
+ 		bh = affs_get_extblock_slow(inode, ext);
+@@ -306,7 +313,7 @@ store_ext:
+ 	affs_brelse(AFFS_I(inode)->i_ext_bh);
+ 	AFFS_I(inode)->i_ext_last = ext;
+ 	AFFS_I(inode)->i_ext_bh = bh;
+-	atomic_inc(&bh->b_count);
++	get_bh(bh);
+ 
+ 	return bh;
+ 
+@@ -324,7 +331,6 @@ affs_get_block(struct inode *inode, sector_t block, struct buffer_head *bh_resul
+ 
+ 	pr_debug("AFFS: get_block(%u, %lu)\n", (u32)inode->i_ino, (unsigned long)block);
+ 
+-
+ 	BUG_ON(block > (sector_t)0x7fffffffUL);
+ 
+ 	if (block >= AFFS_I(inode)->i_blkcnt) {
+@@ -827,6 +833,8 @@ affs_truncate(struct inode *inode)
+ 		res = mapping->a_ops->write_begin(NULL, mapping, size, 0, 0, &page, &fsdata);
+ 		if (!res)
+ 			res = mapping->a_ops->write_end(NULL, mapping, size, 0, 0, page, fsdata);
++		else
++			inode->i_size = AFFS_I(inode)->mmu_private;
+ 		mark_inode_dirty(inode);
+ 		return;
+ 	} else if (inode->i_size == AFFS_I(inode)->mmu_private)
+@@ -862,6 +870,7 @@ affs_truncate(struct inode *inode)
+ 		blk++;
+ 	} else
+ 		AFFS_HEAD(ext_bh)->first_data = 0;
++	AFFS_HEAD(ext_bh)->block_count = cpu_to_be32(i);
+ 	size = AFFS_SB(sb)->s_hashsize;
+ 	if (size > blkcnt - blk + i)
+ 		size = blkcnt - blk + i;
+diff --git a/fs/affs/inode.c b/fs/affs/inode.c
+index 27fe6cb..a13b334 100644
+--- a/fs/affs/inode.c
++++ b/fs/affs/inode.c
+@@ -58,7 +58,7 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
+ 	AFFS_I(inode)->i_extcnt = 1;
+ 	AFFS_I(inode)->i_ext_last = ~1;
+ 	AFFS_I(inode)->i_protect = prot;
+-	AFFS_I(inode)->i_opencnt = 0;
++	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
+ 	AFFS_I(inode)->i_blkcnt = 0;
+ 	AFFS_I(inode)->i_lc = NULL;
+ 	AFFS_I(inode)->i_lc_size = 0;
+@@ -108,8 +108,6 @@ struct inode *affs_iget(struct super_block *sb, unsigned long ino)
+ 			inode->i_mode |= S_IFDIR;
+ 		} else
+ 			inode->i_mode = S_IRUGO | S_IXUGO | S_IWUSR | S_IFDIR;
+-		if (tail->link_chain)
+-			inode->i_nlink = 2;
+ 		/* Maybe it should be controlled by mount parameter? */
+ 		//inode->i_mode |= S_ISVTX;
+ 		inode->i_op = &affs_dir_inode_operations;
+@@ -245,31 +243,12 @@ out:
+ }
+ 
+ void
+-affs_put_inode(struct inode *inode)
+-{
+-	pr_debug("AFFS: put_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+-	affs_free_prealloc(inode);
+-}
+-
+-void
+-affs_drop_inode(struct inode *inode)
+-{
+-	mutex_lock(&inode->i_mutex);
+-	if (inode->i_size != AFFS_I(inode)->mmu_private)
+-		affs_truncate(inode);
+-	mutex_unlock(&inode->i_mutex);
+-
+-	generic_drop_inode(inode);
+-}
+-
+-void
+ affs_delete_inode(struct inode *inode)
+ {
+ 	pr_debug("AFFS: delete_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
+ 	truncate_inode_pages(&inode->i_data, 0);
+ 	inode->i_size = 0;
+-	if (S_ISREG(inode->i_mode))
+-		affs_truncate(inode);
++	affs_truncate(inode);
+ 	clear_inode(inode);
+ 	affs_free_block(inode->i_sb, inode->i_ino);
+ }
+@@ -277,9 +256,12 @@ affs_delete_inode(struct inode *inode)
+ void
+ affs_clear_inode(struct inode *inode)
+ {
+-	unsigned long cache_page = (unsigned long) AFFS_I(inode)->i_lc;
++	unsigned long cache_page;
+ 
+ 	pr_debug("AFFS: clear_inode(ino=%lu, nlink=%u)\n", inode->i_ino, inode->i_nlink);
++
++	affs_free_prealloc(inode);
++	cache_page = (unsigned long)AFFS_I(inode)->i_lc;
+ 	if (cache_page) {
+ 		pr_debug("AFFS: freeing ext cache\n");
+ 		AFFS_I(inode)->i_lc = NULL;
+@@ -316,7 +298,7 @@ affs_new_inode(struct inode *dir)
+ 	inode->i_ino     = block;
+ 	inode->i_nlink   = 1;
+ 	inode->i_mtime   = inode->i_atime = inode->i_ctime = CURRENT_TIME_SEC;
+-	AFFS_I(inode)->i_opencnt = 0;
++	atomic_set(&AFFS_I(inode)->i_opencnt, 0);
+ 	AFFS_I(inode)->i_blkcnt = 0;
+ 	AFFS_I(inode)->i_lc = NULL;
+ 	AFFS_I(inode)->i_lc_size = 0;
+@@ -369,12 +351,12 @@ affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s3
+ 	switch (type) {
+ 	case ST_LINKFILE:
+ 	case ST_LINKDIR:
+-		inode_bh = bh;
+ 		retval = -ENOSPC;
+ 		block = affs_alloc_block(dir, dir->i_ino);
+ 		if (!block)
+ 			goto err;
+ 		retval = -EIO;
++		inode_bh = bh;
+ 		bh = affs_getzeroblk(sb, block);
+ 		if (!bh)
+ 			goto err;
+diff --git a/fs/affs/namei.c b/fs/affs/namei.c
+index 2218f1e..cfcf1b6 100644
+--- a/fs/affs/namei.c
++++ b/fs/affs/namei.c
+@@ -234,7 +234,8 @@ affs_lookup(struct inode *dir, struct dentry *dentry, struct nameidata *nd)
+ int
+ affs_unlink(struct inode *dir, struct dentry *dentry)
+ {
+-	pr_debug("AFFS: unlink(dir=%d, \"%.*s\")\n", (u32)dir->i_ino,
++	pr_debug("AFFS: unlink(dir=%d, %lu \"%.*s\")\n", (u32)dir->i_ino,
++		 dentry->d_inode->i_ino,
+ 		 (int)dentry->d_name.len, dentry->d_name.name);
+ 
+ 	return affs_remove_header(dentry);
+@@ -302,7 +303,8 @@ affs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+ int
+ affs_rmdir(struct inode *dir, struct dentry *dentry)
+ {
+-	pr_debug("AFFS: rmdir(dir=%u, \"%.*s\")\n", (u32)dir->i_ino,
++	pr_debug("AFFS: rmdir(dir=%u, %lu \"%.*s\")\n", (u32)dir->i_ino,
++		 dentry->d_inode->i_ino,
+ 		 (int)dentry->d_name.len, dentry->d_name.name);
+ 
+ 	return affs_remove_header(dentry);
+diff --git a/fs/affs/super.c b/fs/affs/super.c
+index 01d25d5..d214837 100644
+--- a/fs/affs/super.c
++++ b/fs/affs/super.c
+@@ -71,12 +71,18 @@ static struct kmem_cache * affs_inode_cachep;
+ 
+ static struct inode *affs_alloc_inode(struct super_block *sb)
+ {
+-	struct affs_inode_info *ei;
+-	ei = (struct affs_inode_info *)kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
+-	if (!ei)
++	struct affs_inode_info *i;
++
++	i = kmem_cache_alloc(affs_inode_cachep, GFP_KERNEL);
++	if (!i)
+ 		return NULL;
+-	ei->vfs_inode.i_version = 1;
+-	return &ei->vfs_inode;
++
++	i->vfs_inode.i_version = 1;
++	i->i_lc = NULL;
++	i->i_ext_bh = NULL;
++	i->i_pa_cnt = 0;
++
++	return &i->vfs_inode;
+ }
+ 
+ static void affs_destroy_inode(struct inode *inode)
+@@ -114,8 +120,6 @@ static const struct super_operations affs_sops = {
+ 	.alloc_inode	= affs_alloc_inode,
+ 	.destroy_inode	= affs_destroy_inode,
+ 	.write_inode	= affs_write_inode,
+-	.put_inode	= affs_put_inode,
+-	.drop_inode	= affs_drop_inode,
+ 	.delete_inode	= affs_delete_inode,
+ 	.clear_inode	= affs_clear_inode,
+ 	.put_super	= affs_put_super,
+diff --git a/fs/bio.c b/fs/bio.c
+index 799f86d..7856257 100644
+--- a/fs/bio.c
++++ b/fs/bio.c
+@@ -158,7 +158,7 @@ struct bio *bio_alloc_bioset(gfp_t gfp_mask, int nr_iovecs, struct bio_set *bs)
+ 
+ 		bio_init(bio);
+ 		if (likely(nr_iovecs)) {
+-			unsigned long idx = 0; /* shut up gcc */
++			unsigned long uninitialized_var(idx);
+ 
+ 			bvl = bvec_alloc_bs(gfp_mask, nr_iovecs, &idx, bs);
+ 			if (unlikely(!bvl)) {
+@@ -963,6 +963,7 @@ static void bio_copy_kern_endio(struct bio *bio, int err)
+  *	@data: pointer to buffer to copy
+  *	@len: length in bytes
+  *	@gfp_mask: allocation flags for bio and page allocation
++ *	@reading: data direction is READ
+  *
+  *	copy the kernel address into a bio suitable for io to a block
+  *	device. Returns an error pointer in case of error.
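
The bio.c hunk swaps an explicit dummy initialisation for uninitialized_var(), which documents that gcc's "may be used uninitialized" warning is a false positive: bvec_alloc_bs() always sets idx before it is read. In kernels of this era the macro expanded to roughly `x = x`; a sketch assuming that definition:

/* Assumed expansion of the macro in this era's compiler-gcc.h; verify
 * against the tree before relying on it. */
#define uninitialized_var(x) x = x

#include <stdio.h>

static int lookup(int key, int *out)
{
	if (key < 0)
		return -1;
	*out = key * 2;
	return 0;
}

int main(void)
{
	int uninitialized_var(val);

	/* val is provably assigned before use, but flow-insensitive
	 * analysis sometimes cannot see that. */
	if (lookup(21, &val) == 0)
		printf("val = %d\n", val);
	return 0;
}
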
+diff --git a/fs/inode.c b/fs/inode.c
+index bf64781..c36d948 100644
+--- a/fs/inode.c
++++ b/fs/inode.c
+@@ -1149,13 +1149,8 @@ static inline void iput_final(struct inode *inode)
+ void iput(struct inode *inode)
+ {
+ 	if (inode) {
+-		const struct super_operations *op = inode->i_sb->s_op;
+-
+ 		BUG_ON(inode->i_state == I_CLEAR);
+ 
+-		if (op && op->put_inode)
+-			op->put_inode(inode);
+-
+ 		if (atomic_dec_and_lock(&inode->i_count, &inode_lock))
+ 			iput_final(inode);
+ 	}
+diff --git a/fs/locks.c b/fs/locks.c
+index 663c069..0ac6b92 100644
+--- a/fs/locks.c
++++ b/fs/locks.c
+@@ -1753,6 +1753,7 @@ int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
+ 	struct file_lock *file_lock = locks_alloc_lock();
+ 	struct flock flock;
+ 	struct inode *inode;
++	struct file *f;
+ 	int error;
+ 
+ 	if (file_lock == NULL)
+@@ -1825,7 +1826,15 @@ again:
+ 	 * Attempt to detect a close/fcntl race and recover by
+ 	 * releasing the lock that was just acquired.
+ 	 */
+-	if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
++	/*
++	 * we need that spin_lock here - it prevents reordering between
++	 * update of inode->i_flock and check for it done in close().
++	 * rcu_read_lock() wouldn't do.
++	 */
++	spin_lock(&current->files->file_lock);
++	f = fcheck(fd);
++	spin_unlock(&current->files->file_lock);
++	if (!error && f != filp && flock.l_type != F_UNLCK) {
+ 		flock.l_type = F_UNLCK;
+ 		goto again;
+ 	}
+@@ -1881,6 +1890,7 @@ int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
+ 	struct file_lock *file_lock = locks_alloc_lock();
+ 	struct flock64 flock;
+ 	struct inode *inode;
++	struct file *f;
+ 	int error;
+ 
+ 	if (file_lock == NULL)
+@@ -1953,7 +1963,10 @@ again:
+ 	 * Attempt to detect a close/fcntl race and recover by
+ 	 * releasing the lock that was just acquired.
+ 	 */
+-	if (!error && fcheck(fd) != filp && flock.l_type != F_UNLCK) {
++	spin_lock(&current->files->file_lock);
++	f = fcheck(fd);
++	spin_unlock(&current->files->file_lock);
++	if (!error && f != filp && flock.l_type != F_UNLCK) {
+ 		flock.l_type = F_UNLCK;
+ 		goto again;
+ 	}
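
The locks.c comment above spells out the fix: after the lock is installed, the fd may already have been closed by another thread, so fcheck() must be repeated under files->file_lock to order the lookup against close(), and the just-acquired lock is released if the fd no longer refers to this file. A toy model of that check-under-lock recovery (all names invented, a pthread mutex standing in for the spinlock):

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t file_lock = PTHREAD_MUTEX_INITIALIZER;
static void *fd_table[4];

static void *fcheck(int fd)
{
	return fd_table[fd];
}

static void setlk(int fd, void *filp)
{
	void *f;

	printf("lock installed for fd %d\n", fd);

	pthread_mutex_lock(&file_lock);		/* orders us vs. close() */
	f = fcheck(fd);
	pthread_mutex_unlock(&file_lock);

	if (f != filp)
		printf("fd %d closed under us: releasing the lock\n", fd);
}

int main(void)
{
	void *filp = (void *)0x1;

	fd_table[1] = filp;
	setlk(1, filp);		/* normal case: table still matches */
	fd_table[1] = NULL;	/* simulate a racing close() */
	setlk(1, filp);		/* recovery path fires */
	return 0;
}
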
+diff --git a/fs/pipe.c b/fs/pipe.c
+index f73492b..ec228bc 100644
+--- a/fs/pipe.c
++++ b/fs/pipe.c
+@@ -17,6 +17,7 @@
+ #include <linux/highmem.h>
+ #include <linux/pagemap.h>
+ #include <linux/audit.h>
++#include <linux/syscalls.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/ioctls.h>
+@@ -1076,6 +1077,26 @@ int do_pipe(int *fd)
+ }
+ 
+ /*
++ * sys_pipe() is the normal C calling standard for creating
++ * a pipe. It's not the way Unix traditionally does this, though.
++ */
++asmlinkage long __weak sys_pipe(int __user *fildes)
++{
++	int fd[2];
++	int error;
++
++	error = do_pipe(fd);
++	if (!error) {
++		if (copy_to_user(fildes, fd, sizeof(fd))) {
++			sys_close(fd[0]);
++			sys_close(fd[1]);
++			error = -EFAULT;
++		}
++	}
++	return error;
++}
++
++/*
+  * pipefs should _never_ be mounted by userland - too much of security hassle,
+  * no real gain from having the whole whorehouse mounted. So we don't need
+  * any operations on the root directory. However, we need a non-trivial
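
Declaring the generic sys_pipe() __weak lets architectures that already define their own sys_pipe keep it: a strong definition wins at link time and the weak default is discarded. A single-file sketch of weak-symbol defaulting (with nothing overriding it, the weak body runs):

#include <stdio.h>

/* A weak default; a strong definition in another object file would
 * replace it at link time, exactly as arch code replaces sys_pipe(). */
__attribute__((weak)) int backend(void)
{
	return 42;
}

int main(void)
{
	printf("backend() = %d\n", backend());	/* weak body runs here */
	return 0;
}
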
+diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
+index e2b8e76..88717c0 100644
+--- a/fs/proc/task_mmu.c
++++ b/fs/proc/task_mmu.c
+@@ -5,11 +5,9 @@
+ #include <linux/highmem.h>
+ #include <linux/ptrace.h>
+ #include <linux/pagemap.h>
+-#include <linux/ptrace.h>
+ #include <linux/mempolicy.h>
+ #include <linux/swap.h>
+ #include <linux/swapops.h>
+-#include <linux/seq_file.h>
+ 
+ #include <asm/elf.h>
+ #include <asm/uaccess.h>
+diff --git a/fs/proc/task_nommu.c b/fs/proc/task_nommu.c
+index 4b733f1..4b4f9cc 100644
+--- a/fs/proc/task_nommu.c
++++ b/fs/proc/task_nommu.c
+@@ -1,6 +1,7 @@
+ 
+ #include <linux/mm.h>
+ #include <linux/file.h>
++#include <linux/fdtable.h>
+ #include <linux/mount.h>
+ #include <linux/ptrace.h>
+ #include <linux/seq_file.h>
+diff --git a/fs/splice.c b/fs/splice.c
+index 633f58e..7815003 100644
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -811,24 +811,19 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ {
+ 	struct address_space *mapping = out->f_mapping;
+ 	struct inode *inode = mapping->host;
+-	int killsuid, killpriv;
++	struct splice_desc sd = {
++		.total_len = len,
++		.flags = flags,
++		.pos = *ppos,
++		.u.file = out,
++	};
+ 	ssize_t ret;
+-	int err = 0;
+-
+-	killpriv = security_inode_need_killpriv(out->f_path.dentry);
+-	killsuid = should_remove_suid(out->f_path.dentry);
+-	if (unlikely(killsuid || killpriv)) {
+-		mutex_lock(&inode->i_mutex);
+-		if (killpriv)
+-			err = security_inode_killpriv(out->f_path.dentry);
+-		if (!err && killsuid)
+-			err = __remove_suid(out->f_path.dentry, killsuid);
+-		mutex_unlock(&inode->i_mutex);
+-		if (err)
+-			return err;
+-	}
+ 
+-	ret = splice_from_pipe(pipe, out, ppos, len, flags, pipe_to_file);
++	inode_double_lock(inode, pipe->inode);
++	ret = remove_suid(out->f_path.dentry);
++	if (likely(!ret))
++		ret = __splice_from_pipe(pipe, &sd, pipe_to_file);
++	inode_double_unlock(inode, pipe->inode);
+ 	if (ret > 0) {
+ 		unsigned long nr_pages;
+ 
+@@ -840,6 +835,8 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
+ 		 * sync it.
+ 		 */
+ 		if (unlikely((out->f_flags & O_SYNC) || IS_SYNC(inode))) {
++			int err;
++
+ 			mutex_lock(&inode->i_mutex);
+ 			err = generic_osync_inode(inode, mapping,
+ 						  OSYNC_METADATA|OSYNC_DATA);
+@@ -1075,7 +1072,7 @@ long do_splice_direct(struct file *in, loff_t *ppos, struct file *out,
+ 
+ 	ret = splice_direct_to_actor(in, &sd, direct_splice_actor);
+ 	if (ret > 0)
+-		*ppos = sd.pos;
++		*ppos += ret;
+ 
+ 	return ret;
+ }
+diff --git a/fs/udf/namei.c b/fs/udf/namei.c
+index 2b34c8c..d323194 100644
+--- a/fs/udf/namei.c
++++ b/fs/udf/namei.c
+@@ -32,6 +32,7 @@
+ #include <linux/buffer_head.h>
+ #include <linux/sched.h>
+ #include <linux/crc-itu-t.h>
++#include <linux/exportfs.h>
+ 
+ static inline int udf_match(int len1, const char *name1, int len2,
+ 			    const char *name2)
+@@ -158,6 +159,8 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
+ 	sector_t offset;
+ 	struct extent_position epos = {};
+ 	struct udf_inode_info *dinfo = UDF_I(dir);
++	int isdotdot = dentry->d_name.len == 2 &&
++		dentry->d_name.name[0] == '.' && dentry->d_name.name[1] == '.';
+ 
+ 	size = udf_ext0_offset(dir) + dir->i_size;
+ 	f_pos = udf_ext0_offset(dir);
+@@ -225,6 +228,12 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
+ 				continue;
+ 		}
+ 
++		if ((cfi->fileCharacteristics & FID_FILE_CHAR_PARENT) &&
++		    isdotdot) {
++			brelse(epos.bh);
++			return fi;
++		}
++
+ 		if (!lfi)
+ 			continue;
+ 
+@@ -286,9 +295,8 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
+ 		}
+ 	}
+ 	unlock_kernel();
+-	d_add(dentry, inode);
+ 
+-	return NULL;
++	return d_splice_alias(inode, dentry);
+ }
+ 
+ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
+@@ -307,7 +315,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
+ 	uint16_t liu;
+ 	int block;
+ 	kernel_lb_addr eloc;
+-	uint32_t elen;
++	uint32_t elen = 0;
+ 	sector_t offset;
+ 	struct extent_position epos = {};
+ 	struct udf_inode_info *dinfo;
+@@ -398,7 +406,8 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
+ 	}
+ 
+ add:
+-	if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB) {
++	/* Is there any extent whose size we need to round up? */
++	if (dinfo->i_alloc_type != ICBTAG_FLAG_AD_IN_ICB && elen) {
+ 		elen = (elen + sb->s_blocksize - 1) & ~(sb->s_blocksize - 1);
+ 		if (dinfo->i_alloc_type == ICBTAG_FLAG_AD_SHORT)
+ 			epos.offset -= sizeof(short_ad);
+@@ -1232,6 +1241,134 @@ end_rename:
+ 	return retval;
+ }
+ 
++static struct dentry *udf_get_parent(struct dentry *child)
++{
++	struct dentry *parent;
++	struct inode *inode = NULL;
++	struct dentry dotdot;
++	struct fileIdentDesc cfi;
++	struct udf_fileident_bh fibh;
++
++	dotdot.d_name.name = "..";
++	dotdot.d_name.len = 2;
++
++	lock_kernel();
++	if (!udf_find_entry(child->d_inode, &dotdot, &fibh, &cfi))
++		goto out_unlock;
++
++	if (fibh.sbh != fibh.ebh)
++		brelse(fibh.ebh);
++	brelse(fibh.sbh);
++
++	inode = udf_iget(child->d_inode->i_sb,
++			 lelb_to_cpu(cfi.icb.extLocation));
++	if (!inode)
++		goto out_unlock;
++	unlock_kernel();
++
++	parent = d_alloc_anon(inode);
++	if (!parent) {
++		iput(inode);
++		parent = ERR_PTR(-ENOMEM);
++	}
++
++	return parent;
++out_unlock:
++	unlock_kernel();
++	return ERR_PTR(-EACCES);
++}
++
++
++static struct dentry *udf_nfs_get_inode(struct super_block *sb, u32 block,
++					u16 partref, __u32 generation)
++{
++	struct inode *inode;
++	struct dentry *result;
++	kernel_lb_addr loc;
++
++	if (block == 0)
++		return ERR_PTR(-ESTALE);
++
++	loc.logicalBlockNum = block;
++	loc.partitionReferenceNum = partref;
++	inode = udf_iget(sb, loc);
++
++	if (inode == NULL)
++		return ERR_PTR(-ENOMEM);
++
++	if (generation && inode->i_generation != generation) {
++		iput(inode);
++		return ERR_PTR(-ESTALE);
++	}
++	result = d_alloc_anon(inode);
++	if (!result) {
++		iput(inode);
++		return ERR_PTR(-ENOMEM);
++	}
++	return result;
++}
++
++static struct dentry *udf_fh_to_dentry(struct super_block *sb,
++				       struct fid *fid, int fh_len, int fh_type)
++{
++	if ((fh_len != 3 && fh_len != 5) ||
++	    (fh_type != FILEID_UDF_WITH_PARENT &&
++	     fh_type != FILEID_UDF_WITHOUT_PARENT))
++		return NULL;
++
++	return udf_nfs_get_inode(sb, fid->udf.block, fid->udf.partref,
++			fid->udf.generation);
++}
++
++static struct dentry *udf_fh_to_parent(struct super_block *sb,
++				       struct fid *fid, int fh_len, int fh_type)
++{
++	if (fh_len != 5 || fh_type != FILEID_UDF_WITH_PARENT)
++		return NULL;
++
++	return udf_nfs_get_inode(sb, fid->udf.parent_block,
++				 fid->udf.parent_partref,
++				 fid->udf.parent_generation);
++}
++static int udf_encode_fh(struct dentry *de, __u32 *fh, int *lenp,
++			 int connectable)
++{
++	int len = *lenp;
++	struct inode *inode =  de->d_inode;
++	kernel_lb_addr location = UDF_I(inode)->i_location;
++	struct fid *fid = (struct fid *)fh;
++	int type = FILEID_UDF_WITHOUT_PARENT;
++
++	if (len < 3 || (connectable && len < 5))
++		return 255;
++
++	*lenp = 3;
++	fid->udf.block = location.logicalBlockNum;
++	fid->udf.partref = location.partitionReferenceNum;
++	fid->udf.generation = inode->i_generation;
++
++	if (connectable && !S_ISDIR(inode->i_mode)) {
++		spin_lock(&de->d_lock);
++		inode = de->d_parent->d_inode;
++		location = UDF_I(inode)->i_location;
++		fid->udf.parent_block = location.logicalBlockNum;
++		fid->udf.parent_partref = location.partitionReferenceNum;
++		fid->udf.parent_generation = inode->i_generation;
++		spin_unlock(&de->d_lock);
++		*lenp = 5;
++		type = FILEID_UDF_WITH_PARENT;
++	}
++
++	return type;
++}
++
++const struct export_operations udf_export_ops = {
++	.encode_fh	= udf_encode_fh,
++	.fh_to_dentry   = udf_fh_to_dentry,
++	.fh_to_parent   = udf_fh_to_parent,
++	.get_parent     = udf_get_parent,
++};
++
+ const struct inode_operations udf_dir_inode_operations = {
+ 	.lookup				= udf_lookup,
+ 	.create				= udf_create,
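
The UDF export support above has three parts: udf_encode_fh() packs block, partition reference and generation into 3 words (5 when a connectable handle also carries the parent), the fh_to_dentry/fh_to_parent callbacks decode them back via udf_nfs_get_inode(), and udf_get_parent() resolves ".." through udf_find_entry(). A userspace sketch of just the packing, assuming the field layout added to struct fid later in this patch:

/* Packs a UDF-style handle the way udf_encode_fh() does: 3 words
 * without the parent, 5 with it. Purely illustrative. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct udf_fid {
	uint32_t block;
	uint16_t partref;
	uint16_t parent_partref;
	uint32_t generation;
	uint32_t parent_block;
	uint32_t parent_generation;
};

static int encode_fh(uint32_t *fh, int *lenp, const struct udf_fid *f,
		     int connectable)
{
	if (*lenp < 3 || (connectable && *lenp < 5))
		return 255;			/* same overflow code */
	*lenp = connectable ? 5 : 3;
	memcpy(fh, f, *lenp * sizeof(*fh));
	return connectable ? 0x52 : 0x51;	/* FILEID_UDF_WITH[OUT]_PARENT */
}

int main(void)
{
	struct udf_fid f = { .block = 100, .partref = 0, .generation = 7 };
	uint32_t fh[5];
	int len = 5;
	int type = encode_fh(fh, &len, &f, 0);

	printf("fh type %#x, %d words\n", type, len);
	return 0;
}
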
+diff --git a/fs/udf/partition.c b/fs/udf/partition.c
+index 63610f0..96dfd20 100644
+--- a/fs/udf/partition.c
++++ b/fs/udf/partition.c
+@@ -27,8 +27,8 @@
+ #include <linux/slab.h>
+ #include <linux/buffer_head.h>
+ 
+-inline uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
+-			       uint16_t partition, uint32_t offset)
++uint32_t udf_get_pblock(struct super_block *sb, uint32_t block,
++			uint16_t partition, uint32_t offset)
+ {
+ 	struct udf_sb_info *sbi = UDF_SB(sb);
+ 	struct udf_part_map *map;
+diff --git a/fs/udf/super.c b/fs/udf/super.c
+index 9fb18a3..7a5f69b 100644
+--- a/fs/udf/super.c
++++ b/fs/udf/super.c
+@@ -1933,6 +1933,7 @@ static int udf_fill_super(struct super_block *sb, void *options, int silent)
+ 
+ 	/* Fill in the rest of the superblock */
+ 	sb->s_op = &udf_sb_ops;
++	sb->s_export_op = &udf_export_ops;
+ 	sb->dq_op = NULL;
+ 	sb->s_dirt = 0;
+ 	sb->s_magic = UDF_SUPER_MAGIC;
+diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
+index f3f45d0..8fa9c2d 100644
+--- a/fs/udf/udfdecl.h
++++ b/fs/udf/udfdecl.h
+@@ -73,6 +73,7 @@ struct task_struct;
+ struct buffer_head;
+ struct super_block;
+ 
++extern const struct export_operations udf_export_ops;
+ extern const struct inode_operations udf_dir_inode_operations;
+ extern const struct file_operations udf_dir_operations;
+ extern const struct inode_operations udf_file_inode_operations;
+diff --git a/include/asm-alpha/types.h b/include/asm-alpha/types.h
+index a9e34ca..c154135 100644
+--- a/include/asm-alpha/types.h
++++ b/include/asm-alpha/types.h
+@@ -23,5 +23,11 @@ typedef unsigned int umode_t;
+ 
+ #define BITS_PER_LONG 64
+ 
++#ifndef __ASSEMBLY__
++
++typedef u64 dma_addr_t;
++typedef u64 dma64_addr_t;
++
++#endif /* __ASSEMBLY__ */
+ #endif /* __KERNEL__ */
+ #endif /* _ALPHA_TYPES_H */
+diff --git a/include/asm-m68k/machw.h b/include/asm-m68k/machw.h
+index d2e0e25..3562499 100644
+--- a/include/asm-m68k/machw.h
++++ b/include/asm-m68k/machw.h
+@@ -66,36 +66,6 @@ struct MAC_SCC
+ # define mac_scc ((*(volatile struct SCC*)MAC_SCC_BAS))
+ #endif
+ 
+-/* hardware stuff */
+-
+-#define MACHW_DECLARE(name)	unsigned name : 1
+-#define MACHW_SET(name)		(mac_hw_present.name = 1)
+-#define MACHW_PRESENT(name)	(mac_hw_present.name)
+-
+-struct mac_hw_present {
+-  /* video hardware */
+-  /* sound hardware */
+-  /* disk storage interfaces */
+-  MACHW_DECLARE(MAC_SCSI_80);     /* Directly mapped NCR5380 */
+-  MACHW_DECLARE(MAC_SCSI_96);     /* 53c9[46] */
+-  MACHW_DECLARE(MAC_SCSI_96_2);   /* 2nd 53c9[46] Q900 and Q950 */
+-  MACHW_DECLARE(IDE);             /* IDE Interface */
+-  /* other I/O hardware */
+-  MACHW_DECLARE(SCC);             /* Serial Communications Contr. */
+-  /* DMA */
+-  MACHW_DECLARE(SCSI_DMA);        /* DMA for the NCR5380 */
+-  /* real time clocks */
+-  MACHW_DECLARE(RTC_CLK);         /* clock chip */
+-  /* supporting hardware */
+-  MACHW_DECLARE(VIA1);            /* Versatile Interface Ad. 1 */
+-  MACHW_DECLARE(VIA2);            /* Versatile Interface Ad. 2 */
+-  MACHW_DECLARE(RBV);             /* Versatile Interface Ad. 2+ */
+-  /* NUBUS */
+-  MACHW_DECLARE(NUBUS);           /* NUBUS */
+-};
+-
+-extern struct mac_hw_present mac_hw_present;
+-
+ #endif /* __ASSEMBLY__ */
+ 
+ #endif /* linux/machw.h */
+diff --git a/include/asm-mips/types.h b/include/asm-mips/types.h
+index 7a2ee4f..bcbb8d6 100644
+--- a/include/asm-mips/types.h
++++ b/include/asm-mips/types.h
+@@ -19,8 +19,6 @@
+ 
+ typedef unsigned short umode_t;
+ 
+-#endif
+-
+ #endif /* __ASSEMBLY__ */
+ 
+ /*
+diff --git a/include/asm-mn10300/processor.h b/include/asm-mn10300/processor.h
+index f1b081f..7323927 100644
+--- a/include/asm-mn10300/processor.h
++++ b/include/asm-mn10300/processor.h
+@@ -58,7 +58,7 @@ extern struct mn10300_cpuinfo boot_cpu_data;
+ extern void identify_cpu(struct mn10300_cpuinfo *);
+ extern void print_cpu_info(struct mn10300_cpuinfo *);
+ extern void dodgy_tsc(void);
+-#define cpu_relax() do {} while (0)
++#define cpu_relax() barrier()
+ 
+ /*
+  * User space process size: 1.75GB (default).
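
Redefining mn10300's cpu_relax() from an empty statement to barrier() matters for busy-wait loops: with no compiler barrier, gcc may hoist the load of the awaited flag out of the loop and spin forever. A sketch assuming barrier() is the usual empty asm with a memory clobber:

/* Why cpu_relax() must be at least a compiler barrier: without the
 * memory clobber, gcc at -O2 may load "flag" once and spin forever. */
#define barrier() __asm__ __volatile__("" ::: "memory")

static int flag;	/* set by an interrupt or another CPU in real code */

static void wait_for_flag(void)
{
	while (!flag)
		barrier();	/* forces a fresh load of flag each pass */
}

int main(void)
{
	flag = 1;	/* pre-set so the sketch terminates */
	wait_for_flag();
	return 0;
}
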
+diff --git a/include/asm-powerpc/io.h b/include/asm-powerpc/io.h
+index afae069..e0062d7 100644
+--- a/include/asm-powerpc/io.h
++++ b/include/asm-powerpc/io.h
+@@ -2,7 +2,7 @@
+ #define _ASM_POWERPC_IO_H
+ #ifdef __KERNEL__
+ 
+-/* 
++/*
+  * This program is free software; you can redistribute it and/or
+  * modify it under the terms of the GNU General Public License
+  * as published by the Free Software Foundation; either version
+@@ -18,6 +18,9 @@ extern int check_legacy_ioport(unsigned long base_port);
+ #define _PNPWRP		0xa79
+ #define PNPBIOS_BASE	0xf000
+ 
++#include <linux/device.h>
++#include <linux/io.h>
++
+ #include <linux/compiler.h>
+ #include <asm/page.h>
+ #include <asm/byteorder.h>
+@@ -744,6 +747,9 @@ static inline void * bus_to_virt(unsigned long address)
+ 
+ #define clrsetbits_8(addr, clear, set) clrsetbits(8, addr, clear, set)
+ 
++void __iomem *devm_ioremap_prot(struct device *dev, resource_size_t offset,
++				size_t size, unsigned long flags);
++
+ #endif /* __KERNEL__ */
+ 
+ #endif /* _ASM_POWERPC_IO_H */
+diff --git a/include/asm-powerpc/kvm_host.h b/include/asm-powerpc/kvm_host.h
+index 04ffbb8..81a69d7 100644
+--- a/include/asm-powerpc/kvm_host.h
++++ b/include/asm-powerpc/kvm_host.h
+@@ -59,6 +59,7 @@ struct kvm_vcpu_stat {
+ 	u32 emulated_inst_exits;
+ 	u32 dec_exits;
+ 	u32 ext_intr_exits;
++	u32 halt_wakeup;
+ };
+ 
+ struct tlbe {
+diff --git a/include/asm-powerpc/kvm_ppc.h b/include/asm-powerpc/kvm_ppc.h
+index 7ac8203..b35a7e3 100644
+--- a/include/asm-powerpc/kvm_ppc.h
++++ b/include/asm-powerpc/kvm_ppc.h
+@@ -77,12 +77,17 @@ static inline void kvmppc_clear_exception(struct kvm_vcpu *vcpu, int exception)
+ 	clear_bit(priority, &vcpu->arch.pending_exceptions);
+ }
+ 
++/* Helper function for "full" MSR writes. No need to call this if only EE is
++ * changing. */
+ static inline void kvmppc_set_msr(struct kvm_vcpu *vcpu, u32 new_msr)
+ {
+ 	if ((new_msr & MSR_PR) != (vcpu->arch.msr & MSR_PR))
+ 		kvmppc_mmu_priv_switch(vcpu, new_msr & MSR_PR);
+ 
+ 	vcpu->arch.msr = new_msr;
++
++	if (vcpu->arch.msr & MSR_WE)
++		kvm_vcpu_block(vcpu);
+ }
+ 
+ #endif /* __POWERPC_KVM_PPC_H__ */
+diff --git a/include/asm-powerpc/syscalls.h b/include/asm-powerpc/syscalls.h
+index b3ca41f..2b8a458 100644
+--- a/include/asm-powerpc/syscalls.h
++++ b/include/asm-powerpc/syscalls.h
+@@ -30,7 +30,7 @@ asmlinkage int sys_fork(unsigned long p1, unsigned long p2,
+ asmlinkage int sys_vfork(unsigned long p1, unsigned long p2,
+ 		unsigned long p3, unsigned long p4, unsigned long p5,
+ 		unsigned long p6, struct pt_regs *regs);
+-asmlinkage int sys_pipe(int __user *fildes);
++asmlinkage long sys_pipe(int __user *fildes);
+ asmlinkage long sys_rt_sigaction(int sig,
+ 		const struct sigaction __user *act,
+ 		struct sigaction __user *oact, size_t sigsetsize);
+diff --git a/include/asm-s390/kvm_host.h b/include/asm-s390/kvm_host.h
+index f8204a4..18cbd8a 100644
+--- a/include/asm-s390/kvm_host.h
++++ b/include/asm-s390/kvm_host.h
+@@ -104,6 +104,7 @@ struct sie_block {
+ 
+ struct kvm_vcpu_stat {
+ 	u32 exit_userspace;
++	u32 exit_null;
+ 	u32 exit_external_request;
+ 	u32 exit_external_interrupt;
+ 	u32 exit_stop_request;
+diff --git a/include/asm-s390/page.h b/include/asm-s390/page.h
+index f0f4579..12fd9c4 100644
+--- a/include/asm-s390/page.h
++++ b/include/asm-s390/page.h
+@@ -125,6 +125,17 @@ page_get_storage_key(unsigned long addr)
+ 	return skey;
+ }
+ 
++#ifdef CONFIG_PAGE_STATES
++
++struct page;
++void arch_free_page(struct page *page, int order);
++void arch_alloc_page(struct page *page, int order);
++
++#define HAVE_ARCH_FREE_PAGE
++#define HAVE_ARCH_ALLOC_PAGE
++
++#endif
++
+ #endif /* !__ASSEMBLY__ */
+ 
+ /* to align the pointer to the (next) page boundary */
+diff --git a/include/asm-s390/ptrace.h b/include/asm-s390/ptrace.h
+index 441d7c2..d7d4e2e 100644
+--- a/include/asm-s390/ptrace.h
++++ b/include/asm-s390/ptrace.h
+@@ -471,6 +471,8 @@ struct task_struct;
+ extern void user_enable_single_step(struct task_struct *);
+ extern void user_disable_single_step(struct task_struct *);
+ 
++#define __ARCH_WANT_COMPAT_SYS_PTRACE
++
+ #define user_mode(regs) (((regs)->psw.mask & PSW_MASK_PSTATE) != 0)
+ #define instruction_pointer(regs) ((regs)->psw.addr & PSW_ADDR_INSN)
+ #define regs_return_value(regs)((regs)->gprs[2])
+diff --git a/include/asm-s390/system.h b/include/asm-s390/system.h
+index c819ae2..e0d4500 100644
+--- a/include/asm-s390/system.h
++++ b/include/asm-s390/system.h
+@@ -116,6 +116,12 @@ extern void pfault_fini(void);
+ #define pfault_fini()		do { } while (0)
+ #endif /* CONFIG_PFAULT */
+ 
++#ifdef CONFIG_PAGE_STATES
++extern void cmma_init(void);
++#else
++static inline void cmma_init(void) { }
++#endif
++
+ #define finish_arch_switch(prev) do {					     \
+ 	set_fs(current->thread.mm_segment);				     \
+ 	account_vtime(prev);						     \
+diff --git a/include/asm-x86/bootparam.h b/include/asm-x86/bootparam.h
+index e865990..f62f473 100644
+--- a/include/asm-x86/bootparam.h
++++ b/include/asm-x86/bootparam.h
+@@ -14,10 +14,10 @@
+ 
+ /* extensible setup data list node */
+ struct setup_data {
+-	u64 next;
+-	u32 type;
+-	u32 len;
+-	u8 data[0];
++	__u64 next;
++	__u32 type;
++	__u32 len;
++	__u8 data[0];
+ };
+ 
+ struct setup_header {
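
bootparam.h is consumed by userspace boot loaders, so its structures must use the exported __u64/__u32/__u8 types from <linux/types.h>; the kernel-internal u64/u32/u8 spellings do not exist outside the kernel. A userspace sketch of the convention, mirroring the fixed struct:

/* Userspace can include the exported types directly; the plain
 * u64/u32/u8 spellings would not compile here. */
#include <linux/types.h>
#include <stdio.h>

struct setup_data_like {	/* mirrors the fixed struct above */
	__u64 next;
	__u32 type;
	__u32 len;
	__u8  data[0];
};

int main(void)
{
	printf("node size: %zu bytes\n", sizeof(struct setup_data_like));
	return 0;
}
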
+diff --git a/include/asm-x86/kvm_host.h b/include/asm-x86/kvm_host.h
+index 9d963cd..1d8cd01 100644
+--- a/include/asm-x86/kvm_host.h
++++ b/include/asm-x86/kvm_host.h
+@@ -314,6 +314,9 @@ struct kvm_arch{
+ 	struct page *apic_access_page;
+ 
+ 	gpa_t wall_clock;
++
++	struct page *ept_identity_pagetable;
++	bool ept_identity_pagetable_done;
+ };
+ 
+ struct kvm_vm_stat {
+@@ -422,6 +425,7 @@ struct kvm_x86_ops {
+ 				       struct kvm_run *run);
+ 
+ 	int (*set_tss_addr)(struct kvm *kvm, unsigned int addr);
++	int (*get_tdp_level)(void);
+ };
+ 
+ extern struct kvm_x86_ops *kvm_x86_ops;
+@@ -433,6 +437,9 @@ void kvm_mmu_destroy(struct kvm_vcpu *vcpu);
+ int kvm_mmu_create(struct kvm_vcpu *vcpu);
+ int kvm_mmu_setup(struct kvm_vcpu *vcpu);
+ void kvm_mmu_set_nonpresent_ptes(u64 trap_pte, u64 notrap_pte);
++void kvm_mmu_set_base_ptes(u64 base_pte);
++void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
++		u64 dirty_mask, u64 nx_mask, u64 x_mask);
+ 
+ int kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
+ void kvm_mmu_slot_remove_write_access(struct kvm *kvm, int slot);
+@@ -620,7 +627,7 @@ static inline void fx_restore(struct i387_fxsave_struct *image)
+ 	asm("fxrstor (%0)":: "r" (image));
+ }
+ 
+-static inline void fpu_init(void)
++static inline void fx_finit(void)
+ {
+ 	asm("finit");
+ }
+@@ -644,6 +651,7 @@ static inline void kvm_inject_gp(struct kvm_vcpu *vcpu, u32 error_code)
+ #define ASM_VMX_VMWRITE_RSP_RDX   ".byte 0x0f, 0x79, 0xd4"
+ #define ASM_VMX_VMXOFF            ".byte 0x0f, 0x01, 0xc4"
+ #define ASM_VMX_VMXON_RAX         ".byte 0xf3, 0x0f, 0xc7, 0x30"
++#define ASM_VMX_INVEPT		  ".byte 0x66, 0x0f, 0x38, 0x80, 0x08"
+ #define ASM_VMX_INVVPID		  ".byte 0x66, 0x0f, 0x38, 0x81, 0x08"
+ 
+ #define MSR_IA32_TIME_STAMP_COUNTER		0x010
+diff --git a/include/asm-x86/pgtable_32.h b/include/asm-x86/pgtable_32.h
+index 577ab79..d7f0403 100644
+--- a/include/asm-x86/pgtable_32.h
++++ b/include/asm-x86/pgtable_32.h
+@@ -88,14 +88,7 @@ extern unsigned long pg0[];
+ /* To avoid harmful races, pmd_none(x) should check only the lower when PAE */
+ #define pmd_none(x)	(!(unsigned long)pmd_val((x)))
+ #define pmd_present(x)	(pmd_val((x)) & _PAGE_PRESENT)
+-
+-extern int pmd_bad(pmd_t pmd);
+-
+-#define pmd_bad_v1(x)							\
+-	(_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER)))
+-#define	pmd_bad_v2(x)							\
+-	(_KERNPG_TABLE != (pmd_val((x)) & ~(PAGE_MASK | _PAGE_USER |	\
+-					    _PAGE_PSE | _PAGE_NX)))
++#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
+ 
+ #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+ 
+diff --git a/include/asm-x86/pgtable_64.h b/include/asm-x86/pgtable_64.h
+index a3bbf87..efe83dc 100644
+--- a/include/asm-x86/pgtable_64.h
++++ b/include/asm-x86/pgtable_64.h
+@@ -158,14 +158,12 @@ static inline unsigned long pgd_bad(pgd_t pgd)
+ 
+ static inline unsigned long pud_bad(pud_t pud)
+ {
+-	return pud_val(pud) &
+-		~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
++	return pud_val(pud) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+ }
+ 
+ static inline unsigned long pmd_bad(pmd_t pmd)
+ {
+-	return pmd_val(pmd) &
+-		~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER | _PAGE_PSE | _PAGE_NX);
++	return pmd_val(pmd) & ~(PTE_MASK | _KERNPG_TABLE | _PAGE_USER);
+ }
+ 
+ #define pte_none(x)	(!pte_val((x)))
+diff --git a/include/linux/exportfs.h b/include/linux/exportfs.h
+index de8387b..f5abd13 100644
+--- a/include/linux/exportfs.h
++++ b/include/linux/exportfs.h
+@@ -33,6 +33,19 @@ enum fid_type {
+ 	 * 32 bit parent directory inode number.
+ 	 */
+ 	FILEID_INO32_GEN_PARENT = 2,
++
++	/*
++	 * 32 bit block number, 16 bit partition reference,
++	 * 16 bit unused, 32 bit generation number.
++	 */
++	FILEID_UDF_WITHOUT_PARENT = 0x51,
++
++	/*
++	 * 32 bit block number, 16 bit partition reference,
++	 * 16 bit unused, 32 bit generation number,
++	 * 32 bit parent block number, 32 bit parent generation number
++	 */
++	FILEID_UDF_WITH_PARENT = 0x52,
+ };
+ 
+ struct fid {
+@@ -43,6 +56,14 @@ struct fid {
+ 			u32 parent_ino;
+ 			u32 parent_gen;
+ 		} i32;
++		struct {
++			u32 block;
++			u16 partref;
++			u16 parent_partref;
++			u32 generation;
++			u32 parent_block;
++			u32 parent_generation;
++		} udf;
+ 		__u32 raw[0];
+ 	};
+ };
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index a1ba005..f413085 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -1289,17 +1289,12 @@ extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
+ extern ssize_t vfs_writev(struct file *, const struct iovec __user *,
+ 		unsigned long, loff_t *);
+ 
+-/*
+- * NOTE: write_inode, delete_inode, clear_inode, put_inode can be called
+- * without the big kernel lock held in all filesystems.
+- */
+ struct super_operations {
+    	struct inode *(*alloc_inode)(struct super_block *sb);
+ 	void (*destroy_inode)(struct inode *);
+ 
+    	void (*dirty_inode) (struct inode *);
+ 	int (*write_inode) (struct inode *, int);
+-	void (*put_inode) (struct inode *);
+ 	void (*drop_inode) (struct inode *);
+ 	void (*delete_inode) (struct inode *);
+ 	void (*put_super) (struct super_block *);
+@@ -1821,7 +1816,6 @@ extern void iget_failed(struct inode *);
+ extern void clear_inode(struct inode *);
+ extern void destroy_inode(struct inode *);
+ extern struct inode *new_inode(struct super_block *);
+-extern int __remove_suid(struct dentry *, int);
+ extern int should_remove_suid(struct dentry *);
+ extern int remove_suid(struct dentry *);
+ 
+diff --git a/include/linux/genhd.h b/include/linux/genhd.h
+index ecd2bf6..e9874e7 100644
+--- a/include/linux/genhd.h
++++ b/include/linux/genhd.h
+@@ -178,17 +178,17 @@ static inline struct hd_struct *get_part(struct gendisk *gendiskp,
+ 
+ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
+ 	int i;
++
+ 	for_each_possible_cpu(i)
+ 		memset(per_cpu_ptr(gendiskp->dkstats, i), value,
+-				sizeof (struct disk_stats));
++				sizeof(struct disk_stats));
+ }		
+ 
+ #define __part_stat_add(part, field, addnd)				\
+ 	(per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd)
+ 
+-#define __all_stat_add(gendiskp, field, addnd, sector)		\
++#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
+ ({								\
+-	struct hd_struct *part = get_part(gendiskp, sector);	\
+ 	if (part)						\
+ 		__part_stat_add(part, field, addnd);		\
+ 	__disk_stat_add(gendiskp, field, addnd);		\
+@@ -203,11 +203,13 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)	{
+ 	res;								\
+ })
+ 
+-static inline void part_stat_set_all(struct hd_struct *part, int value)	{
++static inline void part_stat_set_all(struct hd_struct *part, int value)
++{
+ 	int i;
++
+ 	for_each_possible_cpu(i)
+ 		memset(per_cpu_ptr(part->dkstats, i), value,
+-		       sizeof(struct disk_stats));
++				sizeof(struct disk_stats));
+ }
+ 				
+ #else /* !CONFIG_SMP */
+@@ -223,9 +225,8 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value)
+ #define __part_stat_add(part, field, addnd) \
+ 	(part->dkstats.field += addnd)
+ 
+-#define __all_stat_add(gendiskp, field, addnd, sector)		\
++#define __all_stat_add(gendiskp, part, field, addnd, sector)	\
+ ({								\
+-	struct hd_struct *part = get_part(gendiskp, sector);	\
+ 	if (part)						\
+ 		part->dkstats.field += addnd;			\
+ 	__disk_stat_add(gendiskp, field, addnd);		\
+@@ -276,10 +277,10 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
+ #define part_stat_sub(gendiskp, field, subnd) \
+ 		part_stat_add(gendiskp, field, -subnd)
+ 
+-#define all_stat_add(gendiskp, field, addnd, sector)		\
++#define all_stat_add(gendiskp, part, field, addnd, sector)	\
+ 	do {							\
+ 		preempt_disable();				\
+-		__all_stat_add(gendiskp, field, addnd, sector);	\
++		__all_stat_add(gendiskp, part, field, addnd, sector);	\
+ 		preempt_enable();				\
+ 	} while (0)
+ 
+@@ -288,15 +289,15 @@ static inline void part_stat_set_all(struct hd_struct *part, int value)
+ #define all_stat_dec(gendiskp, field, sector) \
+ 		all_stat_add(gendiskp, field, -1, sector)
+ 
+-#define __all_stat_inc(gendiskp, field, sector) \
+-		__all_stat_add(gendiskp, field, 1, sector)
+-#define all_stat_inc(gendiskp, field, sector) \
+-		all_stat_add(gendiskp, field, 1, sector)
++#define __all_stat_inc(gendiskp, part, field, sector) \
++		__all_stat_add(gendiskp, part, field, 1, sector)
++#define all_stat_inc(gendiskp, part, field, sector) \
++		all_stat_add(gendiskp, part, field, 1, sector)
+ 
+-#define __all_stat_sub(gendiskp, field, subnd, sector) \
+-		__all_stat_add(gendiskp, field, -subnd, sector)
+-#define all_stat_sub(gendiskp, field, subnd, sector) \
+-		all_stat_add(gendiskp, field, -subnd, sector)
++#define __all_stat_sub(gendiskp, part, field, subnd, sector) \
++		__all_stat_add(gendiskp, part, field, -subnd, sector)
++#define all_stat_sub(gendiskp, part, field, subnd, sector) \
++		all_stat_add(gendiskp, part, field, -subnd, sector)
+ 
+ /* Inlines to alloc and free disk stats in struct gendisk */
+ #ifdef  CONFIG_SMP
+diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
+index 31a4d65..6d93dce 100644
+--- a/include/linux/hrtimer.h
++++ b/include/linux/hrtimer.h
+@@ -316,6 +316,15 @@ static inline int hrtimer_is_queued(struct hrtimer *timer)
+ 		(HRTIMER_STATE_ENQUEUED | HRTIMER_STATE_PENDING);
+ }
+ 
++/*
++ * Helper function to check whether the timer is running the callback
++ * function.
++ */
++static inline int hrtimer_callback_running(struct hrtimer *timer)
++{
++	return timer->state & HRTIMER_STATE_CALLBACK;
++}
++
+ /* Forward a hrtimer so it expires after now: */
+ extern u64
+ hrtimer_forward(struct hrtimer *timer, ktime_t now, ktime_t interval);
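
The new hrtimer_callback_running() helper is a one-line state test: it reports whether HRTIMER_STATE_CALLBACK is set in timer->state. A tiny standalone sketch of the same bitmask-accessor pattern (bit values invented for illustration):

#include <stdio.h>

#define STATE_ENQUEUED	0x01	/* bit values invented for the sketch */
#define STATE_CALLBACK	0x02

struct timer { unsigned int state; };

static int callback_running(const struct timer *t)
{
	return t->state & STATE_CALLBACK;	/* nonzero while it runs */
}

int main(void)
{
	struct timer t = { .state = STATE_CALLBACK };

	printf("callback running: %d\n", !!callback_running(&t));
	return 0;
}
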
+diff --git a/include/linux/io.h b/include/linux/io.h
+index 3a03a36..6c7f0ba 100644
+--- a/include/linux/io.h
++++ b/include/linux/io.h
+@@ -65,5 +65,6 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
+ void devm_iounmap(struct device *dev, void __iomem *addr);
+ int check_signature(const volatile void __iomem *io_addr,
+ 			const unsigned char *signature, int length);
++void devm_ioremap_release(struct device *dev, void *res);
+ 
+ #endif /* _LINUX_IO_H */
+diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h
+index 2a3bb1b..f98a656 100644
+--- a/include/linux/ioprio.h
++++ b/include/linux/ioprio.h
+@@ -68,6 +68,20 @@ static inline int task_nice_ioprio(struct task_struct *task)
+ }
+ 
+ /*
++ * This is for the case where the task hasn't asked for a specific IO class.
++ * Check for idle and rt tasks, and return the appropriate IO class.
++ */
++static inline int task_nice_ioclass(struct task_struct *task)
++{
++	if (task->policy == SCHED_IDLE)
++		return IOPRIO_CLASS_IDLE;
++	else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR)
++		return IOPRIO_CLASS_RT;
++	else
++		return IOPRIO_CLASS_BE;
++}
++
++/*
+  * For inheritance, return the highest of the two given priorities
+  */
+ extern int ioprio_best(unsigned short aprio, unsigned short bprio);
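
task_nice_ioclass() derives a default I/O class from the CPU scheduling policy when the task never set one explicitly: SCHED_IDLE tasks get the idle class, the realtime policies map to the RT class, and everything else falls back to best-effort. A standalone rendering of the same decision table:

#include <stdio.h>

enum policy { SCHED_NORMAL, SCHED_FIFO, SCHED_RR, SCHED_IDLE };
enum ioclass { IOPRIO_CLASS_RT = 1, IOPRIO_CLASS_BE, IOPRIO_CLASS_IDLE };

/* Same decision table as task_nice_ioclass() above. */
static enum ioclass nice_ioclass(enum policy p)
{
	if (p == SCHED_IDLE)
		return IOPRIO_CLASS_IDLE;
	if (p == SCHED_FIFO || p == SCHED_RR)
		return IOPRIO_CLASS_RT;
	return IOPRIO_CLASS_BE;
}

int main(void)
{
	printf("SCHED_RR     -> %d\n", nice_ioclass(SCHED_RR));
	printf("SCHED_IDLE   -> %d\n", nice_ioclass(SCHED_IDLE));
	printf("SCHED_NORMAL -> %d\n", nice_ioclass(SCHED_NORMAL));
	return 0;
}
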
+diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
+index 9757b1a..6adcc29 100644
+--- a/include/linux/kgdb.h
++++ b/include/linux/kgdb.h
+@@ -261,10 +261,12 @@ struct kgdb_io {
+ 
+ extern struct kgdb_arch		arch_kgdb_ops;
+ 
++extern unsigned long __weak kgdb_arch_pc(int exception, struct pt_regs *regs);
++
+ extern int kgdb_register_io_module(struct kgdb_io *local_kgdb_io_ops);
+ extern void kgdb_unregister_io_module(struct kgdb_io *local_kgdb_io_ops);
+ 
+-extern int kgdb_hex2long(char **ptr, long *long_val);
++extern int kgdb_hex2long(char **ptr, unsigned long *long_val);
+ extern int kgdb_mem2hex(char *mem, char *buf, int count);
+ extern int kgdb_hex2mem(char *buf, char *mem, int count);
+ 
+diff --git a/include/linux/libata.h b/include/linux/libata.h
+index d1dfe87..7e206da 100644
+--- a/include/linux/libata.h
++++ b/include/linux/libata.h
+@@ -1039,6 +1039,7 @@ extern void ata_eh_thaw_port(struct ata_port *ap);
+ 
+ extern void ata_eh_qc_complete(struct ata_queued_cmd *qc);
+ extern void ata_eh_qc_retry(struct ata_queued_cmd *qc);
++extern void ata_eh_analyze_ncq_error(struct ata_link *link);
+ 
+ extern void ata_do_eh(struct ata_port *ap, ata_prereset_fn_t prereset,
+ 		      ata_reset_fn_t softreset, ata_reset_fn_t hardreset,
+@@ -1381,6 +1382,21 @@ static inline struct ata_port *ata_shost_to_port(struct Scsi_Host *host)
+ 	return *(struct ata_port **)&host->hostdata[0];
+ }
+ 
++static inline int ata_check_ready(u8 status)
++{
++	/* Some controllers report 0x77 or 0x7f during intermediate
++	 * not-ready stages.
++	 */
++	if (status == 0x77 || status == 0x7f)
++		return 0;
++
++	/* 0xff indicates either no device or device not ready */
++	if (status == 0xff)
++		return -ENODEV;
++
++	return !(status & ATA_BUSY);
++}
++
+ 
+ /**************************************************************************
+  * PMP - drivers/ata/libata-pmp.c
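
ata_check_ready() folds the status-register decoding into a single return convention: 0 for the transient 0x77/0x7f not-ready patterns some controllers report, -ENODEV for 0xff (no device behind the port), and otherwise the inverse of the BUSY bit. A quick userspace check of the same logic, assuming ATA_BUSY is bit 7 of the status byte:

#include <errno.h>
#include <stdio.h>

#define ATA_BUSY 0x80	/* BSY is bit 7 of the ATA status register */

static int check_ready(unsigned char status)
{
	if (status == 0x77 || status == 0x7f)
		return 0;		/* intermediate not-ready pattern */
	if (status == 0xff)
		return -ENODEV;		/* no device, or not responding */
	return !(status & ATA_BUSY);	/* 1 = ready, 0 = still busy */
}

int main(void)
{
	unsigned char samples[] = { 0x50, 0x80, 0x77, 0xff };
	int i;

	for (i = 0; i < 4; i++)
		printf("status %#04x -> %d\n", (unsigned)samples[i],
		       check_ready(samples[i]));
	return 0;
}
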
+diff --git a/include/linux/mv643xx_eth.h b/include/linux/mv643xx_eth.h
+index 30e11aa..a15cdd4 100644
+--- a/include/linux/mv643xx_eth.h
++++ b/include/linux/mv643xx_eth.h
+@@ -1,19 +1,31 @@
+ /*
+  * MV-643XX ethernet platform device data definition file.
+  */
++
+ #ifndef __LINUX_MV643XX_ETH_H
+ #define __LINUX_MV643XX_ETH_H
+ 
+-#define MV643XX_ETH_SHARED_NAME		"mv643xx_eth_shared"
+-#define MV643XX_ETH_NAME		"mv643xx_eth"
++#include <linux/mbus.h>
++
++#define MV643XX_ETH_SHARED_NAME		"mv643xx_eth"
++#define MV643XX_ETH_NAME		"mv643xx_eth_port"
+ #define MV643XX_ETH_SHARED_REGS		0x2000
+ #define MV643XX_ETH_SHARED_REGS_SIZE	0x2000
+ #define MV643XX_ETH_BAR_4		0x2220
+ #define MV643XX_ETH_SIZE_REG_4		0x2224
+ #define MV643XX_ETH_BASE_ADDR_ENABLE_REG	0x2290
+ 
++struct mv643xx_eth_shared_platform_data {
++	struct mbus_dram_target_info	*dram;
++	unsigned int	t_clk;
++};
++
+ struct mv643xx_eth_platform_data {
++	struct platform_device	*shared;
+ 	int		port_number;
++
++	struct platform_device	*shared_smi;
++
+ 	u16		force_phy_addr;	/* force override if phy_addr == 0 */
+ 	u16		phy_addr;
+ 
+diff --git a/include/linux/netfilter/nf_conntrack_sip.h b/include/linux/netfilter/nf_conntrack_sip.h
+index 5da04e5..23aa2ec 100644
+--- a/include/linux/netfilter/nf_conntrack_sip.h
++++ b/include/linux/netfilter/nf_conntrack_sip.h
+@@ -7,6 +7,7 @@
+ 
+ struct nf_ct_sip_master {
+ 	unsigned int	register_cseq;
++	unsigned int	invite_cseq;
+ };
+ 
+ enum sip_expectation_classes {
+diff --git a/include/linux/pci.h b/include/linux/pci.h
+index 96acd0d..509159b 100644
+--- a/include/linux/pci.h
++++ b/include/linux/pci.h
+@@ -44,6 +44,7 @@
+ #include <linux/mod_devicetable.h>
+ 
+ #include <linux/types.h>
++#include <linux/init.h>
+ #include <linux/ioport.h>
+ #include <linux/list.h>
+ #include <linux/compiler.h>
+@@ -474,7 +475,7 @@ extern struct pci_bus *pci_find_bus(int domain, int busnr);
+ void pci_bus_add_devices(struct pci_bus *bus);
+ struct pci_bus *pci_scan_bus_parented(struct device *parent, int bus,
+ 				      struct pci_ops *ops, void *sysdata);
+-static inline struct pci_bus *pci_scan_bus(int bus, struct pci_ops *ops,
++static inline struct pci_bus * __devinit pci_scan_bus(int bus, struct pci_ops *ops,
+ 					   void *sysdata)
+ {
+ 	struct pci_bus *root_bus;
+@@ -666,7 +667,7 @@ int pci_scan_bridge(struct pci_bus *bus, struct pci_dev *dev, int max,
+ 
+ void pci_walk_bus(struct pci_bus *top, void (*cb)(struct pci_dev *, void *),
+ 		  void *userdata);
+-int pci_cfg_space_size_ext(struct pci_dev *dev, unsigned check_exp_pcix);
++int pci_cfg_space_size_ext(struct pci_dev *dev);
+ int pci_cfg_space_size(struct pci_dev *dev);
+ unsigned char pci_bus_max_busnr(struct pci_bus *bus);
+ 
+diff --git a/include/linux/phy.h b/include/linux/phy.h
+index 02df20f..7224c40 100644
+--- a/include/linux/phy.h
++++ b/include/linux/phy.h
+@@ -412,6 +412,8 @@ int mdiobus_register(struct mii_bus *bus);
+ void mdiobus_unregister(struct mii_bus *bus);
+ void phy_sanitize_settings(struct phy_device *phydev);
+ int phy_stop_interrupts(struct phy_device *phydev);
++int phy_enable_interrupts(struct phy_device *phydev);
++int phy_disable_interrupts(struct phy_device *phydev);
+ 
+ static inline int phy_read_status(struct phy_device *phydev) {
+ 	return phydev->drv->read_status(phydev);
+@@ -447,5 +449,8 @@ int phy_register_fixup_for_uid(u32 phy_uid, u32 phy_uid_mask,
+ 		int (*run)(struct phy_device *));
+ int phy_scan_fixups(struct phy_device *phydev);
+ 
++int __init mdio_bus_init(void);
++void mdio_bus_exit(void);
++
+ extern struct bus_type mdio_bus_type;
+ #endif /* __PHY_H */
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index 03c2380..0c35b03 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -158,6 +158,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq)
+ }
+ #endif
+ 
++extern unsigned long long time_sync_thresh;
++
+ /*
+  * Task state bitmask. NOTE! These bits are also
+  * encoded in fs/proc/array.c: get_task_state().
+@@ -1551,6 +1553,35 @@ static inline int set_cpus_allowed(struct task_struct *p, cpumask_t new_mask)
+ 
+ extern unsigned long long sched_clock(void);
+ 
++#ifndef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
++static inline void sched_clock_init(void)
++{
++}
++
++static inline u64 sched_clock_cpu(int cpu)
++{
++	return sched_clock();
++}
++
++static inline void sched_clock_tick(void)
++{
++}
++
++static inline void sched_clock_idle_sleep_event(void)
++{
++}
++
++static inline void sched_clock_idle_wakeup_event(u64 delta_ns)
++{
++}
++#else
++extern void sched_clock_init(void);
++extern u64 sched_clock_cpu(int cpu);
++extern void sched_clock_tick(void);
++extern void sched_clock_idle_sleep_event(void);
++extern void sched_clock_idle_wakeup_event(u64 delta_ns);
++#endif
++
+ /*
+  * For kernel-internal use: high-speed (but slightly incorrect) per-cpu
+  * clock constructed from sched_clock():
+@@ -1977,6 +2008,11 @@ static inline void clear_tsk_need_resched(struct task_struct *tsk)
+ 	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
+ }
+ 
++static inline int test_tsk_need_resched(struct task_struct *tsk)
++{
++	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
++}
++
+ static inline int signal_pending(struct task_struct *p)
+ {
+ 	return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
+@@ -1991,7 +2027,7 @@ static inline int fatal_signal_pending(struct task_struct *p)
+ 
+ static inline int need_resched(void)
+ {
+-	return unlikely(test_thread_flag(TIF_NEED_RESCHED));
++	return unlikely(test_tsk_need_resched(current));
+ }
+ 
+ /*
+diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h
+index 27bad59..7858eac 100644
+--- a/include/linux/sysfs.h
++++ b/include/linux/sysfs.h
+@@ -196,12 +196,6 @@ static inline int sysfs_update_group(struct kobject *kobj,
+ 	return 0;
+ }
+ 
+-static inline int sysfs_update_group(struct kobject *kobj,
+-				const struct attribute_group *grp)
+-{
+-	return 0;
+-}
+-
+ static inline void sysfs_remove_group(struct kobject *kobj,
+ 				      const struct attribute_group *grp)
+ {
+diff --git a/include/net/ip.h b/include/net/ip.h
+index 6d7bcd5..3b40bc2 100644
+--- a/include/net/ip.h
++++ b/include/net/ip.h
+@@ -210,7 +210,7 @@ int ip_dont_fragment(struct sock *sk, struct dst_entry *dst)
+ {
+ 	return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO ||
+ 		(inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT &&
+-		 !(dst_metric(dst, RTAX_LOCK)&(1<<RTAX_MTU))));
++		 !(dst_metric_locked(dst, RTAX_MTU))));
+ }
+ 
+ extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more);
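
The ip_dont_fragment() hunk above replaces the open-coded RTAX_LOCK bit test
with dst_metric_locked(), which expresses the same check. A standalone model
of that equivalence (the metric array here is illustrative; the enum values
are chosen to mirror the kernel's RTAX_LOCK=1, RTAX_MTU=2):

    #include <stdio.h>

    enum { RTAX_LOCK = 1, RTAX_MTU = 2 };

    static unsigned metrics[16];

    static unsigned dst_metric(int which) { return metrics[which]; }

    /* bit 'which' of the RTAX_LOCK metric marks that metric as locked */
    static int dst_metric_locked(int which)
    {
        return dst_metric(RTAX_LOCK) & (1 << which);
    }

    int main(void)
    {
        metrics[RTAX_LOCK] = 1 << RTAX_MTU;     /* lock the MTU metric */
        /* the old open-coded form and the helper agree: */
        printf("%d %d\n",
               !!(dst_metric(RTAX_LOCK) & (1 << RTAX_MTU)),
               !!dst_metric_locked(RTAX_MTU));
        return 0;
    }
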
+diff --git a/include/net/xfrm.h b/include/net/xfrm.h
+index d1350bc..2933d74 100644
+--- a/include/net/xfrm.h
++++ b/include/net/xfrm.h
+@@ -648,14 +648,46 @@ extern void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
+ extern void xfrm_audit_state_icvfail(struct xfrm_state *x,
+ 				     struct sk_buff *skb, u8 proto);
+ #else
+-#define xfrm_audit_policy_add(x, r, a, se, s)	do { ; } while (0)
+-#define xfrm_audit_policy_delete(x, r, a, se, s)	do { ; } while (0)
+-#define xfrm_audit_state_add(x, r, a, se, s)	do { ; } while (0)
+-#define xfrm_audit_state_delete(x, r, a, se, s)	do { ; } while (0)
+-#define xfrm_audit_state_replay_overflow(x, s)	do { ; } while (0)
+-#define xfrm_audit_state_notfound_simple(s, f)	do { ; } while (0)
+-#define xfrm_audit_state_notfound(s, f, sp, sq)	do { ; } while (0)
+-#define xfrm_audit_state_icvfail(x, s, p)	do { ; } while (0)
++
++static inline void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
++				  u32 auid, u32 ses, u32 secid)
++{
++}
++
++static inline void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
++				  u32 auid, u32 ses, u32 secid)
++{
++}
++
++static inline void xfrm_audit_state_add(struct xfrm_state *x, int result,
++				 u32 auid, u32 ses, u32 secid)
++{
++}
++
++static inline void xfrm_audit_state_delete(struct xfrm_state *x, int result,
++				    u32 auid, u32 ses, u32 secid)
++{
++}
++
++static inline void xfrm_audit_state_replay_overflow(struct xfrm_state *x,
++					     struct sk_buff *skb)
++{
++}
++
++static inline void xfrm_audit_state_notfound_simple(struct sk_buff *skb,
++				      u16 family)
++{
++}
++
++static inline void xfrm_audit_state_notfound(struct sk_buff *skb, u16 family,
++				      __be32 net_spi, __be32 net_seq)
++{
++}
++
++static inline void xfrm_audit_state_icvfail(struct xfrm_state *x,
++				     struct sk_buff *skb, u8 proto)
++{
++}
+ #endif /* CONFIG_AUDITSYSCALL */
+ 
+ static inline void xfrm_pol_hold(struct xfrm_policy *policy)
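
The xfrm_audit_* stubs above follow a common kernel idiom: replacing empty
do { } while (0) macros with empty static inline functions, so arguments are
still type-checked even when CONFIG_AUDITSYSCALL is off. A tiny sketch of the
difference (names hypothetical):

    #include <stdio.h>

    /* macro stub: arguments vanish at preprocessing, nothing is checked */
    #define audit_event_macro(x, r) do { } while (0)

    /* inline stub: compiles to nothing, but arguments are type-checked */
    static inline void audit_event_inline(const char *x, int r)
    {
        (void)x; (void)r;
    }

    int main(void)
    {
        audit_event_macro(42, "oops");   /* compiles despite bogus args */
        audit_event_inline("ok", 1);     /* wrong args here would not */
        printf("both stubs are no-ops\n");
        return 0;
    }
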
+diff --git a/init/Kconfig b/init/Kconfig
+index 6a44def..3b5adbf 100644
+--- a/init/Kconfig
++++ b/init/Kconfig
+@@ -316,9 +316,16 @@ config CPUSETS
+ 
+ 	  Say N if unsure.
+ 
++#
++# Architectures with an unreliable sched_clock() should select this:
++#
++config HAVE_UNSTABLE_SCHED_CLOCK
++	bool
++
+ config GROUP_SCHED
+ 	bool "Group CPU scheduler"
+-	default y
++	depends on EXPERIMENTAL
++	default n
+ 	help
+ 	  This feature lets CPU scheduler recognize task groups and control CPU
+ 	  bandwidth allocation to such task groups.
+@@ -326,7 +333,7 @@ config GROUP_SCHED
+ config FAIR_GROUP_SCHED
+ 	bool "Group scheduling for SCHED_OTHER"
+ 	depends on GROUP_SCHED
+-	default y
++	default GROUP_SCHED
+ 
+ config RT_GROUP_SCHED
+ 	bool "Group scheduling for SCHED_RR/FIFO"
+@@ -627,6 +634,14 @@ config ELF_CORE
+ 	help
+ 	  Enable support for generating core dumps. Disabling saves about 4k.
+ 
++config PCSPKR_PLATFORM
++	bool "Enable PC-Speaker support" if EMBEDDED
++	depends on ALPHA || X86 || MIPS || PPC_PREP || PPC_CHRP || PPC_PSERIES
++	default y
++	help
++	  This option allows disabling the internal PC-Speaker
++	  support, saving some memory.
++
+ config COMPAT_BRK
+ 	bool "Disable heap randomization"
+ 	default y
+@@ -825,6 +840,15 @@ menuconfig MODULES
+ 
+ 	  If unsure, say Y.
+ 
++config MODULE_FORCE_LOAD
++	bool "Forced module loading"
++	depends on MODULES
++	default n
++	help
++	  This option allows loading of modules even if that would set the
++	  'F' (forced) taint due to a lack of version info, which is
++	  usually a really bad idea.
++
+ config MODULE_UNLOAD
+ 	bool "Module unloading"
+ 	depends on MODULES
+diff --git a/init/main.c b/init/main.c
+index a87d4ca..ddada7a 100644
+--- a/init/main.c
++++ b/init/main.c
+@@ -602,6 +602,7 @@ asmlinkage void __init start_kernel(void)
+ 	softirq_init();
+ 	timekeeping_init();
+ 	time_init();
++	sched_clock_init();
+ 	profile_init();
+ 	if (!irqs_disabled())
+ 		printk("start_kernel(): bug: interrupts were enabled early\n");
+diff --git a/ipc/mqueue.c b/ipc/mqueue.c
+index 94fd3b0..b3b69fd 100644
+--- a/ipc/mqueue.c
++++ b/ipc/mqueue.c
+@@ -673,7 +673,7 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
+ 	if (IS_ERR(name = getname(u_name)))
+ 		return PTR_ERR(name);
+ 
+-	fd = get_unused_fd();
++	fd = get_unused_fd_flags(O_CLOEXEC);
+ 	if (fd < 0)
+ 		goto out_putname;
+ 
+@@ -709,7 +709,6 @@ asmlinkage long sys_mq_open(const char __user *u_name, int oflag, mode_t mode,
+ 		goto out_putfd;
+ 	}
+ 
+-	set_close_on_exec(fd, 1);
+ 	fd_install(fd, filp);
+ 	goto out_upsem;
+ 
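
The ipc/mqueue.c change above swaps the two-step get_unused_fd() plus
set_close_on_exec() for an atomic get_unused_fd_flags(O_CLOEXEC), so there is
no window in which the new descriptor could leak across a concurrent
fork()/exec(). A sketch of the userspace analogue of the same idea:

    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(void)
    {
        /* racy two-step: another thread could fork()+exec() in between */
        int fd1 = open("/dev/null", O_RDONLY);
        fcntl(fd1, F_SETFD, FD_CLOEXEC);

        /* atomic: the flag is applied as the descriptor is allocated */
        int fd2 = open("/dev/null", O_RDONLY | O_CLOEXEC);

        printf("fd1=%d fd2=%d\n", fd1, fd2);
        close(fd1);
        close(fd2);
        return 0;
    }
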
+diff --git a/kernel/Makefile b/kernel/Makefile
+index 188c432..1c9938a 100644
+--- a/kernel/Makefile
++++ b/kernel/Makefile
+@@ -9,7 +9,7 @@ obj-y     = sched.o fork.o exec_domain.o panic.o printk.o profile.o \
+ 	    rcupdate.o extable.o params.o posix-timers.o \
+ 	    kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \
+ 	    hrtimer.o rwsem.o nsproxy.o srcu.o semaphore.o \
+-	    notifier.o ksysfs.o pm_qos_params.o
++	    notifier.o ksysfs.o pm_qos_params.o sched_clock.o
+ 
+ obj-$(CONFIG_SYSCTL_SYSCALL_CHECK) += sysctl_check.o
+ obj-$(CONFIG_STACKTRACE) += stacktrace.o
+diff --git a/kernel/cpuset.c b/kernel/cpuset.c
+index 8da627d..86ea9e3 100644
+--- a/kernel/cpuset.c
++++ b/kernel/cpuset.c
+@@ -1031,11 +1031,9 @@ int current_cpuset_is_being_rebound(void)
+ 	return task_cs(current) == cpuset_being_rebound;
+ }
+ 
+-static int update_relax_domain_level(struct cpuset *cs, char *buf)
++static int update_relax_domain_level(struct cpuset *cs, s64 val)
+ {
+-	int val = simple_strtol(buf, NULL, 10);
+-
+-	if (val < 0)
++	if ((int)val < 0)
+ 		val = -1;
+ 
+ 	if (val != cs->relax_domain_level) {
+@@ -1280,9 +1278,6 @@ static ssize_t cpuset_common_file_write(struct cgroup *cont,
+ 	case FILE_MEMLIST:
+ 		retval = update_nodemask(cs, buffer);
+ 		break;
+-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+-		retval = update_relax_domain_level(cs, buffer);
+-		break;
+ 	default:
+ 		retval = -EINVAL;
+ 		goto out2;
+@@ -1348,6 +1343,30 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val)
+ 	return retval;
+ }
+ 
++static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val)
++{
++	int retval = 0;
++	struct cpuset *cs = cgroup_cs(cgrp);
++	cpuset_filetype_t type = cft->private;
++
++	cgroup_lock();
++
++	if (cgroup_is_removed(cgrp)) {
++		cgroup_unlock();
++		return -ENODEV;
++	}
++	switch (type) {
++	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
++		retval = update_relax_domain_level(cs, val);
++		break;
++	default:
++		retval = -EINVAL;
++		break;
++	}
++	cgroup_unlock();
++	return retval;
++}
++
+ /*
+  * These ascii lists should be read in a single call, by using a user
+  * buffer large enough to hold the entire map.  If read in smaller
+@@ -1406,9 +1425,6 @@ static ssize_t cpuset_common_file_read(struct cgroup *cont,
+ 	case FILE_MEMLIST:
+ 		s += cpuset_sprintf_memlist(s, cs);
+ 		break;
+-	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
+-		s += sprintf(s, "%d", cs->relax_domain_level);
+-		break;
+ 	default:
+ 		retval = -EINVAL;
+ 		goto out;
+@@ -1449,6 +1465,18 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft)
+ 	}
+ }
+ 
++static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft)
++{
++	struct cpuset *cs = cgroup_cs(cont);
++	cpuset_filetype_t type = cft->private;
++	switch (type) {
++	case FILE_SCHED_RELAX_DOMAIN_LEVEL:
++		return cs->relax_domain_level;
++	default:
++		BUG();
++	}
++}
++
+ 
+ /*
+  * for the common functions, 'private' gives the type of file
+@@ -1499,8 +1527,8 @@ static struct cftype files[] = {
+ 
+ 	{
+ 		.name = "sched_relax_domain_level",
+-		.read_u64 = cpuset_read_u64,
+-		.write_u64 = cpuset_write_u64,
++		.read_s64 = cpuset_read_s64,
++		.write_s64 = cpuset_write_s64,
+ 		.private = FILE_SCHED_RELAX_DOMAIN_LEVEL,
+ 	},
+ 
+diff --git a/kernel/futex.c b/kernel/futex.c
+index 98092c9..449def8 100644
+--- a/kernel/futex.c
++++ b/kernel/futex.c
+@@ -104,10 +104,6 @@ struct futex_q {
+ 	/* Key which the futex is hashed on: */
+ 	union futex_key key;
+ 
+-	/* For fd, sigio sent using these: */
+-	int fd;
+-	struct file *filp;
+-
+ 	/* Optional priority inheritance state: */
+ 	struct futex_pi_state *pi_state;
+ 	struct task_struct *task;
+@@ -126,9 +122,6 @@ struct futex_hash_bucket {
+ 
+ static struct futex_hash_bucket futex_queues[1<<FUTEX_HASHBITS];
+ 
+-/* Futex-fs vfsmount entry: */
+-static struct vfsmount *futex_mnt;
+-
+ /*
+  * Take mm->mmap_sem, when futex is shared
+  */
+@@ -610,8 +603,6 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
+ static void wake_futex(struct futex_q *q)
+ {
+ 	plist_del(&q->list, &q->list.plist);
+-	if (q->filp)
+-		send_sigio(&q->filp->f_owner, q->fd, POLL_IN);
+ 	/*
+ 	 * The lock in wake_up_all() is a crucial memory barrier after the
+ 	 * plist_del() and also before assigning to q->lock_ptr.
+@@ -988,14 +979,10 @@ out:
+ }
+ 
+ /* The key must be already stored in q->key. */
+-static inline struct futex_hash_bucket *
+-queue_lock(struct futex_q *q, int fd, struct file *filp)
++static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
+ {
+ 	struct futex_hash_bucket *hb;
+ 
+-	q->fd = fd;
+-	q->filp = filp;
+-
+ 	init_waitqueue_head(&q->waiters);
+ 
+ 	get_futex_key_refs(&q->key);
+@@ -1006,7 +993,7 @@ queue_lock(struct futex_q *q, int fd, struct file *filp)
+ 	return hb;
+ }
+ 
+-static inline void __queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
++static inline void queue_me(struct futex_q *q, struct futex_hash_bucket *hb)
+ {
+ 	int prio;
+ 
+@@ -1041,15 +1028,6 @@ queue_unlock(struct futex_q *q, struct futex_hash_bucket *hb)
+  * exactly once.  They are called with the hashed spinlock held.
+  */
+ 
+-/* The key must be already stored in q->key. */
+-static void queue_me(struct futex_q *q, int fd, struct file *filp)
+-{
+-	struct futex_hash_bucket *hb;
+-
+-	hb = queue_lock(q, fd, filp);
+-	__queue_me(q, hb);
+-}
+-
+ /* Return 1 if we were still queued (ie. 0 means we were woken) */
+ static int unqueue_me(struct futex_q *q)
+ {
+@@ -1194,7 +1172,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
+ 	if (unlikely(ret != 0))
+ 		goto out_release_sem;
+ 
+-	hb = queue_lock(&q, -1, NULL);
++	hb = queue_lock(&q);
+ 
+ 	/*
+ 	 * Access the page AFTER the futex is queued.
+@@ -1238,7 +1216,7 @@ static int futex_wait(u32 __user *uaddr, struct rw_semaphore *fshared,
+ 		goto out_unlock_release_sem;
+ 
+ 	/* Only actually queue if *uaddr contained val.  */
+-	__queue_me(&q, hb);
++	queue_me(&q, hb);
+ 
+ 	/*
+ 	 * Now the futex is queued and we have checked the data, we
+@@ -1386,7 +1364,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+ 		goto out_release_sem;
+ 
+  retry_unlocked:
+-	hb = queue_lock(&q, -1, NULL);
++	hb = queue_lock(&q);
+ 
+  retry_locked:
+ 	ret = lock_taken = 0;
+@@ -1499,7 +1477,7 @@ static int futex_lock_pi(u32 __user *uaddr, struct rw_semaphore *fshared,
+ 	/*
+ 	 * Only actually queue now that the atomic ops are done:
+ 	 */
+-	__queue_me(&q, hb);
++	queue_me(&q, hb);
+ 
+ 	/*
+ 	 * Now the futex is queued and we have checked the data, we
+@@ -1746,121 +1724,6 @@ pi_faulted:
+ 	return ret;
+ }
+ 
+-static int futex_close(struct inode *inode, struct file *filp)
+-{
+-	struct futex_q *q = filp->private_data;
+-
+-	unqueue_me(q);
+-	kfree(q);
+-
+-	return 0;
+-}
+-
+-/* This is one-shot: once it's gone off you need a new fd */
+-static unsigned int futex_poll(struct file *filp,
+-			       struct poll_table_struct *wait)
+-{
+-	struct futex_q *q = filp->private_data;
+-	int ret = 0;
+-
+-	poll_wait(filp, &q->waiters, wait);
+-
+-	/*
+-	 * plist_node_empty() is safe here without any lock.
+-	 * q->lock_ptr != 0 is not safe, because of ordering against wakeup.
+-	 */
+-	if (plist_node_empty(&q->list))
+-		ret = POLLIN | POLLRDNORM;
+-
+-	return ret;
+-}
+-
+-static const struct file_operations futex_fops = {
+-	.release	= futex_close,
+-	.poll		= futex_poll,
+-};
+-
+-/*
+- * Signal allows caller to avoid the race which would occur if they
+- * set the sigio stuff up afterwards.
+- */
+-static int futex_fd(u32 __user *uaddr, int signal)
+-{
+-	struct futex_q *q;
+-	struct file *filp;
+-	int ret, err;
+-	struct rw_semaphore *fshared;
+-	static unsigned long printk_interval;
+-
+-	if (printk_timed_ratelimit(&printk_interval, 60 * 60 * 1000)) {
+-		printk(KERN_WARNING "Process `%s' used FUTEX_FD, which "
+-		       "will be removed from the kernel in June 2007\n",
+-		       current->comm);
+-	}
+-
+-	ret = -EINVAL;
+-	if (!valid_signal(signal))
+-		goto out;
+-
+-	ret = get_unused_fd();
+-	if (ret < 0)
+-		goto out;
+-	filp = get_empty_filp();
+-	if (!filp) {
+-		put_unused_fd(ret);
+-		ret = -ENFILE;
+-		goto out;
+-	}
+-	filp->f_op = &futex_fops;
+-	filp->f_path.mnt = mntget(futex_mnt);
+-	filp->f_path.dentry = dget(futex_mnt->mnt_root);
+-	filp->f_mapping = filp->f_path.dentry->d_inode->i_mapping;
+-
+-	if (signal) {
+-		err = __f_setown(filp, task_pid(current), PIDTYPE_PID, 1);
+-		if (err < 0) {
+-			goto error;
+-		}
+-		filp->f_owner.signum = signal;
+-	}
+-
+-	q = kmalloc(sizeof(*q), GFP_KERNEL);
+-	if (!q) {
+-		err = -ENOMEM;
+-		goto error;
+-	}
+-	q->pi_state = NULL;
+-
+-	fshared = &current->mm->mmap_sem;
+-	down_read(fshared);
+-	err = get_futex_key(uaddr, fshared, &q->key);
+-
+-	if (unlikely(err != 0)) {
+-		up_read(fshared);
+-		kfree(q);
+-		goto error;
+-	}
+-
+-	/*
+-	 * queue_me() must be called before releasing mmap_sem, because
+-	 * key->shared.inode needs to be referenced while holding it.
+-	 */
+-	filp->private_data = q;
+-
+-	queue_me(q, ret, filp);
+-	up_read(fshared);
+-
+-	/* Now we map fd to filp, so userspace can access it */
+-	fd_install(ret, filp);
+-out:
+-	return ret;
+-error:
+-	put_unused_fd(ret);
+-	put_filp(filp);
+-	ret = err;
+-	goto out;
+-}
+-
+ /*
+  * Support for robust futexes: the kernel cleans up held futexes at
+  * thread exit time.
+@@ -2092,10 +1955,6 @@ long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ 	case FUTEX_WAKE_BITSET:
+ 		ret = futex_wake(uaddr, fshared, val, val3);
+ 		break;
+-	case FUTEX_FD:
+-		/* non-zero val means F_SETOWN(getpid()) & F_SETSIG(val) */
+-		ret = futex_fd(uaddr, val);
+-		break;
+ 	case FUTEX_REQUEUE:
+ 		ret = futex_requeue(uaddr, fshared, uaddr2, val, val2, NULL);
+ 		break;
+@@ -2156,19 +2015,6 @@ asmlinkage long sys_futex(u32 __user *uaddr, int op, u32 val,
+ 	return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
+ }
+ 
+-static int futexfs_get_sb(struct file_system_type *fs_type,
+-			  int flags, const char *dev_name, void *data,
+-			  struct vfsmount *mnt)
+-{
+-	return get_sb_pseudo(fs_type, "futex", NULL, FUTEXFS_SUPER_MAGIC, mnt);
+-}
+-
+-static struct file_system_type futex_fs_type = {
+-	.name		= "futexfs",
+-	.get_sb		= futexfs_get_sb,
+-	.kill_sb	= kill_anon_super,
+-};
+-
+ static int __init futex_init(void)
+ {
+ 	u32 curval;
+@@ -2193,16 +2039,6 @@ static int __init futex_init(void)
+ 		spin_lock_init(&futex_queues[i].lock);
+ 	}
+ 
+-	i = register_filesystem(&futex_fs_type);
+-	if (i)
+-		return i;
+-
+-	futex_mnt = kern_mount(&futex_fs_type);
+-	if (IS_ERR(futex_mnt)) {
+-		unregister_filesystem(&futex_fs_type);
+-		return PTR_ERR(futex_mnt);
+-	}
+-
+ 	return 0;
+ }
+ __initcall(futex_init);
+diff --git a/kernel/hrtimer.c b/kernel/hrtimer.c
+index 9af1d6a..421be5f 100644
+--- a/kernel/hrtimer.c
++++ b/kernel/hrtimer.c
+@@ -154,15 +154,6 @@ static void hrtimer_get_softirq_time(struct hrtimer_cpu_base *base)
+ }
+ 
+ /*
+- * Helper function to check, whether the timer is running the callback
+- * function
+- */
+-static inline int hrtimer_callback_running(struct hrtimer *timer)
+-{
+-	return timer->state & HRTIMER_STATE_CALLBACK;
+-}
+-
+-/*
+  * Functions and macros which are different for UP/SMP systems are kept in a
+  * single place
+  */
+diff --git a/kernel/kgdb.c b/kernel/kgdb.c
+index 1bd0ec1..39e31a0 100644
+--- a/kernel/kgdb.c
++++ b/kernel/kgdb.c
+@@ -61,7 +61,7 @@ struct kgdb_state {
+ 	int			err_code;
+ 	int			cpu;
+ 	int			pass_exception;
+-	long			threadid;
++	unsigned long		threadid;
+ 	long			kgdb_usethreadid;
+ 	struct pt_regs		*linux_regs;
+ };
+@@ -146,7 +146,7 @@ atomic_t			kgdb_cpu_doing_single_step = ATOMIC_INIT(-1);
+  * the other CPUs might interfere with your debugging context, so
+  * use this with care:
+  */
+-int				kgdb_do_roundup = 1;
++static int kgdb_do_roundup = 1;
+ 
+ static int __init opt_nokgdbroundup(char *str)
+ {
+@@ -438,7 +438,7 @@ int kgdb_hex2mem(char *buf, char *mem, int count)
+  * While we find nice hex chars, build a long_val.
+  * Return number of chars processed.
+  */
+-int kgdb_hex2long(char **ptr, long *long_val)
++int kgdb_hex2long(char **ptr, unsigned long *long_val)
+ {
+ 	int hex_val;
+ 	int num = 0;
+@@ -709,7 +709,7 @@ int kgdb_isremovedbreak(unsigned long addr)
+ 	return 0;
+ }
+ 
+-int remove_all_break(void)
++static int remove_all_break(void)
+ {
+ 	unsigned long addr;
+ 	int error;
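
kgdb_hex2long(), whose prototype changes above to take an unsigned long,
follows the contract in its comment: consume hex characters, accumulate the
value, advance the caller's pointer, and return how many characters were
processed. A self-contained sketch of that contract (not the kernel
implementation; names are illustrative):

    #include <stdio.h>

    static int hex_digit(char ch)
    {
        if (ch >= '0' && ch <= '9') return ch - '0';
        if (ch >= 'a' && ch <= 'f') return ch - 'a' + 10;
        if (ch >= 'A' && ch <= 'F') return ch - 'A' + 10;
        return -1;                      /* not a hex character */
    }

    static int hex2ulong(char **ptr, unsigned long *val)
    {
        int num = 0, d;

        *val = 0;
        while ((d = hex_digit(**ptr)) >= 0) {
            *val = (*val << 4) | d;     /* shift in the next nibble */
            (*ptr)++;
            num++;
        }
        return num;                     /* characters consumed */
    }

    int main(void)
    {
        char buf[] = "c01abc,rest";
        char *p = buf;
        unsigned long v;
        int n = hex2ulong(&p, &v);

        printf("parsed %d chars, value 0x%lx, rest \"%s\"\n", n, v, p);
        return 0;
    }
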
+diff --git a/kernel/module.c b/kernel/module.c
+index 8674a39..8e4528c 100644
+--- a/kernel/module.c
++++ b/kernel/module.c
+@@ -890,6 +890,19 @@ static struct module_attribute *modinfo_attrs[] = {
+ 
+ static const char vermagic[] = VERMAGIC_STRING;
+ 
++static int try_to_force_load(struct module *mod, const char *symname)
++{
++#ifdef CONFIG_MODULE_FORCE_LOAD
++	if (!(tainted & TAINT_FORCED_MODULE))
++		printk("%s: no version for \"%s\" found: kernel tainted.\n",
++		       mod->name, symname);
++	add_taint_module(mod, TAINT_FORCED_MODULE);
++	return 0;
++#else
++	return -ENOEXEC;
++#endif
++}
++
+ #ifdef CONFIG_MODVERSIONS
+ static int check_version(Elf_Shdr *sechdrs,
+ 			 unsigned int versindex,
+@@ -914,18 +927,18 @@ static int check_version(Elf_Shdr *sechdrs,
+ 
+ 		if (versions[i].crc == *crc)
+ 			return 1;
+-		printk("%s: disagrees about version of symbol %s\n",
+-		       mod->name, symname);
+ 		DEBUGP("Found checksum %lX vs module %lX\n",
+ 		       *crc, versions[i].crc);
+-		return 0;
++		goto bad_version;
+ 	}
+-	/* Not in module's version table.  OK, but that taints the kernel. */
+-	if (!(tainted & TAINT_FORCED_MODULE))
+-		printk("%s: no version for \"%s\" found: kernel tainted.\n",
+-		       mod->name, symname);
+-	add_taint_module(mod, TAINT_FORCED_MODULE);
+-	return 1;
++
++	if (!try_to_force_load(mod, symname))
++		return 1;
++
++bad_version:
++	printk("%s: disagrees about version of symbol %s\n",
++	       mod->name, symname);
++	return 0;
+ }
+ 
+ static inline int check_modstruct_version(Elf_Shdr *sechdrs,
+@@ -1853,9 +1866,9 @@ static struct module *load_module(void __user *umod,
+ 	modmagic = get_modinfo(sechdrs, infoindex, "vermagic");
+ 	/* This is allowed: modprobe --force will invalidate it. */
+ 	if (!modmagic) {
+-		add_taint_module(mod, TAINT_FORCED_MODULE);
+-		printk(KERN_WARNING "%s: no version magic, tainting kernel.\n",
+-		       mod->name);
++		err = try_to_force_load(mod, "magic");
++		if (err)
++			goto free_hdr;
+ 	} else if (!same_magic(modmagic, vermagic)) {
+ 		printk(KERN_ERR "%s: version magic '%s' should be '%s'\n",
+ 		       mod->name, modmagic, vermagic);
+@@ -2006,9 +2019,10 @@ static struct module *load_module(void __user *umod,
+ 	    (mod->num_gpl_future_syms && !gplfuturecrcindex) ||
+ 	    (mod->num_unused_syms && !unusedcrcindex) ||
+ 	    (mod->num_unused_gpl_syms && !unusedgplcrcindex)) {
+-		printk(KERN_WARNING "%s: No versions for exported symbols."
+-		       " Tainting kernel.\n", mod->name);
+-		add_taint_module(mod, TAINT_FORCED_MODULE);
++		printk(KERN_WARNING "%s: No versions for exported symbols.\n", mod->name);
++		err = try_to_force_load(mod, "nocrc");
++		if (err)
++			goto cleanup;
+ 	}
+ #endif
+ 	markersindex = find_sec(hdr, sechdrs, secstrings, "__markers");
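
try_to_force_load() above centralizes the forced-load policy: with
CONFIG_MODULE_FORCE_LOAD the module is admitted but the kernel is tainted
(with a one-time warning), without it the load fails outright with -ENOEXEC.
A compile-time sketch of that gate (the macro name and return codes here are
illustrative stand-ins):

    #include <stdio.h>

    /* define FORCE_LOAD to mimic CONFIG_MODULE_FORCE_LOAD=y */
    static int tainted;

    static int try_to_force(const char *mod, const char *sym)
    {
    #ifdef FORCE_LOAD
        if (!tainted)
            printf("%s: no version for \"%s\" found: kernel tainted.\n",
                   mod, sym);
        tainted = 1;
        return 0;           /* admit the module, but taint */
    #else
        (void)mod; (void)sym;
        return -1;          /* stands in for -ENOEXEC: refuse outright */
    #endif
    }

    int main(void)
    {
        printf("load %s\n",
               try_to_force("demo", "printk") ? "refused" : "forced");
        return 0;
    }
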
+diff --git a/kernel/relay.c b/kernel/relay.c
+index 7de644c..bc24dcd 100644
+--- a/kernel/relay.c
++++ b/kernel/relay.c
+@@ -1191,7 +1191,7 @@ static ssize_t relay_file_splice_read(struct file *in,
+ 	ret = 0;
+ 	spliced = 0;
+ 
+-	while (len && !spliced) {
++	while (len) {
+ 		ret = subbuf_splice_actor(in, ppos, pipe, len, flags, &nonpad_ret);
+ 		if (ret < 0)
+ 			break;
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 34bcc5b..58fb8af 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -75,16 +75,6 @@
+ #include <asm/irq_regs.h>
+ 
+ /*
+- * Scheduler clock - returns current time in nanosec units.
+- * This is default implementation.
+- * Architectures and sub-architectures can override this.
+- */
+-unsigned long long __attribute__((weak)) sched_clock(void)
+-{
+-	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
+-}
+-
+-/*
+  * Convert user-nice values [ -20 ... 0 ... 19 ]
+  * to static priority [ MAX_RT_PRIO..MAX_PRIO-1 ],
+  * and back.
+@@ -242,6 +232,12 @@ static void destroy_rt_bandwidth(struct rt_bandwidth *rt_b)
+ }
+ #endif
+ 
++/*
++ * sched_domains_mutex serializes calls to arch_init_sched_domains,
++ * detach_destroy_domains and partition_sched_domains.
++ */
++static DEFINE_MUTEX(sched_domains_mutex);
++
+ #ifdef CONFIG_GROUP_SCHED
+ 
+ #include <linux/cgroup.h>
+@@ -308,9 +304,6 @@ static DEFINE_PER_CPU(struct rt_rq, init_rt_rq) ____cacheline_aligned_in_smp;
+  */
+ static DEFINE_SPINLOCK(task_group_lock);
+ 
+-/* doms_cur_mutex serializes access to doms_cur[] array */
+-static DEFINE_MUTEX(doms_cur_mutex);
+-
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+ #ifdef CONFIG_USER_SCHED
+ # define INIT_TASK_GROUP_LOAD	(2*NICE_0_LOAD)
+@@ -318,7 +311,13 @@ static DEFINE_MUTEX(doms_cur_mutex);
+ # define INIT_TASK_GROUP_LOAD	NICE_0_LOAD
+ #endif
+ 
++/*
++ * A weight of 0, 1 or ULONG_MAX can cause arithmetic problems.
++ * (The default weight is 1024 - so there's no practical
++ *  limitation from this.)
++ */
+ #define MIN_SHARES	2
++#define MAX_SHARES	(ULONG_MAX - 1)
+ 
+ static int init_task_group_load = INIT_TASK_GROUP_LOAD;
+ #endif
+@@ -358,21 +357,9 @@ static inline void set_task_rq(struct task_struct *p, unsigned int cpu)
+ #endif
+ }
+ 
+-static inline void lock_doms_cur(void)
+-{
+-	mutex_lock(&doms_cur_mutex);
+-}
+-
+-static inline void unlock_doms_cur(void)
+-{
+-	mutex_unlock(&doms_cur_mutex);
+-}
+-
+ #else
+ 
+ static inline void set_task_rq(struct task_struct *p, unsigned int cpu) { }
+-static inline void lock_doms_cur(void) { }
+-static inline void unlock_doms_cur(void) { }
+ 
+ #endif	/* CONFIG_GROUP_SCHED */
+ 
+@@ -560,13 +547,7 @@ struct rq {
+ 	unsigned long next_balance;
+ 	struct mm_struct *prev_mm;
+ 
+-	u64 clock, prev_clock_raw;
+-	s64 clock_max_delta;
+-
+-	unsigned int clock_warps, clock_overflows, clock_underflows;
+-	u64 idle_clock;
+-	unsigned int clock_deep_idle_events;
+-	u64 tick_timestamp;
++	u64 clock;
+ 
+ 	atomic_t nr_iowait;
+ 
+@@ -631,82 +612,6 @@ static inline int cpu_of(struct rq *rq)
+ #endif
+ }
+ 
+-#ifdef CONFIG_NO_HZ
+-static inline bool nohz_on(int cpu)
+-{
+-	return tick_get_tick_sched(cpu)->nohz_mode != NOHZ_MODE_INACTIVE;
+-}
+-
+-static inline u64 max_skipped_ticks(struct rq *rq)
+-{
+-	return nohz_on(cpu_of(rq)) ? jiffies - rq->last_tick_seen + 2 : 1;
+-}
+-
+-static inline void update_last_tick_seen(struct rq *rq)
+-{
+-	rq->last_tick_seen = jiffies;
+-}
+-#else
+-static inline u64 max_skipped_ticks(struct rq *rq)
+-{
+-	return 1;
+-}
+-
+-static inline void update_last_tick_seen(struct rq *rq)
+-{
+-}
+-#endif
+-
+-/*
+- * Update the per-runqueue clock, as finegrained as the platform can give
+- * us, but without assuming monotonicity, etc.:
+- */
+-static void __update_rq_clock(struct rq *rq)
+-{
+-	u64 prev_raw = rq->prev_clock_raw;
+-	u64 now = sched_clock();
+-	s64 delta = now - prev_raw;
+-	u64 clock = rq->clock;
+-
+-#ifdef CONFIG_SCHED_DEBUG
+-	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+-#endif
+-	/*
+-	 * Protect against sched_clock() occasionally going backwards:
+-	 */
+-	if (unlikely(delta < 0)) {
+-		clock++;
+-		rq->clock_warps++;
+-	} else {
+-		/*
+-		 * Catch too large forward jumps too:
+-		 */
+-		u64 max_jump = max_skipped_ticks(rq) * TICK_NSEC;
+-		u64 max_time = rq->tick_timestamp + max_jump;
+-
+-		if (unlikely(clock + delta > max_time)) {
+-			if (clock < max_time)
+-				clock = max_time;
+-			else
+-				clock++;
+-			rq->clock_overflows++;
+-		} else {
+-			if (unlikely(delta > rq->clock_max_delta))
+-				rq->clock_max_delta = delta;
+-			clock += delta;
+-		}
+-	}
+-
+-	rq->prev_clock_raw = now;
+-	rq->clock = clock;
+-}
+-
+-static void update_rq_clock(struct rq *rq)
+-{
+-	if (likely(smp_processor_id() == cpu_of(rq)))
+-		__update_rq_clock(rq);
+-}
+-
+ /*
+  * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
+  * See detach_destroy_domains: synchronize_sched for details.
+@@ -722,6 +627,11 @@ static void update_rq_clock(struct rq *rq)
+ #define task_rq(p)		cpu_rq(task_cpu(p))
+ #define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+ 
++static inline void update_rq_clock(struct rq *rq)
++{
++	rq->clock = sched_clock_cpu(cpu_of(rq));
++}
++
+ /*
+  * Tunables that become constants when CONFIG_SCHED_DEBUG is off:
+  */
+@@ -757,14 +667,14 @@ const_debug unsigned int sysctl_sched_features =
+ #define SCHED_FEAT(name, enabled)	\
+ 	#name ,
+ 
+-__read_mostly char *sched_feat_names[] = {
++static __read_mostly char *sched_feat_names[] = {
+ #include "sched_features.h"
+ 	NULL
+ };
+ 
+ #undef SCHED_FEAT
+ 
+-int sched_feat_open(struct inode *inode, struct file *filp)
++static int sched_feat_open(struct inode *inode, struct file *filp)
+ {
+ 	filp->private_data = inode->i_private;
+ 	return 0;
+@@ -899,7 +809,7 @@ static inline u64 global_rt_runtime(void)
+ 	return (u64)sysctl_sched_rt_runtime * NSEC_PER_USEC;
+ }
+ 
+-static const unsigned long long time_sync_thresh = 100000;
++unsigned long long time_sync_thresh = 100000;
+ 
+ static DEFINE_PER_CPU(unsigned long long, time_offset);
+ static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
+@@ -913,11 +823,14 @@ static DEFINE_PER_CPU(unsigned long long, prev_cpu_time);
+ static DEFINE_SPINLOCK(time_sync_lock);
+ static unsigned long long prev_global_time;
+ 
+-static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
++static unsigned long long __sync_cpu_clock(unsigned long long time, int cpu)
+ {
+-	unsigned long flags;
+-
+-	spin_lock_irqsave(&time_sync_lock, flags);
++	/*
++	 * We want this inlined so we don't get tracer function calls
++	 * in this critical section:
++	 */
++	spin_acquire(&time_sync_lock.dep_map, 0, 0, _THIS_IP_);
++	__raw_spin_lock(&time_sync_lock.raw_lock);
+ 
+ 	if (time < prev_global_time) {
+ 		per_cpu(time_offset, cpu) += prev_global_time - time;
+@@ -926,7 +839,8 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
+ 		prev_global_time = time;
+ 	}
+ 
+-	spin_unlock_irqrestore(&time_sync_lock, flags);
++	__raw_spin_unlock(&time_sync_lock.raw_lock);
++	spin_release(&time_sync_lock.dep_map, 1, _THIS_IP_);
+ 
+ 	return time;
+ }
+@@ -934,8 +848,6 @@ static unsigned long long __sync_cpu_clock(cycles_t time, int cpu)
+ static unsigned long long __cpu_clock(int cpu)
+ {
+ 	unsigned long long now;
+-	unsigned long flags;
+-	struct rq *rq;
+ 
+ 	/*
+ 	 * Only call sched_clock() if the scheduler has already been
+@@ -944,11 +856,7 @@ static unsigned long long __cpu_clock(int cpu)
+ 	if (unlikely(!scheduler_running))
+ 		return 0;
+ 
+-	local_irq_save(flags);
+-	rq = cpu_rq(cpu);
+-	update_rq_clock(rq);
+-	now = rq->clock;
+-	local_irq_restore(flags);
++	now = sched_clock_cpu(cpu);
+ 
+ 	return now;
+ }
+@@ -960,13 +868,18 @@ static unsigned long long __cpu_clock(int cpu)
+ unsigned long long cpu_clock(int cpu)
+ {
+ 	unsigned long long prev_cpu_time, time, delta_time;
++	unsigned long flags;
+ 
++	local_irq_save(flags);
+ 	prev_cpu_time = per_cpu(prev_cpu_time, cpu);
+ 	time = __cpu_clock(cpu) + per_cpu(time_offset, cpu);
+ 	delta_time = time-prev_cpu_time;
+ 
+-	if (unlikely(delta_time > time_sync_thresh))
++	if (unlikely(delta_time > time_sync_thresh)) {
+ 		time = __sync_cpu_clock(time, cpu);
++		per_cpu(prev_cpu_time, cpu) = time;
++	}
++	local_irq_restore(flags);
+ 
+ 	return time;
+ }
+@@ -1117,43 +1030,6 @@ static struct rq *this_rq_lock(void)
+ 	return rq;
+ }
+ 
+-/*
+- * We are going deep-idle (irqs are disabled):
+- */
+-void sched_clock_idle_sleep_event(void)
+-{
+-	struct rq *rq = cpu_rq(smp_processor_id());
+-
+-	spin_lock(&rq->lock);
+-	__update_rq_clock(rq);
+-	spin_unlock(&rq->lock);
+-	rq->clock_deep_idle_events++;
+-}
+-EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
+-
+-/*
+- * We just idled delta nanoseconds (called with irqs disabled):
+- */
+-void sched_clock_idle_wakeup_event(u64 delta_ns)
+-{
+-	struct rq *rq = cpu_rq(smp_processor_id());
+-	u64 now = sched_clock();
+-
+-	rq->idle_clock += delta_ns;
+-	/*
+-	 * Override the previous timestamp and ignore all
+-	 * sched_clock() deltas that occured while we idled,
+-	 * and use the PM-provided delta_ns to advance the
+-	 * rq clock:
+-	 */
+-	spin_lock(&rq->lock);
+-	rq->prev_clock_raw = now;
+-	rq->clock += delta_ns;
+-	spin_unlock(&rq->lock);
+-	touch_softlockup_watchdog();
+-}
+-EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
+-
+ static void __resched_task(struct task_struct *p, int tif_bit);
+ 
+ static inline void resched_task(struct task_struct *p)
+@@ -1189,6 +1065,7 @@ static inline void resched_rq(struct rq *rq)
+ enum {
+ 	HRTICK_SET,		/* re-programm hrtick_timer */
+ 	HRTICK_RESET,		/* not a new slice */
++	HRTICK_BLOCK,		/* stop hrtick operations */
+ };
+ 
+ /*
+@@ -1200,6 +1077,8 @@ static inline int hrtick_enabled(struct rq *rq)
+ {
+ 	if (!sched_feat(HRTICK))
+ 		return 0;
++	if (unlikely(test_bit(HRTICK_BLOCK, &rq->hrtick_flags)))
++		return 0;
+ 	return hrtimer_is_hres_active(&rq->hrtick_timer);
+ }
+ 
+@@ -1275,14 +1154,70 @@ static enum hrtimer_restart hrtick(struct hrtimer *timer)
+ 	WARN_ON_ONCE(cpu_of(rq) != smp_processor_id());
+ 
+ 	spin_lock(&rq->lock);
+-	__update_rq_clock(rq);
++	update_rq_clock(rq);
+ 	rq->curr->sched_class->task_tick(rq, rq->curr, 1);
+ 	spin_unlock(&rq->lock);
+ 
+ 	return HRTIMER_NORESTART;
+ }
+ 
+-static inline void init_rq_hrtick(struct rq *rq)
++static void hotplug_hrtick_disable(int cpu)
++{
++	struct rq *rq = cpu_rq(cpu);
++	unsigned long flags;
++
++	spin_lock_irqsave(&rq->lock, flags);
++	rq->hrtick_flags = 0;
++	__set_bit(HRTICK_BLOCK, &rq->hrtick_flags);
++	spin_unlock_irqrestore(&rq->lock, flags);
++
++	hrtick_clear(rq);
++}
++
++static void hotplug_hrtick_enable(int cpu)
++{
++	struct rq *rq = cpu_rq(cpu);
++	unsigned long flags;
++
++	spin_lock_irqsave(&rq->lock, flags);
++	__clear_bit(HRTICK_BLOCK, &rq->hrtick_flags);
++	spin_unlock_irqrestore(&rq->lock, flags);
++}
++
++static int
++hotplug_hrtick(struct notifier_block *nfb, unsigned long action, void *hcpu)
++{
++	int cpu = (int)(long)hcpu;
++
++	switch (action) {
++	case CPU_UP_CANCELED:
++	case CPU_UP_CANCELED_FROZEN:
++	case CPU_DOWN_PREPARE:
++	case CPU_DOWN_PREPARE_FROZEN:
++	case CPU_DEAD:
++	case CPU_DEAD_FROZEN:
++		hotplug_hrtick_disable(cpu);
++		return NOTIFY_OK;
++
++	case CPU_UP_PREPARE:
++	case CPU_UP_PREPARE_FROZEN:
++	case CPU_DOWN_FAILED:
++	case CPU_DOWN_FAILED_FROZEN:
++	case CPU_ONLINE:
++	case CPU_ONLINE_FROZEN:
++		hotplug_hrtick_enable(cpu);
++		return NOTIFY_OK;
++	}
++
++	return NOTIFY_DONE;
++}
++
++static void init_hrtick(void)
++{
++	hotcpu_notifier(hotplug_hrtick, 0);
++}
++
++static void init_rq_hrtick(struct rq *rq)
+ {
+ 	rq->hrtick_flags = 0;
+ 	hrtimer_init(&rq->hrtick_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
+@@ -1319,6 +1254,10 @@ static inline void init_rq_hrtick(struct rq *rq)
+ void hrtick_resched(void)
+ {
+ }
++
++static inline void init_hrtick(void)
++{
++}
+ #endif
+ 
+ /*
+@@ -1438,8 +1377,8 @@ calc_delta_mine(unsigned long delta_exec, unsigned long weight,
+ {
+ 	u64 tmp;
+ 
+-	if (unlikely(!lw->inv_weight))
+-		lw->inv_weight = (WMULT_CONST-lw->weight/2) / (lw->weight+1);
++	if (!lw->inv_weight)
++		lw->inv_weight = 1 + (WMULT_CONST-lw->weight/2)/(lw->weight+1);
+ 
+ 	tmp = (u64)delta_exec * weight;
+ 	/*
+@@ -1748,6 +1687,8 @@ __update_group_shares_cpu(struct task_group *tg, struct sched_domain *sd,
+ 
+ 	if (shares < MIN_SHARES)
+ 		shares = MIN_SHARES;
++	else if (shares > MAX_SHARES)
++		shares = MAX_SHARES;
+ 
+ 	__set_se_shares(tg->se[tcpu], shares);
+ }
+@@ -4339,8 +4280,10 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
+ 	struct rq *rq = this_rq();
+ 	cputime64_t tmp;
+ 
+-	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0))
+-		return account_guest_time(p, cputime);
++	if ((p->flags & PF_VCPU) && (irq_count() - hardirq_offset == 0)) {
++		account_guest_time(p, cputime);
++		return;
++	}
+ 
+ 	p->stime = cputime_add(p->stime, cputime);
+ 
+@@ -4404,19 +4347,11 @@ void scheduler_tick(void)
+ 	int cpu = smp_processor_id();
+ 	struct rq *rq = cpu_rq(cpu);
+ 	struct task_struct *curr = rq->curr;
+-	u64 next_tick = rq->tick_timestamp + TICK_NSEC;
++
++	sched_clock_tick();
+ 
+ 	spin_lock(&rq->lock);
+-	__update_rq_clock(rq);
+-	/*
+-	 * Let rq->clock advance by at least TICK_NSEC:
+-	 */
+-	if (unlikely(rq->clock < next_tick)) {
+-		rq->clock = next_tick;
+-		rq->clock_underflows++;
+-	}
+-	rq->tick_timestamp = rq->clock;
+-	update_last_tick_seen(rq);
++	update_rq_clock(rq);
+ 	update_cpu_load(rq);
+ 	curr->sched_class->task_tick(rq, curr, 0);
+ 	spin_unlock(&rq->lock);
+@@ -4570,7 +4505,7 @@ need_resched_nonpreemptible:
+ 	 * Do the rq-clock update outside the rq lock:
+ 	 */
+ 	local_irq_disable();
+-	__update_rq_clock(rq);
++	update_rq_clock(rq);
+ 	spin_lock(&rq->lock);
+ 	clear_tsk_need_resched(prev);
+ 
+@@ -4595,9 +4530,9 @@ need_resched_nonpreemptible:
+ 	prev->sched_class->put_prev_task(rq, prev);
+ 	next = pick_next_task(rq, prev);
+ 
+-	sched_info_switch(prev, next);
+-
+ 	if (likely(prev != next)) {
++		sched_info_switch(prev, next);
++
+ 		rq->nr_switches++;
+ 		rq->curr = next;
+ 		++*switch_count;
+@@ -7755,7 +7690,7 @@ void partition_sched_domains(int ndoms_new, cpumask_t *doms_new,
+ {
+ 	int i, j;
+ 
+-	lock_doms_cur();
++	mutex_lock(&sched_domains_mutex);
+ 
+ 	/* always unregister in case we don't destroy any domains */
+ 	unregister_sched_domain_sysctl();
+@@ -7804,7 +7739,7 @@ match2:
+ 
+ 	register_sched_domain_sysctl();
+ 
+-	unlock_doms_cur();
++	mutex_unlock(&sched_domains_mutex);
+ }
+ 
+ #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
+@@ -7813,8 +7748,10 @@ int arch_reinit_sched_domains(void)
+ 	int err;
+ 
+ 	get_online_cpus();
++	mutex_lock(&sched_domains_mutex);
+ 	detach_destroy_domains(&cpu_online_map);
+ 	err = arch_init_sched_domains(&cpu_online_map);
++	mutex_unlock(&sched_domains_mutex);
+ 	put_online_cpus();
+ 
+ 	return err;
+@@ -7932,13 +7869,16 @@ void __init sched_init_smp(void)
+ 	BUG_ON(sched_group_nodes_bycpu == NULL);
+ #endif
+ 	get_online_cpus();
++	mutex_lock(&sched_domains_mutex);
+ 	arch_init_sched_domains(&cpu_online_map);
+ 	cpus_andnot(non_isolated_cpus, cpu_possible_map, cpu_isolated_map);
+ 	if (cpus_empty(non_isolated_cpus))
+ 		cpu_set(smp_processor_id(), non_isolated_cpus);
++	mutex_unlock(&sched_domains_mutex);
+ 	put_online_cpus();
+ 	/* XXX: Theoretical race here - CPU may be hotplugged now */
+ 	hotcpu_notifier(update_sched_domains, 0);
++	init_hrtick();
+ 
+ 	/* Move init over to a non-isolated CPU */
+ 	if (set_cpus_allowed_ptr(current, &non_isolated_cpus) < 0)
+@@ -8025,7 +7965,7 @@ static void init_tg_cfs_entry(struct task_group *tg, struct cfs_rq *cfs_rq,
+ 
+ 	se->my_q = cfs_rq;
+ 	se->load.weight = tg->shares;
+-	se->load.inv_weight = div64_u64(1ULL<<32, se->load.weight);
++	se->load.inv_weight = 0;
+ 	se->parent = parent;
+ }
+ #endif
+@@ -8149,8 +8089,6 @@ void __init sched_init(void)
+ 		spin_lock_init(&rq->lock);
+ 		lockdep_set_class(&rq->lock, &rq->rq_lock_key);
+ 		rq->nr_running = 0;
+-		rq->clock = 1;
+-		update_last_tick_seen(rq);
+ 		init_cfs_rq(&rq->cfs, rq);
+ 		init_rt_rq(&rq->rt, rq);
+ #ifdef CONFIG_FAIR_GROUP_SCHED
+@@ -8294,6 +8232,7 @@ EXPORT_SYMBOL(__might_sleep);
+ static void normalize_task(struct rq *rq, struct task_struct *p)
+ {
+ 	int on_rq;
++
+ 	update_rq_clock(rq);
+ 	on_rq = p->se.on_rq;
+ 	if (on_rq)
+@@ -8325,7 +8264,6 @@ void normalize_rt_tasks(void)
+ 		p->se.sleep_start		= 0;
+ 		p->se.block_start		= 0;
+ #endif
+-		task_rq(p)->clock		= 0;
+ 
+ 		if (!rt_task(p)) {
+ 			/*
+@@ -8692,7 +8630,7 @@ static void __set_se_shares(struct sched_entity *se, unsigned long shares)
+ 		dequeue_entity(cfs_rq, se, 0);
+ 
+ 	se->load.weight = shares;
+-	se->load.inv_weight = div64_u64((1ULL<<32), shares);
++	se->load.inv_weight = 0;
+ 
+ 	if (on_rq)
+ 		enqueue_entity(cfs_rq, se, 0);
+@@ -8722,13 +8660,10 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+ 	if (!tg->se[0])
+ 		return -EINVAL;
+ 
+-	/*
+-	 * A weight of 0 or 1 can cause arithmetics problems.
+-	 * (The default weight is 1024 - so there's no practical
+-	 *  limitation from this.)
+-	 */
+ 	if (shares < MIN_SHARES)
+ 		shares = MIN_SHARES;
++	else if (shares > MAX_SHARES)
++		shares = MAX_SHARES;
+ 
+ 	mutex_lock(&shares_mutex);
+ 	if (tg->shares == shares)
+@@ -8753,7 +8688,7 @@ int sched_group_set_shares(struct task_group *tg, unsigned long shares)
+ 		 * force a rebalance
+ 		 */
+ 		cfs_rq_set_shares(tg->cfs_rq[i], 0);
+-		set_se_shares(tg->se[i], shares/nr_cpu_ids);
++		set_se_shares(tg->se[i], shares);
+ 	}
+ 
+ 	/*
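
The MIN_SHARES/MAX_SHARES clamping added above caps group weights at both
ends, since (per the relocated comment) weights of 0, 1 or ULONG_MAX break
the weight arithmetic. A standalone sketch using the patch's constants:

    #include <limits.h>
    #include <stdio.h>

    #define MIN_SHARES  2UL
    #define MAX_SHARES  (ULONG_MAX - 1)

    static unsigned long clamp_shares(unsigned long shares)
    {
        if (shares < MIN_SHARES)
            shares = MIN_SHARES;
        else if (shares > MAX_SHARES)
            shares = MAX_SHARES;
        return shares;
    }

    int main(void)
    {
        printf("%lu %lu %lu\n",
               clamp_shares(0),          /* -> 2 */
               clamp_shares(1024),       /* default weight, unchanged */
               clamp_shares(ULONG_MAX)); /* -> ULONG_MAX - 1 */
        return 0;
    }
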
+diff --git a/kernel/sched_clock.c b/kernel/sched_clock.c
+new file mode 100644
+index 0000000..9c597e3
+--- /dev/null
++++ b/kernel/sched_clock.c
+@@ -0,0 +1,236 @@
++/*
++ * sched_clock for unstable cpu clocks
++ *
++ *  Copyright (C) 2008 Red Hat, Inc., Peter Zijlstra <pzijlstr at redhat.com>
++ *
++ * Based on code by:
++ *   Ingo Molnar <mingo at redhat.com>
++ *   Guillaume Chazarain <guichaz at gmail.com>
++ *
++ * Create a semi-stable clock from a mixture of other events, including:
++ *  - gtod
++ *  - jiffies
++ *  - sched_clock()
++ *  - explicit idle events
++ *
++ * We use gtod as the base and add the unstable clock deltas. The deltas
++ * are filtered, making the clock monotonic and keeping it within an
++ * expected window.  This window is set up using jiffies.
++ *
++ * Furthermore, explicit sleep and wakeup hooks allow us to account for time
++ * that is otherwise invisible (TSC gets stopped).
++ *
++ * The clock: sched_clock_cpu() is monotonic per cpu, and should be somewhat
++ * consistent between cpus (never more than one jiffy of difference).
++ */
++#include <linux/sched.h>
++#include <linux/percpu.h>
++#include <linux/spinlock.h>
++#include <linux/ktime.h>
++#include <linux/module.h>
++
++
++#ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
++
++struct sched_clock_data {
++	/*
++	 * Raw spinlock - this is a special case: this might be called
++	 * from within instrumentation code, so we don't want to do any
++	 * instrumentation ourselves.
++	 */
++	raw_spinlock_t		lock;
++
++	unsigned long		prev_jiffies;
++	u64			prev_raw;
++	u64			tick_raw;
++	u64			tick_gtod;
++	u64			clock;
++};
++
++static DEFINE_PER_CPU_SHARED_ALIGNED(struct sched_clock_data, sched_clock_data);
++
++static inline struct sched_clock_data *this_scd(void)
++{
++	return &__get_cpu_var(sched_clock_data);
++}
++
++static inline struct sched_clock_data *cpu_sdc(int cpu)
++{
++	return &per_cpu(sched_clock_data, cpu);
++}
++
++void sched_clock_init(void)
++{
++	u64 ktime_now = ktime_to_ns(ktime_get());
++	u64 now = 0;
++	int cpu;
++
++	for_each_possible_cpu(cpu) {
++		struct sched_clock_data *scd = cpu_sdc(cpu);
++
++		scd->lock = (raw_spinlock_t)__RAW_SPIN_LOCK_UNLOCKED;
++		scd->prev_jiffies = jiffies;
++		scd->prev_raw = now;
++		scd->tick_raw = now;
++		scd->tick_gtod = ktime_now;
++		scd->clock = ktime_now;
++	}
++}
++
++/*
++ * update the percpu scd from the raw @now value
++ *
++ *  - filter out backward motion
++ *  - use jiffies to generate a min,max window to clip the raw values
++ */
++static void __update_sched_clock(struct sched_clock_data *scd, u64 now)
++{
++	unsigned long now_jiffies = jiffies;
++	long delta_jiffies = now_jiffies - scd->prev_jiffies;
++	u64 clock = scd->clock;
++	u64 min_clock, max_clock;
++	s64 delta = now - scd->prev_raw;
++
++	WARN_ON_ONCE(!irqs_disabled());
++	min_clock = scd->tick_gtod + delta_jiffies * TICK_NSEC;
++
++	if (unlikely(delta < 0)) {
++		clock++;
++		goto out;
++	}
++
++	max_clock = min_clock + TICK_NSEC;
++
++	if (unlikely(clock + delta > max_clock)) {
++		if (clock < max_clock)
++			clock = max_clock;
++		else
++			clock++;
++	} else {
++		clock += delta;
++	}
++
++ out:
++	if (unlikely(clock < min_clock))
++		clock = min_clock;
++
++	scd->prev_raw = now;
++	scd->prev_jiffies = now_jiffies;
++	scd->clock = clock;
++}
++
++static void lock_double_clock(struct sched_clock_data *data1,
++				struct sched_clock_data *data2)
++{
++	if (data1 < data2) {
++		__raw_spin_lock(&data1->lock);
++		__raw_spin_lock(&data2->lock);
++	} else {
++		__raw_spin_lock(&data2->lock);
++		__raw_spin_lock(&data1->lock);
++	}
++}
++
++u64 sched_clock_cpu(int cpu)
++{
++	struct sched_clock_data *scd = cpu_sdc(cpu);
++	u64 now, clock;
++
++	WARN_ON_ONCE(!irqs_disabled());
++	now = sched_clock();
++
++	if (cpu != raw_smp_processor_id()) {
++		/*
++		 * In order to update a remote cpu's clock based on our
++		 * unstable raw time, rebase it against:
++		 *   tick_raw		(offset between raw counters)
++		 *   tick_gtod		(tick offset between cpus)
++		 */
++		struct sched_clock_data *my_scd = this_scd();
++
++		lock_double_clock(scd, my_scd);
++
++		now -= my_scd->tick_raw;
++		now += scd->tick_raw;
++
++		now -= my_scd->tick_gtod;
++		now += scd->tick_gtod;
++
++		__raw_spin_unlock(&my_scd->lock);
++	} else {
++		__raw_spin_lock(&scd->lock);
++	}
++
++	__update_sched_clock(scd, now);
++	clock = scd->clock;
++
++	__raw_spin_unlock(&scd->lock);
++
++	return clock;
++}
++
++void sched_clock_tick(void)
++{
++	struct sched_clock_data *scd = this_scd();
++	u64 now, now_gtod;
++
++	WARN_ON_ONCE(!irqs_disabled());
++
++	now = sched_clock();
++	now_gtod = ktime_to_ns(ktime_get());
++
++	__raw_spin_lock(&scd->lock);
++	__update_sched_clock(scd, now);
++	/*
++	 * update tick_gtod after __update_sched_clock() because that will
++	 * already observe 1 new jiffy; adding a new tick_gtod to that would
++	 * increase the clock by 2 jiffies.
++	 */
++	scd->tick_raw = now;
++	scd->tick_gtod = now_gtod;
++	__raw_spin_unlock(&scd->lock);
++}
++
++/*
++ * We are going deep-idle (irqs are disabled):
++ */
++void sched_clock_idle_sleep_event(void)
++{
++	sched_clock_cpu(smp_processor_id());
++}
++EXPORT_SYMBOL_GPL(sched_clock_idle_sleep_event);
++
++/*
++ * We just idled delta nanoseconds (called with irqs disabled):
++ */
++void sched_clock_idle_wakeup_event(u64 delta_ns)
++{
++	struct sched_clock_data *scd = this_scd();
++	u64 now = sched_clock();
++
++	/*
++	 * Override the previous timestamp and ignore all
++	 * sched_clock() deltas that occurred while we idled,
++	 * and use the PM-provided delta_ns to advance the
++	 * rq clock:
++	 */
++	__raw_spin_lock(&scd->lock);
++	scd->prev_raw = now;
++	scd->clock += delta_ns;
++	__raw_spin_unlock(&scd->lock);
++
++	touch_softlockup_watchdog();
++}
++EXPORT_SYMBOL_GPL(sched_clock_idle_wakeup_event);
++
++#endif
++
++/*
++ * Scheduler clock - returns current time in nanosec units.
++ * This is the default implementation.
++ * Architectures and sub-architectures can override this.
++ */
++unsigned long long __attribute__((weak)) sched_clock(void)
++{
++	return (unsigned long long)jiffies * (NSEC_PER_SEC / HZ);
++}
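
The new kernel/sched_clock.c above filters raw sched_clock() deltas into a
window derived from gtod and jiffies, as its header comment describes. A
userspace model of __update_sched_clock()'s clamping follows; TICK_NSEC and
the sample values are assumptions for the example, not kernel values:

    #include <stdio.h>

    #define TICK_NSEC 1000000ULL        /* 1ms tick, assumed for the demo */

    struct scd {
        unsigned long prev_jiffies;
        unsigned long long prev_raw, tick_gtod, clock;
    };

    static void update_clock(struct scd *scd, unsigned long long now,
                             unsigned long jiffies_now)
    {
        long dj = jiffies_now - scd->prev_jiffies;
        long long delta = (long long)(now - scd->prev_raw);
        unsigned long long clock = scd->clock;
        unsigned long long min_clock = scd->tick_gtod + dj * TICK_NSEC;
        unsigned long long max_clock = min_clock + TICK_NSEC;

        if (delta < 0)
            clock++;                    /* filter out backward motion */
        else if (clock + delta > max_clock)
            clock = clock < max_clock ? max_clock : clock + 1;
        else
            clock += delta;

        if (clock < min_clock)          /* never fall behind the window */
            clock = min_clock;

        scd->prev_raw = now;
        scd->prev_jiffies = jiffies_now;
        scd->clock = clock;
    }

    int main(void)
    {
        struct scd scd = { .prev_jiffies = 100, .prev_raw = 0,
                           .tick_gtod = 100 * TICK_NSEC,
                           .clock = 100 * TICK_NSEC };

        update_clock(&scd, 500000, 100);    /* normal forward delta */
        printf("clock=%llu\n", scd.clock);
        update_clock(&scd, 400000, 100);    /* backward raw time, filtered */
        printf("clock=%llu\n", scd.clock);
        return 0;
    }
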
+diff --git a/kernel/sched_debug.c b/kernel/sched_debug.c
+index 6b4a125..5f06118 100644
+--- a/kernel/sched_debug.c
++++ b/kernel/sched_debug.c
+@@ -204,13 +204,6 @@ static void print_cpu(struct seq_file *m, int cpu)
+ 	PN(next_balance);
+ 	P(curr->pid);
+ 	PN(clock);
+-	PN(idle_clock);
+-	PN(prev_clock_raw);
+-	P(clock_warps);
+-	P(clock_overflows);
+-	P(clock_underflows);
+-	P(clock_deep_idle_events);
+-	PN(clock_max_delta);
+ 	P(cpu_load[0]);
+ 	P(cpu_load[1]);
+ 	P(cpu_load[2]);
+diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c
+index 89fa32b..e24ecd3 100644
+--- a/kernel/sched_fair.c
++++ b/kernel/sched_fair.c
+@@ -662,10 +662,15 @@ place_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int initial)
+ 	if (!initial) {
+ 		/* sleeps upto a single latency don't count. */
+ 		if (sched_feat(NEW_FAIR_SLEEPERS)) {
++			unsigned long thresh = sysctl_sched_latency;
++
++			/*
++			 * convert the sleeper threshold into virtual time
++			 */
+ 			if (sched_feat(NORMALIZED_SLEEPER))
+-				vruntime -= calc_delta_weight(sysctl_sched_latency, se);
+-			else
+-				vruntime -= sysctl_sched_latency;
++				thresh = calc_delta_fair(thresh, se);
++
++			vruntime -= thresh;
+ 		}
+ 
+ 		/* ensure we never gain time by being placed backwards. */
+@@ -682,6 +687,7 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+ 	 * Update run-time statistics of the 'current'.
+ 	 */
+ 	update_curr(cfs_rq);
++	account_entity_enqueue(cfs_rq, se);
+ 
+ 	if (wakeup) {
+ 		place_entity(cfs_rq, se, 0);
+@@ -692,7 +698,6 @@ enqueue_entity(struct cfs_rq *cfs_rq, struct sched_entity *se, int wakeup)
+ 	check_spread(cfs_rq, se);
+ 	if (se != cfs_rq->curr)
+ 		__enqueue_entity(cfs_rq, se);
+-	account_entity_enqueue(cfs_rq, se);
+ }
+ 
+ static void update_avg(u64 *avg, u64 sample)
+@@ -841,8 +846,10 @@ entity_tick(struct cfs_rq *cfs_rq, struct sched_entity *curr, int queued)
+ 	 * queued ticks are scheduled to match the slice, so don't bother
+ 	 * validating it and just reschedule.
+ 	 */
+-	if (queued)
+-		return resched_task(rq_of(cfs_rq)->curr);
++	if (queued) {
++		resched_task(rq_of(cfs_rq)->curr);
++		return;
++	}
+ 	/*
+ 	 * don't let the period tick interfere with the hrtick preemption
+ 	 */
+@@ -957,7 +964,7 @@ static void yield_task_fair(struct rq *rq)
+ 		return;
+ 
+ 	if (likely(!sysctl_sched_compat_yield) && curr->policy != SCHED_BATCH) {
+-		__update_rq_clock(rq);
++		update_rq_clock(rq);
+ 		/*
+ 		 * Update run-time statistics of the 'current'.
+ 		 */
+@@ -1007,7 +1014,7 @@ static int wake_idle(int cpu, struct task_struct *p)
+ 	 * sibling runqueue info. This will avoid the checks and cache miss
+ 	 * penalities associated with that.
+ 	 */
+-	if (idle_cpu(cpu) || cpu_rq(cpu)->nr_running > 1)
++	if (idle_cpu(cpu) || cpu_rq(cpu)->cfs.nr_running > 1)
+ 		return cpu;
+ 
+ 	for_each_domain(cpu, sd) {
+@@ -1611,30 +1618,6 @@ static const struct sched_class fair_sched_class = {
+ };
+ 
+ #ifdef CONFIG_SCHED_DEBUG
+-static void
+-print_cfs_rq_tasks(struct seq_file *m, struct cfs_rq *cfs_rq, int depth)
+-{
+-	struct sched_entity *se;
+-
+-	if (!cfs_rq)
+-		return;
+-
+-	list_for_each_entry_rcu(se, &cfs_rq->tasks, group_node) {
+-		int i;
+-
+-		for (i = depth; i; i--)
+-			seq_puts(m, "  ");
+-
+-		seq_printf(m, "%lu %s %lu\n",
+-				se->load.weight,
+-				entity_is_task(se) ? "T" : "G",
+-				calc_delta_weight(SCHED_LOAD_SCALE, se)
+-				);
+-		if (!entity_is_task(se))
+-			print_cfs_rq_tasks(m, group_cfs_rq(se), depth + 1);
+-	}
+-}
+-
+ static void print_cfs_stats(struct seq_file *m, int cpu)
+ {
+ 	struct cfs_rq *cfs_rq;
+@@ -1642,9 +1625,6 @@ static void print_cfs_stats(struct seq_file *m, int cpu)
+ 	rcu_read_lock();
+ 	for_each_leaf_cfs_rq(cpu_rq(cpu), cfs_rq)
+ 		print_cfs_rq(m, cpu, cfs_rq);
+-
+-	seq_printf(m, "\nWeight tree:\n");
+-	print_cfs_rq_tasks(m, &cpu_rq(cpu)->cfs, 1);
+ 	rcu_read_unlock();
+ }
+ #endif
+diff --git a/kernel/sched_idletask.c b/kernel/sched_idletask.c
+index 2bcafa3..3a4f92d 100644
+--- a/kernel/sched_idletask.c
++++ b/kernel/sched_idletask.c
+@@ -99,7 +99,7 @@ static void prio_changed_idle(struct rq *rq, struct task_struct *p,
+ /*
+  * Simple, special scheduling class for the per-CPU idle tasks:
+  */
+-const struct sched_class idle_sched_class = {
++static const struct sched_class idle_sched_class = {
+ 	/* .next is NULL */
+ 	/* no enqueue/yield_task for idle tasks */
+ 
+diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c
+index c2730a5..060e87b 100644
+--- a/kernel/sched_rt.c
++++ b/kernel/sched_rt.c
+@@ -1098,11 +1098,14 @@ static void post_schedule_rt(struct rq *rq)
+ 	}
+ }
+ 
+-
++/*
++ * If we are not running and we are not going to reschedule soon, we should
++ * try to push tasks away now.
++ */
+ static void task_wake_up_rt(struct rq *rq, struct task_struct *p)
+ {
+ 	if (!task_running(rq, p) &&
+-	    (p->prio >= rq->rt.highest_prio) &&
++	    !test_tsk_need_resched(rq->curr) &&
+ 	    rq->rt.overloaded)
+ 		push_rt_tasks(rq);
+ }
+@@ -1309,7 +1312,7 @@ static void set_curr_task_rt(struct rq *rq)
+ 	p->se.exec_start = rq->clock;
+ }
+ 
+-const struct sched_class rt_sched_class = {
++static const struct sched_class rt_sched_class = {
+ 	.next			= &fair_sched_class,
+ 	.enqueue_task		= enqueue_task_rt,
+ 	.dequeue_task		= dequeue_task_rt,
+diff --git a/kernel/semaphore.c b/kernel/semaphore.c
+index 5c2942e..5e41217 100644
+--- a/kernel/semaphore.c
++++ b/kernel/semaphore.c
+@@ -54,10 +54,9 @@ void down(struct semaphore *sem)
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(&sem->lock, flags);
+-	if (likely(sem->count > 0))
+-		sem->count--;
+-	else
++	if (unlikely(!sem->count))
+ 		__down(sem);
++	sem->count--;
+ 	spin_unlock_irqrestore(&sem->lock, flags);
+ }
+ EXPORT_SYMBOL(down);
+@@ -77,10 +76,10 @@ int down_interruptible(struct semaphore *sem)
+ 	int result = 0;
+ 
+ 	spin_lock_irqsave(&sem->lock, flags);
+-	if (likely(sem->count > 0))
+-		sem->count--;
+-	else
++	if (unlikely(!sem->count))
+ 		result = __down_interruptible(sem);
++	if (!result)
++		sem->count--;
+ 	spin_unlock_irqrestore(&sem->lock, flags);
+ 
+ 	return result;
+@@ -103,10 +102,10 @@ int down_killable(struct semaphore *sem)
+ 	int result = 0;
+ 
+ 	spin_lock_irqsave(&sem->lock, flags);
+-	if (likely(sem->count > 0))
+-		sem->count--;
+-	else
++	if (unlikely(!sem->count))
+ 		result = __down_killable(sem);
++	if (!result)
++		sem->count--;
+ 	spin_unlock_irqrestore(&sem->lock, flags);
+ 
+ 	return result;
+@@ -157,10 +156,10 @@ int down_timeout(struct semaphore *sem, long jiffies)
+ 	int result = 0;
+ 
+ 	spin_lock_irqsave(&sem->lock, flags);
+-	if (likely(sem->count > 0))
+-		sem->count--;
+-	else
++	if (unlikely(!sem->count))
+ 		result = __down_timeout(sem, jiffies);
++	if (!result)
++		sem->count--;
+ 	spin_unlock_irqrestore(&sem->lock, flags);
+ 
+ 	return result;
+@@ -179,9 +178,8 @@ void up(struct semaphore *sem)
+ 	unsigned long flags;
+ 
+ 	spin_lock_irqsave(&sem->lock, flags);
+-	if (likely(list_empty(&sem->wait_list)))
+-		sem->count++;
+-	else
++	sem->count++;
++	if (unlikely(!list_empty(&sem->wait_list)))
+ 		__up(sem);
+ 	spin_unlock_irqrestore(&sem->lock, flags);
+ }
+@@ -192,7 +190,6 @@ EXPORT_SYMBOL(up);
+ struct semaphore_waiter {
+ 	struct list_head list;
+ 	struct task_struct *task;
+-	int up;
+ };
+ 
+ /*
+@@ -205,33 +202,34 @@ static inline int __sched __down_common(struct semaphore *sem, long state,
+ {
+ 	struct task_struct *task = current;
+ 	struct semaphore_waiter waiter;
++	int ret = 0;
+ 
+-	list_add_tail(&waiter.list, &sem->wait_list);
+ 	waiter.task = task;
+-	waiter.up = 0;
++	list_add_tail(&waiter.list, &sem->wait_list);
+ 
+ 	for (;;) {
+-		if (state == TASK_INTERRUPTIBLE && signal_pending(task))
+-			goto interrupted;
+-		if (state == TASK_KILLABLE && fatal_signal_pending(task))
+-			goto interrupted;
+-		if (timeout <= 0)
+-			goto timed_out;
++		if (state == TASK_INTERRUPTIBLE && signal_pending(task)) {
++			ret = -EINTR;
++			break;
++		}
++		if (state == TASK_KILLABLE && fatal_signal_pending(task)) {
++			ret = -EINTR;
++			break;
++		}
++		if (timeout <= 0) {
++			ret = -ETIME;
++			break;
++		}
+ 		__set_task_state(task, state);
+ 		spin_unlock_irq(&sem->lock);
+ 		timeout = schedule_timeout(timeout);
+ 		spin_lock_irq(&sem->lock);
+-		if (waiter.up)
+-			return 0;
++		if (sem->count > 0)
++			break;
+ 	}
+ 
+- timed_out:
+-	list_del(&waiter.list);
+-	return -ETIME;
+-
+- interrupted:
+ 	list_del(&waiter.list);
+-	return -EINTR;
++	return ret;
+ }
+ 
+ static noinline void __sched __down(struct semaphore *sem)
+@@ -258,7 +256,5 @@ static noinline void __sched __up(struct semaphore *sem)
+ {
+ 	struct semaphore_waiter *waiter = list_first_entry(&sem->wait_list,
+ 						struct semaphore_waiter, list);
+-	list_del(&waiter->list);
+-	waiter->up = 1;
+ 	wake_up_process(waiter->task);
+ }
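
[Note: the semaphore rewrite above replaces the per-waiter "up" flag with a simpler invariant: up() always raises the count, __up() merely wakes the head waiter, and a woken waiter re-checks sem->count itself. The interruptible variants also consume a count only on success. A condensed sketch of the resulting down_interruptible() path (simplified from the patched kernel/semaphore.c above, not a standalone program):

	spin_lock_irqsave(&sem->lock, flags);
	if (unlikely(!sem->count))
		result = __down_interruptible(sem); /* sleeps; -EINTR on signal */
	if (!result)
		sem->count--;                       /* consume only on success */
	spin_unlock_irqrestore(&sem->lock, flags);
	return result;
]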
+diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c
+index 73961f3..dadde53 100644
+--- a/kernel/time/clocksource.c
++++ b/kernel/time/clocksource.c
+@@ -471,10 +471,10 @@ sysfs_show_available_clocksources(struct sys_device *dev, char *buf)
+ /*
+  * Sysfs setup bits:
+  */
+-static SYSDEV_ATTR(current_clocksource, 0600, sysfs_show_current_clocksources,
++static SYSDEV_ATTR(current_clocksource, 0644, sysfs_show_current_clocksources,
+ 		   sysfs_override_clocksource);
+ 
+-static SYSDEV_ATTR(available_clocksource, 0600,
++static SYSDEV_ATTR(available_clocksource, 0444,
+ 		   sysfs_show_available_clocksources, NULL);
+ 
+ static struct sysdev_class clocksource_sysclass = {
+diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
+index f2e01ac..a5d4b1d 100644
+--- a/lib/Kconfig.kgdb
++++ b/lib/Kconfig.kgdb
+@@ -1,4 +1,10 @@
+ 
++config HAVE_ARCH_KGDB_SHADOW_INFO
++	bool
++
++config HAVE_ARCH_KGDB
++	bool
++
+ menuconfig KGDB
+ 	bool "KGDB: kernel debugging with remote gdb"
+ 	select FRAME_POINTER
+@@ -10,15 +16,10 @@ menuconfig KGDB
+ 	  at http://kgdb.sourceforge.net as well as in DocBook form
+ 	  in Documentation/DocBook/.  If unsure, say N.
+ 
+-config HAVE_ARCH_KGDB_SHADOW_INFO
+-	bool
+-
+-config HAVE_ARCH_KGDB
+-	bool
++if KGDB
+ 
+ config KGDB_SERIAL_CONSOLE
+ 	tristate "KGDB: use kgdb over the serial console"
+-	depends on KGDB
+ 	select CONSOLE_POLL
+ 	select MAGIC_SYSRQ
+ 	default y
+@@ -28,7 +29,6 @@ config KGDB_SERIAL_CONSOLE
+ 
+ config KGDB_TESTS
+ 	bool "KGDB: internal test suite"
+-	depends on KGDB
+ 	default n
+ 	help
+ 	  This is a kgdb I/O module specifically designed to test
+@@ -56,3 +56,5 @@ config KGDB_TESTS_BOOT_STRING
+ 	  boot.  See the drivers/misc/kgdbts.c for detailed
+ 	  information about other strings you could use beyond the
+ 	  default of V1F100.
++
++endif # KGDB
+diff --git a/lib/devres.c b/lib/devres.c
+index 26c87c4..72c8909 100644
+--- a/lib/devres.c
++++ b/lib/devres.c
+@@ -2,7 +2,7 @@
+ #include <linux/io.h>
+ #include <linux/module.h>
+ 
+-static void devm_ioremap_release(struct device *dev, void *res)
++void devm_ioremap_release(struct device *dev, void *res)
+ {
+ 	iounmap(*(void __iomem **)res);
+ }
+diff --git a/mm/filemap.c b/mm/filemap.c
+index 239d361..2dead9a 100644
+--- a/mm/filemap.c
++++ b/mm/filemap.c
+@@ -1655,7 +1655,7 @@ int should_remove_suid(struct dentry *dentry)
+ }
+ EXPORT_SYMBOL(should_remove_suid);
+ 
+-int __remove_suid(struct dentry *dentry, int kill)
++static int __remove_suid(struct dentry *dentry, int kill)
+ {
+ 	struct iattr newattrs;
+ 
+diff --git a/mm/memory.c b/mm/memory.c
+index bbab1e3..48c122d 100644
+--- a/mm/memory.c
++++ b/mm/memory.c
+@@ -969,7 +969,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+ 		goto no_page_table;
+ 	
+ 	pmd = pmd_offset(pud, address);
+-	if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd)))
++	if (pmd_none(*pmd))
+ 		goto no_page_table;
+ 
+ 	if (pmd_huge(*pmd)) {
+@@ -978,6 +978,9 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address,
+ 		goto out;
+ 	}
+ 
++	if (unlikely(pmd_bad(*pmd)))
++		goto no_page_table;
++
+ 	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
+ 	if (!ptep)
+ 		goto out;
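
[Note: the reordering above exists because a huge-page pmd is not a pointer to a page table, so pmd_bad() can reject a perfectly valid huge mapping. Reassembled from the two hunks, the patched check sequence in follow_page() reads (huge path elided, as in the diff):

	pmd = pmd_offset(pud, address);
	if (pmd_none(*pmd))
		goto no_page_table;          /* nothing mapped */

	if (pmd_huge(*pmd)) {
		/* hugepage path (unchanged, elided) */
		goto out;
	}

	if (unlikely(pmd_bad(*pmd)))         /* only normal pmds sanity-checked */
		goto no_page_table;

	ptep = pte_offset_map_lock(mm, pmd, address, &ptl);
]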
+diff --git a/mm/slub.c b/mm/slub.c
+index d379b78..a505a82 100644
+--- a/mm/slub.c
++++ b/mm/slub.c
+@@ -3762,7 +3762,7 @@ static int any_slab_objects(struct kmem_cache *s)
+ 		if (!n)
+ 			continue;
+ 
+-		if (atomic_read(&n->total_objects))
++		if (atomic_long_read(&n->total_objects))
+ 			return 1;
+ 	}
+ 	return 0;
+diff --git a/net/atm/br2684.c b/net/atm/br2684.c
+index 1b22806..9d52ebf 100644
+--- a/net/atm/br2684.c
++++ b/net/atm/br2684.c
+@@ -346,9 +346,9 @@ static void br2684_push(struct atm_vcc *atmvcc, struct sk_buff *skb)
+ 		/* skb==NULL means VCC is being destroyed */
+ 		br2684_close_vcc(brvcc);
+ 		if (list_empty(&brdev->brvccs)) {
+-			read_lock(&devs_lock);
++			write_lock_irq(&devs_lock);
+ 			list_del(&brdev->br2684_devs);
+-			read_unlock(&devs_lock);
++			write_unlock_irq(&devs_lock);
+ 			unregister_netdev(net_dev);
+ 			free_netdev(net_dev);
+ 		}
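
[Note: the br2684 fix above is the basic rwlock rule: list_del() is a write, so taking only the read side (as the old code did) lets a concurrent reader walk a half-unlinked list. A generic illustration with a hypothetical device list (names are illustrative, not from the patch):

	static LIST_HEAD(devs);
	static DEFINE_RWLOCK(devs_lock);

	/* readers may run concurrently under the read side */
	read_lock(&devs_lock);
	list_for_each_entry(d, &devs, list)
		inspect(d);                  /* hypothetical reader */
	read_unlock(&devs_lock);

	/* any list mutation needs the exclusive write side; _irq because
	 * the other writers here run with interrupts disabled */
	write_lock_irq(&devs_lock);
	list_del(&d->list);
	write_unlock_irq(&devs_lock);
]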
+diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
+index 77a981a..c2397f5 100644
+--- a/net/bridge/br_if.c
++++ b/net/bridge/br_if.c
+@@ -273,15 +273,13 @@ int br_add_bridge(const char *name)
+ 	rtnl_lock();
+ 	if (strchr(dev->name, '%')) {
+ 		ret = dev_alloc_name(dev, dev->name);
+-		if (ret < 0) {
+-			free_netdev(dev);
+-			goto out;
+-		}
++		if (ret < 0)
++			goto out_free;
+ 	}
+ 
+ 	ret = register_netdevice(dev);
+ 	if (ret)
+-		goto out;
++		goto out_free;
+ 
+ 	ret = br_sysfs_addbr(dev);
+ 	if (ret)
+@@ -289,6 +287,10 @@ int br_add_bridge(const char *name)
+  out:
+ 	rtnl_unlock();
+ 	return ret;
++
++out_free:
++	free_netdev(dev);
++	goto out;
+ }
+ 
+ int br_del_bridge(const char *name)
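
[Note: br_add_bridge() now funnels every failure through a single out_free label, so the netdev is freed exactly once no matter which step fails. The same unwind idiom in a self-contained user-space form (hypothetical example, any C compiler):

	#include <stdlib.h>
	#include <string.h>

	/* Stand-in for a registration step that can fail (hypothetical). */
	static int do_register(const char *name)
	{
		return name[0] ? 0 : -1;
	}

	static int add_thing(const char *name)
	{
		int ret;
		char *copy = strdup(name);

		if (!copy)
			return -1;

		ret = do_register(copy);
		if (ret < 0)
			goto out_free;  /* every failure funnels through here */

		return 0;               /* success: 'copy' stays registered */

	out_free:
		free(copy);             /* freed exactly once, on any failure */
		return ret;
	}

	int main(void)
	{
		return add_thing("br0") ? 1 : 0;
	}
]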
+diff --git a/net/can/af_can.c b/net/can/af_can.c
+index 2759b76..7e8ca28 100644
+--- a/net/can/af_can.c
++++ b/net/can/af_can.c
+@@ -208,6 +208,7 @@ static int can_create(struct net *net, struct socket *sock, int protocol)
+  */
+ int can_send(struct sk_buff *skb, int loop)
+ {
++	struct sk_buff *newskb = NULL;
+ 	int err;
+ 
+ 	if (skb->dev->type != ARPHRD_CAN) {
+@@ -244,8 +245,7 @@ int can_send(struct sk_buff *skb, int loop)
+ 			 * If the interface is not capable to do loopback
+ 			 * itself, we do it here.
+ 			 */
+-			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
+-
++			newskb = skb_clone(skb, GFP_ATOMIC);
+ 			if (!newskb) {
+ 				kfree_skb(skb);
+ 				return -ENOMEM;
+@@ -254,7 +254,6 @@ int can_send(struct sk_buff *skb, int loop)
+ 			newskb->sk = skb->sk;
+ 			newskb->ip_summed = CHECKSUM_UNNECESSARY;
+ 			newskb->pkt_type = PACKET_BROADCAST;
+-			netif_rx(newskb);
+ 		}
+ 	} else {
+ 		/* indication for the CAN driver: no loopback required */
+@@ -266,11 +265,20 @@ int can_send(struct sk_buff *skb, int loop)
+ 	if (err > 0)
+ 		err = net_xmit_errno(err);
+ 
++	if (err) {
++		if (newskb)
++			kfree_skb(newskb);
++		return err;
++	}
++
++	if (newskb)
++		netif_rx(newskb);
++
+ 	/* update statistics */
+ 	can_stats.tx_frames++;
+ 	can_stats.tx_frames_delta++;
+ 
+-	return err;
++	return 0;
+ }
+ EXPORT_SYMBOL(can_send);
+ 
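
[Note: with the clone deferred, a frame is looped back only after dev_queue_xmit() has succeeded, and the tx counters no longer count failed sends. The patched tail of can_send(), condensed from the hunks above:

	err = dev_queue_xmit(skb);
	if (err > 0)
		err = net_xmit_errno(err);
	if (err) {
		if (newskb)
			kfree_skb(newskb);   /* never loop back a failed frame */
		return err;
	}
	if (newskb)
		netif_rx(newskb);            /* deliver the loopback copy */
	can_stats.tx_frames++;
	can_stats.tx_frames_delta++;
	return 0;
]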
+diff --git a/net/can/bcm.c b/net/can/bcm.c
+index 74fd2d3..d9a3a9d 100644
+--- a/net/can/bcm.c
++++ b/net/can/bcm.c
+@@ -412,12 +412,6 @@ static void bcm_rx_changed(struct bcm_op *op, struct can_frame *data)
+ 	bcm_send_to_user(op, &head, data, 1);
+ }
+ 
+-/* TODO: move to linux/hrtimer.h */
+-static inline int hrtimer_callback_running(struct hrtimer *timer)
+-{
+-        return timer->state & HRTIMER_STATE_CALLBACK;
+-}
+-
+ /*
+  * bcm_rx_update_and_send - process a detected relevant receive content change
+  *                          1. update the last received data
+diff --git a/net/core/dev.c b/net/core/dev.c
+index d334446..a1607bc 100644
+--- a/net/core/dev.c
++++ b/net/core/dev.c
+@@ -994,6 +994,8 @@ int dev_open(struct net_device *dev)
+ {
+ 	int ret = 0;
+ 
++	ASSERT_RTNL();
++
+ 	/*
+ 	 *	Is it already up?
+ 	 */
+@@ -1060,6 +1062,8 @@ int dev_open(struct net_device *dev)
+  */
+ int dev_close(struct net_device *dev)
+ {
++	ASSERT_RTNL();
++
+ 	might_sleep();
+ 
+ 	if (!(dev->flags & IFF_UP))
+@@ -4480,17 +4484,19 @@ static void __net_exit default_device_exit(struct net *net)
+ 	rtnl_lock();
+ 	for_each_netdev_safe(net, dev, next) {
+ 		int err;
++		char fb_name[IFNAMSIZ];
+ 
+ 		/* Ignore unmoveable devices (i.e. loopback) */
+ 		if (dev->features & NETIF_F_NETNS_LOCAL)
+ 			continue;
+ 
+ 		/* Push remaing network devices to init_net */
+-		err = dev_change_net_namespace(dev, &init_net, "dev%d");
++		snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
++		err = dev_change_net_namespace(dev, &init_net, fb_name);
+ 		if (err) {
+-			printk(KERN_WARNING "%s: failed to move %s to init_net: %d\n",
++			printk(KERN_EMERG "%s: failed to move %s to init_net: %d\n",
+ 				__func__, dev->name, err);
+-			unregister_netdevice(dev);
++			BUG();
+ 		}
+ 	}
+ 	rtnl_unlock();
+diff --git a/net/core/skbuff.c b/net/core/skbuff.c
+index 4fe605f..5c459f2 100644
+--- a/net/core/skbuff.c
++++ b/net/core/skbuff.c
+@@ -200,7 +200,9 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
+ 		goto nodata;
+ 
+ 	/*
+-	 * See comment in sk_buff definition, just before the 'tail' member
++	 * Only clear those fields we need to clear, not those that we will
++	 * actually initialise below. Hence, don't put any more fields after
++	 * the tail pointer in struct sk_buff!
+ 	 */
+ 	memset(skb, 0, offsetof(struct sk_buff, tail));
+ 	skb->truesize = size + sizeof(struct sk_buff);
+diff --git a/net/dccp/feat.c b/net/dccp/feat.c
+index 4a4f6ce..933a0ec 100644
+--- a/net/dccp/feat.c
++++ b/net/dccp/feat.c
+@@ -32,7 +32,7 @@ int dccp_feat_change(struct dccp_minisock *dmsk, u8 type, u8 feature,
+ 
+ 	if (len > 3) {
+ 		DCCP_WARN("invalid length %d\n", len);
+-		return 1;
++		return -EINVAL;
+ 	}
+ 	/* XXX add further sanity checks */
+ 
+diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c
+index 2f665a5..f50e88b 100644
+--- a/net/decnet/dn_route.c
++++ b/net/decnet/dn_route.c
+@@ -235,14 +235,14 @@ static void dn_dst_update_pmtu(struct dst_entry *dst, u32 mtu)
+ 	else
+ 		min_mtu -= 21;
+ 
+-	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= min_mtu) {
++	if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= min_mtu) {
+ 		if (!(dst_metric_locked(dst, RTAX_MTU))) {
+ 			dst->metrics[RTAX_MTU-1] = mtu;
+ 			dst_set_expires(dst, dn_rt_mtu_expires);
+ 		}
+ 		if (!(dst_metric_locked(dst, RTAX_ADVMSS))) {
+ 			u32 mss = mtu - DN_MAX_NSP_DATA_HEADER;
+-			if (dst->metrics[RTAX_ADVMSS-1] > mss)
++			if (dst_metric(dst, RTAX_ADVMSS) > mss)
+ 				dst->metrics[RTAX_ADVMSS-1] = mss;
+ 		}
+ 	}
+@@ -805,12 +805,12 @@ static int dn_rt_set_next_hop(struct dn_route *rt, struct dn_fib_res *res)
+ 		rt->u.dst.neighbour = n;
+ 	}
+ 
+-	if (rt->u.dst.metrics[RTAX_MTU-1] == 0 ||
+-	    rt->u.dst.metrics[RTAX_MTU-1] > rt->u.dst.dev->mtu)
++	if (dst_metric(&rt->u.dst, RTAX_MTU) == 0 ||
++	    dst_metric(&rt->u.dst, RTAX_MTU) > rt->u.dst.dev->mtu)
+ 		rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
+ 	mss = dn_mss_from_pmtu(dev, dst_mtu(&rt->u.dst));
+-	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0 ||
+-	    rt->u.dst.metrics[RTAX_ADVMSS-1] > mss)
++	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0 ||
++	    dst_metric(&rt->u.dst, RTAX_ADVMSS) > mss)
+ 		rt->u.dst.metrics[RTAX_ADVMSS-1] = mss;
+ 	return 0;
+ }
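
[Note: the raw dst->metrics[RTAX_FOO-1] reads in this and the later ipv4/ipv6/tcp hunks are being converted to the dst_metric() accessor; in this kernel the helper is essentially the following inline from include/net/dst.h (quoted from memory, so treat as a sketch):

	static inline u32 dst_metric(const struct dst_entry *dst, int metric)
	{
		return dst->metrics[metric - 1]; /* RTAX_* indices are 1-based */
	}
]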
+diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
+index 7b4bad6..ff77a4a 100644
+--- a/net/ipv4/ip_input.c
++++ b/net/ipv4/ip_input.c
+@@ -397,7 +397,7 @@ int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt,
+ 	iph = ip_hdr(skb);
+ 
+ 	/*
+-	 *	RFC1122: 3.1.2.2 MUST silently discard any IP frame that fails the checksum.
++	 *	RFC1122: 3.2.1.2 MUST silently discard any IP frame that fails the checksum.
+ 	 *
+ 	 *	Is the datagram acceptable?
+ 	 *
+diff --git a/net/ipv4/route.c b/net/ipv4/route.c
+index 5e3685c..92f90ae 100644
+--- a/net/ipv4/route.c
++++ b/net/ipv4/route.c
+@@ -1468,14 +1468,14 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+ 
+ 					/* BSD 4.2 compatibility hack :-( */
+ 					if (mtu == 0 &&
+-					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
++					    old_mtu >= dst_metric(&rth->u.dst, RTAX_MTU) &&
+ 					    old_mtu >= 68 + (iph->ihl << 2))
+ 						old_mtu -= iph->ihl << 2;
+ 
+ 					mtu = guess_mtu(old_mtu);
+ 				}
+-				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
+-					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) {
++				if (mtu <= dst_metric(&rth->u.dst, RTAX_MTU)) {
++					if (mtu < dst_metric(&rth->u.dst, RTAX_MTU)) {
+ 						dst_confirm(&rth->u.dst);
+ 						if (mtu < ip_rt_min_pmtu) {
+ 							mtu = ip_rt_min_pmtu;
+@@ -1497,7 +1497,7 @@ unsigned short ip_rt_frag_needed(struct net *net, struct iphdr *iph,
+ 
+ static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+ {
+-	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
++	if (dst_metric(dst, RTAX_MTU) > mtu && mtu >= 68 &&
+ 	    !(dst_metric_locked(dst, RTAX_MTU))) {
+ 		if (mtu < ip_rt_min_pmtu) {
+ 			mtu = ip_rt_min_pmtu;
+@@ -1613,7 +1613,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
+ 		       sizeof(rt->u.dst.metrics));
+ 		if (fi->fib_mtu == 0) {
+ 			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
+-			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
++			if (dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
+ 			    rt->rt_gateway != rt->rt_dst &&
+ 			    rt->u.dst.dev->mtu > 576)
+ 				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+@@ -1624,14 +1624,14 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag)
+ 	} else
+ 		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
+ 
+-	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
++	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
+ 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
+-	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
++	if (dst_metric(&rt->u.dst, RTAX_MTU) > IP_MAX_MTU)
+ 		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
+-	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
++	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) == 0)
+ 		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+ 				       ip_rt_min_advmss);
+-	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
++	if (dst_metric(&rt->u.dst, RTAX_ADVMSS) > 65535 - 40)
+ 		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+ 
+ #ifdef CONFIG_NET_CLS_ROUTE
+diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+index eda4f4a..26c9369 100644
+--- a/net/ipv4/tcp_input.c
++++ b/net/ipv4/tcp_input.c
+@@ -66,6 +66,7 @@
+ #include <linux/mm.h>
+ #include <linux/module.h>
+ #include <linux/sysctl.h>
++#include <net/dst.h>
+ #include <net/tcp.h>
+ #include <net/inet_common.h>
+ #include <linux/ipsec.h>
+@@ -113,8 +114,6 @@ int sysctl_tcp_abc __read_mostly;
+ #define FLAG_FORWARD_PROGRESS	(FLAG_ACKED|FLAG_DATA_SACKED)
+ #define FLAG_ANY_PROGRESS	(FLAG_FORWARD_PROGRESS|FLAG_SND_UNA_ADVANCED)
+ 
+-#define IsSackFrto() (sysctl_tcp_frto == 0x2)
+-
+ #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH)
+ #define TCP_HP_BITS (~(TCP_RESERVED_BITS|TCP_FLAG_PSH))
+ 
+@@ -605,7 +604,7 @@ static u32 tcp_rto_min(struct sock *sk)
+ 	u32 rto_min = TCP_RTO_MIN;
+ 
+ 	if (dst && dst_metric_locked(dst, RTAX_RTO_MIN))
+-		rto_min = dst->metrics[RTAX_RTO_MIN - 1];
++		rto_min = dst_metric(dst, RTAX_RTO_MIN);
+ 	return rto_min;
+ }
+ 
+@@ -769,7 +768,7 @@ void tcp_update_metrics(struct sock *sk)
+ 				dst->metrics[RTAX_RTTVAR - 1] = m;
+ 			else
+ 				dst->metrics[RTAX_RTTVAR-1] -=
+-					(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
++					(dst_metric(dst, RTAX_RTTVAR) - m)>>2;
+ 		}
+ 
+ 		if (tp->snd_ssthresh >= 0xFFFF) {
+@@ -788,21 +787,21 @@ void tcp_update_metrics(struct sock *sk)
+ 				dst->metrics[RTAX_SSTHRESH-1] =
+ 					max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
+ 			if (!dst_metric_locked(dst, RTAX_CWND))
+-				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
++				dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_cwnd) >> 1;
+ 		} else {
+ 			/* Else slow start did not finish, cwnd is non-sense,
+ 			   ssthresh may be also invalid.
+ 			 */
+ 			if (!dst_metric_locked(dst, RTAX_CWND))
+-				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
+-			if (dst->metrics[RTAX_SSTHRESH-1] &&
++				dst->metrics[RTAX_CWND-1] = (dst_metric(dst, RTAX_CWND) + tp->snd_ssthresh) >> 1;
++			if (dst_metric(dst, RTAX_SSTHRESH) &&
+ 			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
+-			    tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
++			    tp->snd_ssthresh > dst_metric(dst, RTAX_SSTHRESH))
+ 				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+ 		}
+ 
+ 		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
+-			if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
++			if (dst_metric(dst, RTAX_REORDERING) < tp->reordering &&
+ 			    tp->reordering != sysctl_tcp_reordering)
+ 				dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+ 		}
+@@ -1685,6 +1684,11 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp)
+ 	tp->sacked_out = 0;
+ }
+ 
++static int tcp_is_sackfrto(const struct tcp_sock *tp)
++{
++	return (sysctl_tcp_frto == 0x2) && !tcp_is_reno(tp);
++}
++
+ /* F-RTO can only be used if TCP has never retransmitted anything other than
+  * head (SACK enhanced variant from Appendix B of RFC4138 is more robust here)
+  */
+@@ -1701,7 +1705,7 @@ int tcp_use_frto(struct sock *sk)
+ 	if (icsk->icsk_mtup.probe_size)
+ 		return 0;
+ 
+-	if (IsSackFrto())
++	if (tcp_is_sackfrto(tp))
+ 		return 1;
+ 
+ 	/* Avoid expensive walking of rexmit queue if possible */
+@@ -1791,7 +1795,7 @@ void tcp_enter_frto(struct sock *sk)
+ 	/* Earlier loss recovery underway (see RFC4138; Appendix B).
+ 	 * The last condition is necessary at least in tp->frto_counter case.
+ 	 */
+-	if (IsSackFrto() && (tp->frto_counter ||
++	if (tcp_is_sackfrto(tp) && (tp->frto_counter ||
+ 	    ((1 << icsk->icsk_ca_state) & (TCPF_CA_Recovery|TCPF_CA_Loss))) &&
+ 	    after(tp->high_seq, tp->snd_una)) {
+ 		tp->frto_highmark = tp->high_seq;
+@@ -3123,7 +3127,7 @@ static int tcp_process_frto(struct sock *sk, int flag)
+ 		return 1;
+ 	}
+ 
+-	if (!IsSackFrto() || tcp_is_reno(tp)) {
++	if (!tcp_is_sackfrto(tp)) {
+ 		/* RFC4138 shortcoming in step 2; should also have case c):
+ 		 * ACK isn't duplicate nor advances window, e.g., opposite dir
+ 		 * data, winupdate
+diff --git a/net/ipv6/route.c b/net/ipv6/route.c
+index a493ad9..12bba08 100644
+--- a/net/ipv6/route.c
++++ b/net/ipv6/route.c
+@@ -1243,11 +1243,11 @@ install_route:
+ 		}
+ 	}
+ 
+-	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
++	if (dst_metric(&rt->u.dst, RTAX_HOPLIMIT) == 0)
+ 		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = -1;
+-	if (!rt->u.dst.metrics[RTAX_MTU-1])
++	if (!dst_metric(&rt->u.dst, RTAX_MTU))
+ 		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
+-	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
++	if (!dst_metric(&rt->u.dst, RTAX_ADVMSS))
+ 		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(net, dst_mtu(&rt->u.dst));
+ 	rt->u.dst.dev = dev;
+ 	rt->rt6i_idev = idev;
+diff --git a/net/mac80211/main.c b/net/mac80211/main.c
+index 9ad4e36..915afad 100644
+--- a/net/mac80211/main.c
++++ b/net/mac80211/main.c
+@@ -1766,6 +1766,7 @@ fail_wep:
+ fail_rate:
+ 	ieee80211_debugfs_remove_netdev(IEEE80211_DEV_TO_SUB_IF(local->mdev));
+ 	unregister_netdevice(local->mdev);
++	local->mdev = NULL;
+ fail_dev:
+ 	rtnl_unlock();
+ 	sta_info_stop(local);
+@@ -1773,8 +1774,10 @@ fail_sta_info:
+ 	debugfs_hw_del(local);
+ 	destroy_workqueue(local->hw.workqueue);
+ fail_workqueue:
+-	ieee80211_if_free(local->mdev);
+-	local->mdev = NULL;
++	if (local->mdev != NULL) {
++		ieee80211_if_free(local->mdev);
++		local->mdev = NULL;
++	}
+ fail_mdev_alloc:
+ 	wiphy_unregister(local->hw.wiphy);
+ 	return result;
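
[Note: clearing local->mdev right after unregister_netdevice(), and making the later fail_workqueue label check the pointer, is the usual guard against freeing one object on two unwind paths. The pattern in a self-contained form (hypothetical resource, plain C):

	#include <stdlib.h>

	struct ctx { void *res; };

	static void teardown_early(struct ctx *c)
	{
		free(c->res);
		c->res = NULL;          /* mark as gone so later unwind skips it */
	}

	static void teardown_late(struct ctx *c)
	{
		if (c->res) {           /* only free what is still live */
			free(c->res);
			c->res = NULL;
		}
	}

	int main(void)
	{
		struct ctx c = { malloc(16) };

		teardown_early(&c);
		teardown_late(&c);      /* safe: no double free */
		return 0;
	}
]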
+diff --git a/net/mac80211/rc80211_pid_debugfs.c b/net/mac80211/rc80211_pid_debugfs.c
+index ae75d41..ff5c380 100644
+--- a/net/mac80211/rc80211_pid_debugfs.c
++++ b/net/mac80211/rc80211_pid_debugfs.c
+@@ -85,7 +85,7 @@ static int rate_control_pid_events_open(struct inode *inode, struct file *file)
+ 	struct rc_pid_sta_info *sinfo = inode->i_private;
+ 	struct rc_pid_event_buffer *events = &sinfo->events;
+ 	struct rc_pid_events_file_info *file_info;
+-	unsigned int status;
++	unsigned long status;
+ 
+ 	/* Allocate a state struct */
+ 	file_info = kmalloc(sizeof(*file_info), GFP_KERNEL);
+@@ -135,7 +135,7 @@ static ssize_t rate_control_pid_events_read(struct file *file, char __user *buf,
+ 	char pb[RC_PID_PRINT_BUF_SIZE];
+ 	int ret;
+ 	int p;
+-	unsigned int status;
++	unsigned long status;
+ 
+ 	/* Check if there is something to read. */
+ 	if (events->next_entry == file_info->next_entry) {
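
[Note: the unsigned int -> unsigned long change above is not cosmetic: spin_lock_irqsave() saves the interrupt state into the variable you pass, and the interface is defined in terms of unsigned long, so a narrower type can truncate the saved state on some architectures. The rule in miniature (lock name assumed from context):

	unsigned long status;           /* flags word: always unsigned long */

	spin_lock_irqsave(&events->lock, status);
	/* ... touch the event ring ... */
	spin_unlock_irqrestore(&events->lock, status);
]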
+diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
+index c1fc0f1..aa8d80c 100644
+--- a/net/netfilter/Kconfig
++++ b/net/netfilter/Kconfig
+@@ -90,6 +90,7 @@ config NF_CT_PROTO_DCCP
+ 	tristate 'DCCP protocol connection tracking support (EXPERIMENTAL)'
+ 	depends on EXPERIMENTAL && NF_CONNTRACK
+ 	depends on NETFILTER_ADVANCED
++	default IP_DCCP
+ 	help
+ 	  With this option enabled, the layer 3 independent connection
+ 	  tracking code will be able to do state tracking on DCCP connections.
+@@ -104,6 +105,7 @@ config NF_CT_PROTO_SCTP
+ 	tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
+ 	depends on EXPERIMENTAL && NF_CONNTRACK
+ 	depends on NETFILTER_ADVANCED
++	default IP_SCTP
+ 	help
+ 	  With this option enabled, the layer 3 independent connection
+ 	  tracking code will be able to do state tracking on SCTP connections.
+@@ -532,6 +534,7 @@ config NETFILTER_XT_MATCH_DCCP
+ 	tristate '"dccp" protocol match support'
+ 	depends on NETFILTER_XTABLES
+ 	depends on NETFILTER_ADVANCED
++	default IP_DCCP
+ 	help
+ 	  With this option enabled, you will be able to use the iptables
+ 	  `dccp' match in order to match on DCCP source/destination ports
+@@ -725,6 +728,7 @@ config NETFILTER_XT_MATCH_SCTP
+ 	tristate  '"sctp" protocol match support (EXPERIMENTAL)'
+ 	depends on NETFILTER_XTABLES && EXPERIMENTAL
+ 	depends on NETFILTER_ADVANCED
++	default IP_SCTP
+ 	help
+ 	  With this option enabled, you will be able to use the 
+ 	  `sctp' match in order to match on SCTP source/destination ports
+diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
+index 9f49000..2f9bbc0 100644
+--- a/net/netfilter/nf_conntrack_sip.c
++++ b/net/netfilter/nf_conntrack_sip.c
+@@ -870,6 +870,7 @@ static int process_sdp(struct sk_buff *skb,
+ {
+ 	enum ip_conntrack_info ctinfo;
+ 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
++	struct nf_conn_help *help = nfct_help(ct);
+ 	unsigned int matchoff, matchlen;
+ 	unsigned int mediaoff, medialen;
+ 	unsigned int sdpoff;
+@@ -959,6 +960,9 @@ static int process_sdp(struct sk_buff *skb,
+ 	if (nf_nat_sdp_session && ct->status & IPS_NAT_MASK)
+ 		ret = nf_nat_sdp_session(skb, dptr, sdpoff, datalen, &rtp_addr);
+ 
++	if (ret == NF_ACCEPT && i > 0)
++		help->help.ct_sip_info.invite_cseq = cseq;
++
+ 	return ret;
+ }
+ static int process_invite_response(struct sk_buff *skb,
+@@ -967,14 +971,14 @@ static int process_invite_response(struct sk_buff *skb,
+ {
+ 	enum ip_conntrack_info ctinfo;
+ 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
++	struct nf_conn_help *help = nfct_help(ct);
+ 
+ 	if ((code >= 100 && code <= 199) ||
+ 	    (code >= 200 && code <= 299))
+ 		return process_sdp(skb, dptr, datalen, cseq);
+-	else {
++	else if (help->help.ct_sip_info.invite_cseq == cseq)
+ 		flush_expectations(ct, true);
+-		return NF_ACCEPT;
+-	}
++	return NF_ACCEPT;
+ }
+ 
+ static int process_update_response(struct sk_buff *skb,
+@@ -983,14 +987,14 @@ static int process_update_response(struct sk_buff *skb,
+ {
+ 	enum ip_conntrack_info ctinfo;
+ 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
++	struct nf_conn_help *help = nfct_help(ct);
+ 
+ 	if ((code >= 100 && code <= 199) ||
+ 	    (code >= 200 && code <= 299))
+ 		return process_sdp(skb, dptr, datalen, cseq);
+-	else {
++	else if (help->help.ct_sip_info.invite_cseq == cseq)
+ 		flush_expectations(ct, true);
+-		return NF_ACCEPT;
+-	}
++	return NF_ACCEPT;
+ }
+ 
+ static int process_prack_response(struct sk_buff *skb,
+@@ -999,14 +1003,14 @@ static int process_prack_response(struct sk_buff *skb,
+ {
+ 	enum ip_conntrack_info ctinfo;
+ 	struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
++	struct nf_conn_help *help = nfct_help(ct);
+ 
+ 	if ((code >= 100 && code <= 199) ||
+ 	    (code >= 200 && code <= 299))
+ 		return process_sdp(skb, dptr, datalen, cseq);
+-	else {
++	else if (help->help.ct_sip_info.invite_cseq == cseq)
+ 		flush_expectations(ct, true);
+-		return NF_ACCEPT;
+-	}
++	return NF_ACCEPT;
+ }
+ 
+ static int process_bye_request(struct sk_buff *skb,
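
[Note: the SIP helper now records, in help->help.ct_sip_info.invite_cseq, the CSeq of the INVITE whose SDP it accepted, so only an error response to that INVITE flushes the RTP expectations; unrelated error responses leave the media session intact. All three response handlers above take the same shape:

	if ((code >= 100 && code <= 199) ||
	    (code >= 200 && code <= 299))
		return process_sdp(skb, dptr, datalen, cseq);
	else if (help->help.ct_sip_info.invite_cseq == cseq)
		flush_expectations(ct, true); /* error response to our INVITE */
	return NF_ACCEPT;
]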
+diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c
+index 64b2d13..1d421d0 100644
+--- a/net/sched/act_simple.c
++++ b/net/sched/act_simple.c
+@@ -6,7 +6,7 @@
+  *		as published by the Free Software Foundation; either version
+  *		2 of the License, or (at your option) any later version.
+  *
+- * Authors:	Jamal Hadi Salim (2005)
++ * Authors:	Jamal Hadi Salim (2005-8)
+  *
+  */
+ 
+@@ -34,6 +34,7 @@ static struct tcf_hashinfo simp_hash_info = {
+ 	.lock	=	&simp_lock,
+ };
+ 
++#define SIMP_MAX_DATA	32
+ static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res)
+ {
+ 	struct tcf_defact *d = a->priv;
+@@ -69,23 +70,28 @@ static int tcf_simp_release(struct tcf_defact *d, int bind)
+ 	return ret;
+ }
+ 
+-static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
++static int alloc_defdata(struct tcf_defact *d, char *defdata)
+ {
+-	d->tcfd_defdata = kmemdup(defdata, datalen, GFP_KERNEL);
++	d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL);
+ 	if (unlikely(!d->tcfd_defdata))
+ 		return -ENOMEM;
+-	d->tcfd_datalen = datalen;
++
+ 	return 0;
+ }
+ 
+-static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata)
++static void reset_policy(struct tcf_defact *d, char *defdata,
++			 struct tc_defact *p)
+ {
+-	kfree(d->tcfd_defdata);
+-	return alloc_defdata(d, datalen, defdata);
++	spin_lock_bh(&d->tcf_lock);
++	d->tcf_action = p->action;
++	memset(d->tcfd_defdata, 0, SIMP_MAX_DATA);
++	strlcpy(d->tcfd_defdata, defdata, SIMP_MAX_DATA);
++	spin_unlock_bh(&d->tcf_lock);
+ }
+ 
+ static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
+ 	[TCA_DEF_PARMS]	= { .len = sizeof(struct tc_defact) },
++	[TCA_DEF_DATA]	= { .type = NLA_STRING, .len = SIMP_MAX_DATA },
+ };
+ 
+ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
+@@ -95,28 +101,24 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
+ 	struct tc_defact *parm;
+ 	struct tcf_defact *d;
+ 	struct tcf_common *pc;
+-	void *defdata;
+-	u32 datalen = 0;
++	char *defdata;
+ 	int ret = 0, err;
+ 
+ 	if (nla == NULL)
+ 		return -EINVAL;
+ 
+-	err = nla_parse_nested(tb, TCA_DEF_MAX, nla, NULL);
++	err = nla_parse_nested(tb, TCA_DEF_MAX, nla, simple_policy);
+ 	if (err < 0)
+ 		return err;
+ 
+ 	if (tb[TCA_DEF_PARMS] == NULL)
+ 		return -EINVAL;
+ 
+-	parm = nla_data(tb[TCA_DEF_PARMS]);
+-	defdata = nla_data(tb[TCA_DEF_DATA]);
+-	if (defdata == NULL)
++	if (tb[TCA_DEF_DATA] == NULL)
+ 		return -EINVAL;
+ 
+-	datalen = nla_len(tb[TCA_DEF_DATA]);
+-	if (datalen == 0)
+-		return -EINVAL;
++	parm = nla_data(tb[TCA_DEF_PARMS]);
++	defdata = nla_data(tb[TCA_DEF_DATA]);
+ 
+ 	pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info);
+ 	if (!pc) {
+@@ -126,11 +128,12 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
+ 			return -ENOMEM;
+ 
+ 		d = to_defact(pc);
+-		ret = alloc_defdata(d, datalen, defdata);
++		ret = alloc_defdata(d, defdata);
+ 		if (ret < 0) {
+ 			kfree(pc);
+ 			return ret;
+ 		}
++		d->tcf_action = parm->action;
+ 		ret = ACT_P_CREATED;
+ 	} else {
+ 		d = to_defact(pc);
+@@ -138,13 +141,9 @@ static int tcf_simp_init(struct nlattr *nla, struct nlattr *est,
+ 			tcf_simp_release(d, bind);
+ 			return -EEXIST;
+ 		}
+-		realloc_defdata(d, datalen, defdata);
++		reset_policy(d, defdata, parm);
+ 	}
+ 
+-	spin_lock_bh(&d->tcf_lock);
+-	d->tcf_action = parm->action;
+-	spin_unlock_bh(&d->tcf_lock);
+-
+ 	if (ret == ACT_P_CREATED)
+ 		tcf_hash_insert(pc, &simp_hash_info);
+ 	return ret;
+@@ -172,7 +171,7 @@ static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a,
+ 	opt.bindcnt = d->tcf_bindcnt - bind;
+ 	opt.action = d->tcf_action;
+ 	NLA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt);
+-	NLA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata);
++	NLA_PUT_STRING(skb, TCA_DEF_DATA, d->tcfd_defdata);
+ 	t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install);
+ 	t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse);
+ 	t.expires = jiffies_to_clock_t(d->tcf_tm.expires);
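
[Note: the act_simple rework turns the action's data from an unbounded binary blob into a bounded, NUL-terminated string: the netlink policy enforces NLA_STRING with a SIMP_MAX_DATA cap, and kstrndup()/strlcpy() guarantee termination. The two load-bearing pieces, condensed from the hunks above:

	#define SIMP_MAX_DATA	32

	static const struct nla_policy simple_policy[TCA_DEF_MAX + 1] = {
		[TCA_DEF_PARMS]	= { .len = sizeof(struct tc_defact) },
		[TCA_DEF_DATA]	= { .type = NLA_STRING, .len = SIMP_MAX_DATA },
	};

	static int alloc_defdata(struct tcf_defact *d, char *defdata)
	{
		/* bounded copy, always NUL-terminated */
		d->tcfd_defdata = kstrndup(defdata, SIMP_MAX_DATA, GFP_KERNEL);
		return d->tcfd_defdata ? 0 : -ENOMEM;
	}
]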
+diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
+index 66148cc..5bc1ed4 100644
+--- a/net/sched/sch_htb.c
++++ b/net/sched/sch_htb.c
+@@ -1197,12 +1197,16 @@ static inline int htb_parent_last_child(struct htb_class *cl)
+ 	return 1;
+ }
+ 
+-static void htb_parent_to_leaf(struct htb_class *cl, struct Qdisc *new_q)
++static void htb_parent_to_leaf(struct htb_sched *q, struct htb_class *cl,
++			       struct Qdisc *new_q)
+ {
+ 	struct htb_class *parent = cl->parent;
+ 
+ 	BUG_TRAP(!cl->level && cl->un.leaf.q && !cl->prio_activity);
+ 
++	if (parent->cmode != HTB_CAN_SEND)
++		htb_safe_rb_erase(&parent->pq_node, q->wait_pq + parent->level);
++
+ 	parent->level = 0;
+ 	memset(&parent->un.inner, 0, sizeof(parent->un.inner));
+ 	INIT_LIST_HEAD(&parent->un.leaf.drop_list);
+@@ -1300,7 +1304,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg)
+ 		htb_deactivate(q, cl);
+ 
+ 	if (last_child)
+-		htb_parent_to_leaf(cl, new_q);
++		htb_parent_to_leaf(q, cl, new_q);
+ 
+ 	if (--cl->refcnt == 0)
+ 		htb_destroy_class(sch, cl);
+diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
+index d74c2d2..01c7e31 100644
+--- a/net/sunrpc/svc.c
++++ b/net/sunrpc/svc.c
+@@ -18,7 +18,6 @@
+ #include <linux/mm.h>
+ #include <linux/interrupt.h>
+ #include <linux/module.h>
+-#include <linux/sched.h>
+ 
+ #include <linux/sunrpc/types.h>
+ #include <linux/sunrpc/xdr.h>
+diff --git a/scripts/kconfig/lkc.h b/scripts/kconfig/lkc.h
+index 4bc68f2..96521cb 100644
+--- a/scripts/kconfig/lkc.h
++++ b/scripts/kconfig/lkc.h
+@@ -11,9 +11,9 @@
+ #ifndef KBUILD_NO_NLS
+ # include <libintl.h>
+ #else
+-# define gettext(Msgid) ((const char *) (Msgid))
+-# define textdomain(Domainname) ((const char *) (Domainname))
+-# define bindtextdomain(Domainname, Dirname) ((const char *) (Dirname))
++static inline const char *gettext(const char *txt) { return txt; }
++static inline void textdomain(const char *domainname) {}
++static inline void bindtextdomain(const char *name, const char *dir) {}
+ #endif
+ 
+ #ifdef __cplusplus
+diff --git a/scripts/kconfig/mconf.c b/scripts/kconfig/mconf.c
+index 734cf4f..6841e95 100644
+--- a/scripts/kconfig/mconf.c
++++ b/scripts/kconfig/mconf.c
+@@ -773,7 +773,7 @@ static void conf_string(struct menu *menu)
+ 
+ 	while (1) {
+ 		int res;
+-		char *heading;
++		const char *heading;
+ 
+ 		switch (sym_get_type(menu->sym)) {
+ 		case S_INT:
+@@ -925,3 +925,4 @@ int main(int ac, char **av)
+ 
+ 	return 0;
+ }
++
+diff --git a/scripts/mod/file2alias.c b/scripts/mod/file2alias.c
+index e04c421..cea4a79 100644
+--- a/scripts/mod/file2alias.c
++++ b/scripts/mod/file2alias.c
+@@ -51,6 +51,15 @@ do {                                                            \
+                 sprintf(str + strlen(str), "*");                \
+ } while(0)
+ 
++/* Always end in a wildcard, for future extension */
++static inline void add_wildcard(char *str)
++{
++	int len = strlen(str);
++
++	if (str[len - 1] != '*')
++		strcat(str + len, "*");
++}
++
+ unsigned int cross_build = 0;
+ /**
+  * Check that sizeof(device_id type) are consistent with size of section
+@@ -133,9 +142,7 @@ static void do_usb_entry(struct usb_device_id *id,
+ 	    id->match_flags&USB_DEVICE_ID_MATCH_INT_PROTOCOL,
+ 	    id->bInterfaceProtocol);
+ 
+-	/* Always end in a wildcard, for future extension */
+-	if (alias[strlen(alias)-1] != '*')
+-		strcat(alias, "*");
++	add_wildcard(alias);
+ 	buf_printf(&mod->dev_table_buf,
+ 		   "MODULE_ALIAS(\"%s\");\n", alias);
+ }
+@@ -219,6 +226,7 @@ static int do_ieee1394_entry(const char *filename,
+ 	ADD(alias, "ver", id->match_flags & IEEE1394_MATCH_VERSION,
+ 	    id->version);
+ 
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -261,6 +269,7 @@ static int do_pci_entry(const char *filename,
+ 	ADD(alias, "bc", baseclass_mask == 0xFF, baseclass);
+ 	ADD(alias, "sc", subclass_mask == 0xFF, subclass);
+ 	ADD(alias, "i", interface_mask == 0xFF, interface);
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -283,6 +292,7 @@ static int do_ccw_entry(const char *filename,
+ 	    id->dev_type);
+ 	ADD(alias, "dm", id->match_flags&CCW_DEVICE_ID_MATCH_DEVICE_MODEL,
+ 	    id->dev_model);
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -290,7 +300,7 @@ static int do_ccw_entry(const char *filename,
+ static int do_ap_entry(const char *filename,
+ 		       struct ap_device_id *id, char *alias)
+ {
+-	sprintf(alias, "ap:t%02X", id->dev_type);
++	sprintf(alias, "ap:t%02X*", id->dev_type);
+ 	return 1;
+ }
+ 
+@@ -309,6 +319,7 @@ static int do_serio_entry(const char *filename,
+ 	ADD(alias, "id", id->id != SERIO_ANY, id->id);
+ 	ADD(alias, "ex", id->extra != SERIO_ANY, id->extra);
+ 
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -316,7 +327,7 @@ static int do_serio_entry(const char *filename,
+ static int do_acpi_entry(const char *filename,
+ 			struct acpi_device_id *id, char *alias)
+ {
+-	sprintf(alias, "acpi*:%s:", id->id);
++	sprintf(alias, "acpi*:%s:*", id->id);
+ 	return 1;
+ }
+ 
+@@ -324,7 +335,7 @@ static int do_acpi_entry(const char *filename,
+ static int do_pnp_entry(const char *filename,
+ 			struct pnp_device_id *id, char *alias)
+ {
+-	sprintf(alias, "pnp:d%s", id->id);
++	sprintf(alias, "pnp:d%s*", id->id);
+ 	return 1;
+ }
+ 
+@@ -409,6 +420,7 @@ static int do_pcmcia_entry(const char *filename,
+        ADD(alias, "pc", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID3, id->prod_id_hash[2]);
+        ADD(alias, "pd", id->match_flags & PCMCIA_DEV_ID_MATCH_PROD_ID4, id->prod_id_hash[3]);
+ 
++	add_wildcard(alias);
+        return 1;
+ }
+ 
+@@ -432,6 +444,7 @@ static int do_of_entry (const char *filename, struct of_device_id *of, char *ali
+         if (isspace (*tmp))
+             *tmp = '_';
+ 
++    add_wildcard(alias);
+     return 1;
+ }
+ 
+@@ -448,6 +461,7 @@ static int do_vio_entry(const char *filename, struct vio_device_id *vio,
+ 		if (isspace (*tmp))
+ 			*tmp = '_';
+ 
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -511,6 +525,8 @@ static int do_eisa_entry(const char *filename, struct eisa_device_id *eisa,
+ {
+ 	if (eisa->sig[0])
+ 		sprintf(alias, EISA_DEVICE_MODALIAS_FMT "*", eisa->sig);
++	else
++		strcat(alias, "*");
+ 	return 1;
+ }
+ 
+@@ -529,6 +545,7 @@ static int do_parisc_entry(const char *filename, struct parisc_device_id *id,
+ 	ADD(alias, "rev", id->hversion_rev != PA_HVERSION_REV_ANY_ID, id->hversion_rev);
+ 	ADD(alias, "sv", id->sversion != PA_SVERSION_ANY_ID, id->sversion);
+ 
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -544,6 +561,7 @@ static int do_sdio_entry(const char *filename,
+ 	ADD(alias, "c", id->class != (__u8)SDIO_ANY_ID, id->class);
+ 	ADD(alias, "v", id->vendor != (__u16)SDIO_ANY_ID, id->vendor);
+ 	ADD(alias, "d", id->device != (__u16)SDIO_ANY_ID, id->device);
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -559,6 +577,7 @@ static int do_ssb_entry(const char *filename,
+ 	ADD(alias, "v", id->vendor != SSB_ANY_VENDOR, id->vendor);
+ 	ADD(alias, "id", id->coreid != SSB_ANY_ID, id->coreid);
+ 	ADD(alias, "rev", id->revision != SSB_ANY_REV, id->revision);
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -573,6 +592,7 @@ static int do_virtio_entry(const char *filename, struct virtio_device_id *id,
+ 	ADD(alias, "d", 1, id->device);
+ 	ADD(alias, "v", id->vendor != VIRTIO_DEV_ANY_ID, id->vendor);
+ 
++	add_wildcard(alias);
+ 	return 1;
+ }
+ 
+@@ -612,9 +632,6 @@ static void do_table(void *symval, unsigned long size,
+ 
+ 	for (i = 0; i < size; i += id_size) {
+ 		if (do_entry(mod->name, symval+i, alias)) {
+-			/* Always end in a wildcard, for future extension */
+-			if (alias[strlen(alias)-1] != '*')
+-				strcat(alias, "*");
+ 			buf_printf(&mod->dev_table_buf,
+ 				   "MODULE_ALIAS(\"%s\");\n", alias);
+ 		}
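
[Note: do_table() no longer appends the trailing '*' centrally, so every matcher must terminate its own alias; add_wildcard() does that idempotently, and the bare-sprintf matchers (ap, acpi, pnp, eisa) simply bake the '*' into their format string. The helper's behaviour in a self-contained demo:

	#include <stdio.h>
	#include <string.h>

	/* Same logic as the patch's helper: append '*' only when missing. */
	static void add_wildcard(char *str)
	{
		size_t len = strlen(str);

		if (str[len - 1] != '*')
			strcat(str + len, "*");
	}

	int main(void)
	{
		char a[32] = "pci:v00008086d*"; /* already terminated: unchanged */
		char b[32] = "ap:t10";          /* gets its wildcard appended */

		add_wildcard(a);
		add_wildcard(b);
		printf("%s %s\n", a, b);        /* pci:v00008086d* ap:t10* */
		return 0;
	}
]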
+diff --git a/sound/drivers/Kconfig b/sound/drivers/Kconfig
+index a78a8d0..379bcb0 100644
+--- a/sound/drivers/Kconfig
++++ b/sound/drivers/Kconfig
+@@ -5,8 +5,8 @@ menu "Generic devices"
+ 
+ 
+ config SND_PCSP
+-	tristate "Internal PC speaker support"
+-	depends on X86_PC && HIGH_RES_TIMERS
++	tristate "PC-Speaker support"
++	depends on PCSPKR_PLATFORM && X86_PC && HIGH_RES_TIMERS
+ 	depends on INPUT
+ 	depends on SND
+ 	select SND_PCM
+diff --git a/sound/drivers/pcsp/pcsp.c b/sound/drivers/pcsp/pcsp.c
+index 5920351..54a1f90 100644
+--- a/sound/drivers/pcsp/pcsp.c
++++ b/sound/drivers/pcsp/pcsp.c
+@@ -194,6 +194,7 @@ static void pcsp_stop_beep(struct snd_pcsp *chip)
+ 	spin_unlock_irq(&chip->substream_lock);
+ }
+ 
++#ifdef CONFIG_PM
+ static int pcsp_suspend(struct platform_device *dev, pm_message_t state)
+ {
+ 	struct snd_pcsp *chip = platform_get_drvdata(dev);
+@@ -201,6 +202,9 @@ static int pcsp_suspend(struct platform_device *dev, pm_message_t state)
+ 	snd_pcm_suspend_all(chip->pcm);
+ 	return 0;
+ }
++#else
++#define pcsp_suspend NULL
++#endif	/* CONFIG_PM */
+ 
+ static void pcsp_shutdown(struct platform_device *dev)
+ {
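
[Note: the #ifdef CONFIG_PM wrapper above is the stock pattern for optional power-management callbacks: when PM is configured out, the function is compiled away and the symbol becomes NULL, which the driver core treats as "no suspend hook". Sketch of how the two halves meet (the platform_driver layout is assumed, not quoted from the patch):

	#ifdef CONFIG_PM
	static int pcsp_suspend(struct platform_device *dev, pm_message_t state)
	{
		/* quiesce the device */
		return 0;
	}
	#else
	#define pcsp_suspend NULL       /* no PM: no callback, no dead code */
	#endif

	static struct platform_driver pcsp_platform_driver = {
		/* ... */
		.suspend = pcsp_suspend, /* NULL is valid: hook simply absent */
	};
]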
+diff --git a/sound/oss/kahlua.c b/sound/oss/kahlua.c
+index dfe670f..eb9bc36 100644
+--- a/sound/oss/kahlua.c
++++ b/sound/oss/kahlua.c
+@@ -67,7 +67,7 @@ static int __devinit probe_one(struct pci_dev *pdev, const struct pci_device_id
+ 		return 1;
+ 	
+ 	mem = ioremap(base, 128);
+-	if(mem == 0UL)
++	if (!mem)
+ 		return 1;
+ 	map = readw(mem + 0x18);	/* Read the SMI enables */
+ 	iounmap(mem);
+diff --git a/sound/pci/Kconfig b/sound/pci/Kconfig
+index 581debf..7e47421 100644
+--- a/sound/pci/Kconfig
++++ b/sound/pci/Kconfig
+@@ -515,19 +515,16 @@ config SND_FM801
+ config SND_FM801_TEA575X_BOOL
+ 	bool "ForteMedia FM801 + TEA5757 tuner"
+ 	depends on SND_FM801
++	depends on VIDEO_V4L1=y || VIDEO_V4L1=SND_FM801
+ 	help
+ 	  Say Y here to include support for soundcards based on the ForteMedia
+ 	  FM801 chip with a TEA5757 tuner connected to GPIO1-3 pins (Media
+ 	  Forte SF256-PCS-02) into the snd-fm801 driver.
+ 
+-	  This will enable support for the old V4L1 API.
+-
+ config SND_FM801_TEA575X
+ 	tristate
+ 	depends on SND_FM801_TEA575X_BOOL
+ 	default SND_FM801
+-	select VIDEO_V4L1
+-	select VIDEO_DEV
+ 
+ config SND_HDA_INTEL
+ 	tristate "Intel HD Audio"
+diff --git a/sound/pci/ac97/ac97_patch.c b/sound/pci/ac97/ac97_patch.c
+index 39198e5..2da8981 100644
+--- a/sound/pci/ac97/ac97_patch.c
++++ b/sound/pci/ac97/ac97_patch.c
+@@ -3446,6 +3446,7 @@ static const struct snd_kcontrol_new snd_ac97_controls_vt1617a[] = {
+ int patch_vt1617a(struct snd_ac97 * ac97)
+ {
+ 	int err = 0;
++	int val;
+ 
+ 	/* we choose to not fail out at this point, but we tell the
+ 	   caller when we return */
+@@ -3456,7 +3457,13 @@ int patch_vt1617a(struct snd_ac97 * ac97)
+ 	/* bring analog power consumption to normal by turning off the
+ 	 * headphone amplifier, like WinXP driver for EPIA SP
+ 	 */
+-	snd_ac97_write_cache(ac97, 0x5c, 0x20);
++	/* We need to check the bit before writing it.
++	 * On some (many?) hardwares, setting bit actually clears it!
++	 */
++	val = snd_ac97_read(ac97, 0x5c);
++	if (!(val & 0x20))
++		snd_ac97_write_cache(ac97, 0x5c, 0x20);
++
+ 	ac97->ext_id |= AC97_EI_SPDIF;	/* force the detection of spdif */
+ 	ac97->rates[AC97_RATES_SPDIF] = SNDRV_PCM_RATE_44100 | SNDRV_PCM_RATE_48000;
+ 	ac97->build_ops = &patch_vt1616_ops;
+diff --git a/sound/pci/hda/patch_realtek.c b/sound/pci/hda/patch_realtek.c
+index d9783a4..6d4df45 100644
+--- a/sound/pci/hda/patch_realtek.c
++++ b/sound/pci/hda/patch_realtek.c
+@@ -11902,7 +11902,10 @@ static void alc861_auto_set_output_and_unmute(struct hda_codec *codec,
+ 					      hda_nid_t nid,
+ 					      int pin_type, int dac_idx)
+ {
+-	alc_set_pin_output(codec, nid, pin_type);
++	snd_hda_codec_write(codec, nid, 0, AC_VERB_SET_PIN_WIDGET_CONTROL,
++			    pin_type);
++	snd_hda_codec_write(codec, dac_idx, 0, AC_VERB_SET_AMP_GAIN_MUTE,
++			    AMP_OUT_UNMUTE);
+ }
+ 
+ static void alc861_auto_init_multi_out(struct hda_codec *codec)
+diff --git a/sound/pci/hda/patch_sigmatel.c b/sound/pci/hda/patch_sigmatel.c
+index b3a15d6..393f7fd 100644
+--- a/sound/pci/hda/patch_sigmatel.c
++++ b/sound/pci/hda/patch_sigmatel.c
+@@ -4289,6 +4289,8 @@ struct hda_codec_preset snd_hda_preset_sigmatel[] = {
+ 	{ .id = 0x83847635, .name = "STAC9250D", .patch = patch_stac925x },
+ 	{ .id = 0x83847636, .name = "STAC9251", .patch = patch_stac925x },
+ 	{ .id = 0x83847637, .name = "STAC9250D", .patch = patch_stac925x },
++	{ .id = 0x83847645, .name = "92HD206X", .patch = patch_stac927x },
++	{ .id = 0x83847646, .name = "92HD206D", .patch = patch_stac927x },
+  	/* The following does not take into account .id=0x83847661 when subsys =
+  	 * 104D0C00 which is STAC9225s. Because of this, some SZ Notebooks are
+  	 * currently not fully supported.
+diff --git a/sound/soc/at91/at91-pcm.c b/sound/soc/at91/at91-pcm.c
+index 67c88e3..ccac6bd 100644
+--- a/sound/soc/at91/at91-pcm.c
++++ b/sound/soc/at91/at91-pcm.c
+@@ -103,7 +103,8 @@ static void at91_pcm_dma_irq(u32 ssc_sr,
+ 		if (prtd->period_ptr >= prtd->dma_buffer_end) {
+ 			prtd->period_ptr = prtd->dma_buffer;
+ 		}
+-		at91_ssc_write(params->ssc_base + params->pdc->xnpr, prtd->period_ptr);
++		at91_ssc_write(params->ssc_base + params->pdc->xnpr,
++			       prtd->period_ptr);
+ 		at91_ssc_write(params->ssc_base + params->pdc->xncr,
+ 				prtd->period_size / params->pdc_xfer_size);
+ 	}
+@@ -191,10 +192,12 @@ static int at91_pcm_trigger(struct snd_pcm_substream *substream,
+ 		at91_ssc_write(params->ssc_base + AT91_SSC_IER,
+ 			params->mask->ssc_endx | params->mask->ssc_endbuf);
+ 
+-		at91_ssc_write(params->ssc_base + ATMEL_PDC_PTCR, params->mask->pdc_enable);
++		at91_ssc_write(params->ssc_base + ATMEL_PDC_PTCR,
++			params->mask->pdc_enable);
+ 
+-		DBG("sr=%lx imr=%lx\n", at91_ssc_read(params->ssc_base + AT91_SSC_SR),
+-					at91_ssc_read(params->ssc_base + AT91_SSC_IER));
++		DBG("sr=%lx imr=%lx\n",
++		    at91_ssc_read(params->ssc_base + AT91_SSC_SR),
++		    at91_ssc_read(params->ssc_base + AT91_SSC_IMR));
+ 		break;
+ 
+ 	case SNDRV_PCM_TRIGGER_STOP:
+diff --git a/sound/soc/at91/at91-ssc.c b/sound/soc/at91/at91-ssc.c
+index f642d2d..bc35d00 100644
+--- a/sound/soc/at91/at91-ssc.c
++++ b/sound/soc/at91/at91-ssc.c
+@@ -590,7 +590,7 @@ static int at91_ssc_hw_params(struct snd_pcm_substream *substream,
+ 			printk(KERN_WARNING "at91-ssc: request_irq failure\n");
+ 
+ 			DBG("Stopping pid %d clock\n", ssc_p->ssc.pid);
+-			at91_sys_write(AT91_PMC_PCER, 1<<ssc_p->ssc.pid);
++			at91_sys_write(AT91_PMC_PCDR, 1<<ssc_p->ssc.pid);
+ 			return ret;
+ 		}
+ 
+diff --git a/sound/soc/s3c24xx/s3c24xx-i2s.c b/sound/soc/s3c24xx/s3c24xx-i2s.c
+index 4ebcd6a..1ed6afd 100644
+--- a/sound/soc/s3c24xx/s3c24xx-i2s.c
++++ b/sound/soc/s3c24xx/s3c24xx-i2s.c
+@@ -224,6 +224,7 @@ static int s3c24xx_i2s_set_fmt(struct snd_soc_cpu_dai *cpu_dai,
+ 		iismod |= S3C2410_IISMOD_SLAVE;
+ 		break;
+ 	case SND_SOC_DAIFMT_CBS_CFS:
++		iismod &= ~S3C2410_IISMOD_SLAVE;
+ 		break;
+ 	default:
+ 		return -EINVAL;
+@@ -234,6 +235,7 @@ static int s3c24xx_i2s_set_fmt(struct snd_soc_cpu_dai *cpu_dai,
+ 		iismod |= S3C2410_IISMOD_MSB;
+ 		break;
+ 	case SND_SOC_DAIFMT_I2S:
++		iismod &= ~S3C2410_IISMOD_MSB;
+ 		break;
+ 	default:
+ 		return -EINVAL;
+diff --git a/sound/soc/s3c24xx/s3c24xx-pcm.c b/sound/soc/s3c24xx/s3c24xx-pcm.c
+index 6c70a81..7806ae6 100644
+--- a/sound/soc/s3c24xx/s3c24xx-pcm.c
++++ b/sound/soc/s3c24xx/s3c24xx-pcm.c
+@@ -171,7 +171,7 @@ static int s3c24xx_pcm_hw_params(struct snd_pcm_substream *substream,
+ 		ret = s3c2410_dma_request(prtd->params->channel,
+ 					  prtd->params->client, NULL);
+ 
+-		if (ret) {
++		if (ret < 0) {
+ 			DBG(KERN_ERR "failed to get dma channel\n");
+ 			return ret;
+ 		}
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index e89338e..f7ba099 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -522,6 +522,7 @@ unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn)
+ 		return bad_hva();
+ 	return (slot->userspace_addr + (gfn - slot->base_gfn) * PAGE_SIZE);
+ }
++EXPORT_SYMBOL_GPL(gfn_to_hva);
+ 
+ /*
+  * Requires current->mm->mmap_sem to be held

Modified: dists/trunk/linux-2.6/debian/patches/series/1~experimental.1
==============================================================================
--- dists/trunk/linux-2.6/debian/patches/series/1~experimental.1	(original)
+++ dists/trunk/linux-2.6/debian/patches/series/1~experimental.1	Fri May  9 12:24:09 2008
@@ -1,4 +1,4 @@
-+ bugfix/all/patch-2.6.26-rc1-git6
++ bugfix/all/patch-2.6.26-rc1-git7
 + debian/version.patch
 + debian/kernelvariables.patch
 + debian/doc-build-parallel.patch
@@ -36,4 +36,3 @@
 ##+ features/all/mtd-nor-add-support-for-the-st-m29w400db-flash-chip.patch
 + bugfix/all/mtd-prevent-physmap-from-causing-request_module-runaway-loop-modprobe-net-pf-1.patch
 + bugfix/all/0001--USB-c67x00-build-fix.patch
-+ bugfix/all/drivers-media-build.patch


