[kernel] r16129 - in dists/sid/linux-2.6/debian: . patches/bugfix/all/stable patches/debian patches/features/all patches/features/all/vserver patches/features/all/xen patches/series
Ben Hutchings
benh at alioth.debian.org
Fri Aug 13 03:29:24 UTC 2010
Author: benh
Date: Fri Aug 13 03:29:13 2010
New Revision: 16129
Log:
Add stable 2.6.32.19-rc1
Modify ll_rw_block() and submit_bh() to avoid breaking OOT filesystem
modules using the old values of READA or SWRITE*.
Revert a patch included in this update that changes signal_struct.
Rebase "USB: option: Use generic USB wwan code" again.
Update context in vs2.3.0.36.27.patch.
Remove changes from xen/pvops.patch that are included in this update.
Added:
dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.19-rc1
dists/sid/linux-2.6/debian/patches/debian/fs-buffer.c-Avoid-ABI-change-in-2.6.32.19.patch
dists/sid/linux-2.6/debian/patches/debian/revert-sched-cputime-Introduce-thread_group_times.patch
dists/sid/linux-2.6/debian/patches/features/all/USB-option-Use-generic-USB-wwan-code-3.patch
- copied, changed from r16127, dists/sid/linux-2.6/debian/patches/features/all/USB-option-Use-generic-USB-wwan-code-2.patch
dists/sid/linux-2.6/debian/patches/series/21
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch
dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Thu Aug 12 21:50:04 2010 (r16128)
+++ dists/sid/linux-2.6/debian/changelog Fri Aug 13 03:29:13 2010 (r16129)
@@ -1,3 +1,11 @@
+linux-2.6 (2.6.32-21) UNRELEASED; urgency=low
+
+ [ Ben Hutchings ]
+ * Add stable 2.6.32.19-rc1
+ - Revert ABI change in sched.h
+
+ -- Ben Hutchings <ben at decadent.org.uk> Thu, 12 Aug 2010 23:20:55 +0100
+
linux-2.6 (2.6.32-20) unstable; urgency=low
[ Moritz Muehlenhoff ]
Added: dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.19-rc1
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/bugfix/all/stable/2.6.32.19-rc1 Fri Aug 13 03:29:13 2010 (r16129)
@@ -0,0 +1,8609 @@
+diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
+index 1c4119c..e516aa2 100644
+--- a/arch/arm/Kconfig
++++ b/arch/arm/Kconfig
+@@ -887,6 +887,18 @@ config ARM_ERRATA_460075
+ ACTLR register. Note that setting specific bits in the ACTLR register
+ may not be available in non-secure mode.
+
++config ARM_ERRATA_720789
++ bool "ARM errata: TLBIASIDIS and TLBIMVAIS operations can broadcast a faulty ASID"
++ depends on CPU_V7 && SMP
++ help
++ This option enables the workaround for the 720789 Cortex-A9 (prior to
++ r2p0) erratum. A faulty ASID can be sent to the other CPUs for the
++ broadcasted CP15 TLB maintenance operations TLBIASIDIS and TLBIMVAIS.
++ As a consequence of this erratum, some TLB entries which should be
++ invalidated are not, resulting in an incoherency in the system page
++ tables. The workaround changes the TLB flushing routines to invalidate
++ entries regardless of the ASID.
++
+ endmenu
+
+ source "arch/arm/common/Kconfig"
+diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h
+index c2f1605..00c1cba 100644
+--- a/arch/arm/include/asm/tlbflush.h
++++ b/arch/arm/include/asm/tlbflush.h
+@@ -369,7 +369,11 @@ static inline void local_flush_tlb_mm(struct mm_struct *mm)
+ if (tlb_flag(TLB_V6_I_ASID))
+ asm("mcr p15, 0, %0, c8, c5, 2" : : "r" (asid) : "cc");
+ if (tlb_flag(TLB_V7_UIS_ASID))
++#ifdef CONFIG_ARM_ERRATA_720789
++ asm("mcr p15, 0, %0, c8, c3, 0" : : "r" (zero) : "cc");
++#else
+ asm("mcr p15, 0, %0, c8, c3, 2" : : "r" (asid) : "cc");
++#endif
+
+ if (tlb_flag(TLB_BTB)) {
+ /* flush the branch target cache */
+@@ -409,7 +413,11 @@ local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr)
+ if (tlb_flag(TLB_V6_I_PAGE))
+ asm("mcr p15, 0, %0, c8, c5, 1" : : "r" (uaddr) : "cc");
+ if (tlb_flag(TLB_V7_UIS_PAGE))
++#ifdef CONFIG_ARM_ERRATA_720789
++ asm("mcr p15, 0, %0, c8, c3, 3" : : "r" (uaddr & PAGE_MASK) : "cc");
++#else
+ asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (uaddr) : "cc");
++#endif
+
+ if (tlb_flag(TLB_BTB)) {
+ /* flush the branch target cache */
+diff --git a/arch/arm/plat-mxc/include/mach/gpio.h b/arch/arm/plat-mxc/include/mach/gpio.h
+index 6bd932c..7a0dc5a 100644
+--- a/arch/arm/plat-mxc/include/mach/gpio.h
++++ b/arch/arm/plat-mxc/include/mach/gpio.h
+@@ -19,6 +19,7 @@
+ #ifndef __ASM_ARCH_MXC_GPIO_H__
+ #define __ASM_ARCH_MXC_GPIO_H__
+
++#include <linux/spinlock.h>
+ #include <mach/hardware.h>
+ #include <asm-generic/gpio.h>
+
+diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c
+index 674a837..01ae69b 100644
+--- a/arch/ia64/hp/common/sba_iommu.c
++++ b/arch/ia64/hp/common/sba_iommu.c
+@@ -677,12 +677,19 @@ sba_alloc_range(struct ioc *ioc, struct device *dev, size_t size)
+ spin_unlock_irqrestore(&ioc->saved_lock, flags);
+
+ pide = sba_search_bitmap(ioc, dev, pages_needed, 0);
+- if (unlikely(pide >= (ioc->res_size << 3)))
+- panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
+- ioc->ioc_hpa);
++ if (unlikely(pide >= (ioc->res_size << 3))) {
++ printk(KERN_WARNING "%s: I/O MMU @ %p is"
++ "out of mapping resources, %u %u %lx\n",
++ __func__, ioc->ioc_hpa, ioc->res_size,
++ pages_needed, dma_get_seg_boundary(dev));
++ return -1;
++ }
+ #else
+- panic(__FILE__ ": I/O MMU @ %p is out of mapping resources\n",
+- ioc->ioc_hpa);
++ printk(KERN_WARNING "%s: I/O MMU @ %p is"
++ "out of mapping resources, %u %u %lx\n",
++ __func__, ioc->ioc_hpa, ioc->res_size,
++ pages_needed, dma_get_seg_boundary(dev));
++ return -1;
+ #endif
+ }
+ }
+@@ -965,6 +972,8 @@ static dma_addr_t sba_map_page(struct device *dev, struct page *page,
+ #endif
+
+ pide = sba_alloc_range(ioc, dev, size);
++ if (pide < 0)
++ return 0;
+
+ iovp = (dma_addr_t) pide << iovp_shift;
+
+@@ -1320,6 +1329,7 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
+ unsigned long dma_offset, dma_len; /* start/len of DMA stream */
+ int n_mappings = 0;
+ unsigned int max_seg_size = dma_get_max_seg_size(dev);
++ int idx;
+
+ while (nents > 0) {
+ unsigned long vaddr = (unsigned long) sba_sg_address(startsg);
+@@ -1418,16 +1428,22 @@ sba_coalesce_chunks(struct ioc *ioc, struct device *dev,
+ vcontig_sg->dma_length = vcontig_len;
+ dma_len = (dma_len + dma_offset + ~iovp_mask) & iovp_mask;
+ ASSERT(dma_len <= DMA_CHUNK_SIZE);
+- dma_sg->dma_address = (dma_addr_t) (PIDE_FLAG
+- | (sba_alloc_range(ioc, dev, dma_len) << iovp_shift)
+- | dma_offset);
++ idx = sba_alloc_range(ioc, dev, dma_len);
++ if (idx < 0) {
++ dma_sg->dma_length = 0;
++ return -1;
++ }
++ dma_sg->dma_address = (dma_addr_t)(PIDE_FLAG | (idx << iovp_shift)
++ | dma_offset);
+ n_mappings++;
+ }
+
+ return n_mappings;
+ }
+
+-
++static void sba_unmap_sg_attrs(struct device *dev, struct scatterlist *sglist,
++ int nents, enum dma_data_direction dir,
++ struct dma_attrs *attrs);
+ /**
+ * sba_map_sg - map Scatter/Gather list
+ * @dev: instance of PCI owned by the driver that's asking.
+@@ -1493,6 +1509,10 @@ static int sba_map_sg_attrs(struct device *dev, struct scatterlist *sglist,
+ ** Access to the virtual address is what forces a two pass algorithm.
+ */
+ coalesced = sba_coalesce_chunks(ioc, dev, sglist, nents);
++ if (coalesced < 0) {
++ sba_unmap_sg_attrs(dev, sglist, nents, dir, attrs);
++ return 0;
++ }
+
+ /*
+ ** Program the I/O Pdir
+diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c
+index 4990495..a35c661 100644
+--- a/arch/ia64/kernel/time.c
++++ b/arch/ia64/kernel/time.c
+@@ -473,7 +473,7 @@ void update_vsyscall_tz(void)
+ {
+ }
+
+-void update_vsyscall(struct timespec *wall, struct clocksource *c)
++void update_vsyscall(struct timespec *wall, struct clocksource *c, u32 mult)
+ {
+ unsigned long flags;
+
+@@ -481,7 +481,7 @@ void update_vsyscall(struct timespec *wall, struct clocksource *c)
+
+ /* copy fsyscall clock data */
+ fsyscall_gtod_data.clk_mask = c->mask;
+- fsyscall_gtod_data.clk_mult = c->mult;
++ fsyscall_gtod_data.clk_mult = mult;
+ fsyscall_gtod_data.clk_shift = c->shift;
+ fsyscall_gtod_data.clk_fsys_mmio = c->fsys_mmio;
+ fsyscall_gtod_data.clk_cycle_last = c->cycle_last;
+diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
+index 1a54a3b..7546e2c 100644
+--- a/arch/powerpc/Makefile
++++ b/arch/powerpc/Makefile
+@@ -158,9 +158,11 @@ drivers-$(CONFIG_OPROFILE) += arch/powerpc/oprofile/
+ # Default to zImage, override when needed
+ all: zImage
+
+-BOOT_TARGETS = zImage zImage.initrd uImage zImage% dtbImage% treeImage.% cuImage.% simpleImage.%
++# With make 3.82 we cannot mix normal and wildcard targets
+BOOT_TARGETS1 := zImage zImage.initrd uImage
++BOOT_TARGETS2 := zImage% dtbImage% treeImage.% cuImage.% simpleImage.%
+
+-PHONY += $(BOOT_TARGETS)
++PHONY += $(BOOT_TARGETS1) $(BOOT_TARGETS2)
+
+ boot := arch/$(ARCH)/boot
+
+@@ -175,10 +177,16 @@ relocs_check: arch/powerpc/relocs_check.pl vmlinux
+ zImage: relocs_check
+ endif
+
+-$(BOOT_TARGETS): vmlinux
++$(BOOT_TARGETS1): vmlinux
++ $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
++$(BOOT_TARGETS2): vmlinux
++ $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
++
++
++bootwrapper_install:
+ $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+-bootwrapper_install %.dtb:
++%.dtb:
+ $(Q)$(MAKE) ARCH=ppc64 $(build)=$(boot) $(patsubst %,$(boot)/%,$@)
+
+ define archhelp
+diff --git a/arch/powerpc/include/asm/ppc-pci.h b/arch/powerpc/include/asm/ppc-pci.h
+index 2828f9d..fa6648a 100644
+--- a/arch/powerpc/include/asm/ppc-pci.h
++++ b/arch/powerpc/include/asm/ppc-pci.h
+@@ -137,6 +137,11 @@ struct device_node * find_device_pe(struct device_node *dn);
+ void eeh_sysfs_add_device(struct pci_dev *pdev);
+ void eeh_sysfs_remove_device(struct pci_dev *pdev);
+
++static inline const char *eeh_pci_name(struct pci_dev *pdev)
++{
++ return pdev ? pci_name(pdev) : "<null>";
++}
++
+ #endif /* CONFIG_EEH */
+
+ #else /* CONFIG_PCI */
+diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
+index 02aab7f..7143d4c 100644
+--- a/arch/powerpc/kernel/time.c
++++ b/arch/powerpc/kernel/time.c
+@@ -864,7 +864,8 @@ static cycle_t timebase_read(struct clocksource *cs)
+ return (cycle_t)get_tb();
+ }
+
+-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
++void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
++ u32 mult)
+ {
+ u64 t2x, stamp_xsec;
+
+@@ -877,7 +878,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+
+ /* XXX this assumes clock->shift == 22 */
+ /* 4611686018 ~= 2^(20+64-22) / 1e9 */
+- t2x = (u64) clock->mult * 4611686018ULL;
++ t2x = (u64) mult * 4611686018ULL;
+ stamp_xsec = (u64) xtime.tv_nsec * XSEC_PER_SEC;
+ do_div(stamp_xsec, 1000000000);
+ stamp_xsec += (u64) xtime.tv_sec * XSEC_PER_SEC;
+diff --git a/arch/powerpc/platforms/pseries/eeh.c b/arch/powerpc/platforms/pseries/eeh.c
+index ccd8dd0..3304f32 100644
+--- a/arch/powerpc/platforms/pseries/eeh.c
++++ b/arch/powerpc/platforms/pseries/eeh.c
+@@ -491,7 +491,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+ pdn->eeh_mode & EEH_MODE_NOCHECK) {
+ ignored_check++;
+ pr_debug("EEH: Ignored check (%x) for %s %s\n",
+- pdn->eeh_mode, pci_name (dev), dn->full_name);
++ pdn->eeh_mode, eeh_pci_name(dev), dn->full_name);
+ return 0;
+ }
+
+@@ -515,7 +515,7 @@ int eeh_dn_check_failure(struct device_node *dn, struct pci_dev *dev)
+ printk (KERN_ERR "EEH: %d reads ignored for recovering device at "
+ "location=%s driver=%s pci addr=%s\n",
+ pdn->eeh_check_count, location,
+- dev->driver->name, pci_name(dev));
++ dev->driver->name, eeh_pci_name(dev));
+ printk (KERN_ERR "EEH: Might be infinite loop in %s driver\n",
+ dev->driver->name);
+ dump_stack();
+diff --git a/arch/powerpc/platforms/pseries/eeh_driver.c b/arch/powerpc/platforms/pseries/eeh_driver.c
+index 0e8db67..52c4b40 100644
+--- a/arch/powerpc/platforms/pseries/eeh_driver.c
++++ b/arch/powerpc/platforms/pseries/eeh_driver.c
+@@ -353,7 +353,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
+ location = location ? location : "unknown";
+ printk(KERN_ERR "EEH: Error: Cannot find partition endpoint "
+ "for location=%s pci addr=%s\n",
+- location, pci_name(event->dev));
++ location, eeh_pci_name(event->dev));
+ return NULL;
+ }
+
+@@ -384,7 +384,7 @@ struct pci_dn * handle_eeh_events (struct eeh_event *event)
+ pci_str = pci_name (frozen_pdn->pcidev);
+ drv_str = pcid_name (frozen_pdn->pcidev);
+ } else {
+- pci_str = pci_name (event->dev);
++ pci_str = eeh_pci_name(event->dev);
+ drv_str = pcid_name (event->dev);
+ }
+
+diff --git a/arch/powerpc/platforms/pseries/eeh_event.c b/arch/powerpc/platforms/pseries/eeh_event.c
+index ddb80f5..ec5df8f 100644
+--- a/arch/powerpc/platforms/pseries/eeh_event.c
++++ b/arch/powerpc/platforms/pseries/eeh_event.c
+@@ -80,7 +80,7 @@ static int eeh_event_handler(void * dummy)
+ eeh_mark_slot(event->dn, EEH_MODE_RECOVERING);
+
+ printk(KERN_INFO "EEH: Detected PCI bus error on device %s\n",
+- pci_name(event->dev));
++ eeh_pci_name(event->dev));
+
+ pdn = handle_eeh_events(event);
+
+diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h
+index f23961a..258ba88 100644
+--- a/arch/s390/include/asm/cputime.h
++++ b/arch/s390/include/asm/cputime.h
+@@ -183,6 +183,7 @@ struct s390_idle_data {
+ unsigned long long idle_count;
+ unsigned long long idle_enter;
+ unsigned long long idle_time;
++ int nohz_delay;
+ };
+
+ DECLARE_PER_CPU(struct s390_idle_data, s390_idle);
+@@ -198,4 +199,11 @@ static inline void s390_idle_check(void)
+ vtime_start_cpu();
+ }
+
++static inline int s390_nohz_delay(int cpu)
++{
++ return per_cpu(s390_idle, cpu).nohz_delay != 0;
++}
++
++#define arch_needs_cpu(cpu) s390_nohz_delay(cpu)
++
+ #endif /* _S390_CPUTIME_H */
+diff --git a/arch/s390/kernel/s390_ext.c b/arch/s390/kernel/s390_ext.c
+index 0de305b..59618bc 100644
+--- a/arch/s390/kernel/s390_ext.c
++++ b/arch/s390/kernel/s390_ext.c
+@@ -126,6 +126,8 @@ void __irq_entry do_extint(struct pt_regs *regs, unsigned short code)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+ kstat_cpu(smp_processor_id()).irqs[EXTERNAL_INTERRUPT]++;
++ if (code != 0x1004)
++ __get_cpu_var(s390_idle).nohz_delay = 1;
+ index = ext_hash(code);
+ for (p = ext_int_hash[index]; p; p = p->next) {
+ if (likely(p->code == code))
+diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c
+index 34162a0..68e1ecf 100644
+--- a/arch/s390/kernel/time.c
++++ b/arch/s390/kernel/time.c
+@@ -214,7 +214,8 @@ struct clocksource * __init clocksource_default_clock(void)
+ return &clocksource_tod;
+ }
+
+-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
++void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
++ u32 mult)
+ {
+ if (clock != &clocksource_tod)
+ return;
+diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c
+index c41bb0d..b59a812 100644
+--- a/arch/s390/kernel/vtime.c
++++ b/arch/s390/kernel/vtime.c
+@@ -167,6 +167,8 @@ void vtime_stop_cpu(void)
+ /* Wait for external, I/O or machine check interrupt. */
+ psw.mask = psw_kernel_bits | PSW_MASK_WAIT | PSW_MASK_IO | PSW_MASK_EXT;
+
++ idle->nohz_delay = 0;
++
+ /* Check if the CPU timer needs to be reprogrammed. */
+ if (vq->do_spt) {
+ __u64 vmax = VTIMER_MAX_SLICE;
+diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
+index ee1931b..5af5051 100644
+--- a/arch/x86/include/asm/cmpxchg_32.h
++++ b/arch/x86/include/asm/cmpxchg_32.h
+@@ -34,12 +34,12 @@ static inline void __set_64bit(unsigned long long *ptr,
+ unsigned int low, unsigned int high)
+ {
+ asm volatile("\n1:\t"
+- "movl (%0), %%eax\n\t"
+- "movl 4(%0), %%edx\n\t"
+- LOCK_PREFIX "cmpxchg8b (%0)\n\t"
++ "movl (%1), %%eax\n\t"
++ "movl 4(%1), %%edx\n\t"
++ LOCK_PREFIX "cmpxchg8b %0\n\t"
+ "jnz 1b"
+- : /* no outputs */
+- : "D"(ptr),
++ : "=m"(*ptr)
++ : "D" (ptr),
+ "b"(low),
+ "c"(high)
+ : "ax", "dx", "memory");
+@@ -82,20 +82,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ switch (size) {
+ case 1:
+ asm volatile("xchgb %b0,%1"
+- : "=q" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=q" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 2:
+ asm volatile("xchgw %w0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 4:
+ asm volatile("xchgl %0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ }
+@@ -139,21 +139,21 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile(LOCK_PREFIX "cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgl %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -172,21 +172,21 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("lock; cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("lock; cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("lock; cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgl %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -200,21 +200,21 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgl %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -226,11 +226,10 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr,
+ unsigned long long new)
+ {
+ unsigned long long prev;
+- asm volatile(LOCK_PREFIX "cmpxchg8b %3"
+- : "=A"(prev)
++ asm volatile(LOCK_PREFIX "cmpxchg8b %1"
++ : "=A"(prev), "+m" (*__xg(ptr))
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+- "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+@@ -241,11 +240,10 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
+ unsigned long long new)
+ {
+ unsigned long long prev;
+- asm volatile("cmpxchg8b %3"
+- : "=A"(prev)
++ asm volatile("cmpxchg8b %1"
++ : "=A"(prev), "+m"(*__xg(ptr))
+ : "b"((unsigned long)new),
+ "c"((unsigned long)(new >> 32)),
+- "m"(*__xg(ptr)),
+ "0"(old)
+ : "memory");
+ return prev;
+diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
+index 52de72e..1871cb0 100644
+--- a/arch/x86/include/asm/cmpxchg_64.h
++++ b/arch/x86/include/asm/cmpxchg_64.h
+@@ -26,26 +26,26 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
+ switch (size) {
+ case 1:
+ asm volatile("xchgb %b0,%1"
+- : "=q" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=q" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 2:
+ asm volatile("xchgw %w0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 4:
+ asm volatile("xchgl %k0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ case 8:
+ asm volatile("xchgq %0,%1"
+- : "=r" (x)
+- : "m" (*__xg(ptr)), "0" (x)
++ : "=r" (x), "+m" (*__xg(ptr))
++ : "0" (x)
+ : "memory");
+ break;
+ }
+@@ -66,27 +66,27 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgl %k2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+- asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile(LOCK_PREFIX "cmpxchgq %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -105,21 +105,27 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("lock; cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("lock; cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("lock; cmpxchgl %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("lock; cmpxchgl %k2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
++ : "memory");
++ return prev;
++ case 8:
++ asm volatile("lock; cmpxchgq %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+@@ -133,27 +139,27 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
+ unsigned long prev;
+ switch (size) {
+ case 1:
+- asm volatile("cmpxchgb %b1,%2"
+- : "=a"(prev)
+- : "q"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgb %b2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "q"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 2:
+- asm volatile("cmpxchgw %w1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgw %w2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 4:
+- asm volatile("cmpxchgl %k1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgl %k2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ case 8:
+- asm volatile("cmpxchgq %1,%2"
+- : "=a"(prev)
+- : "r"(new), "m"(*__xg(ptr)), "0"(old)
++ asm volatile("cmpxchgq %2,%1"
++ : "=a"(prev), "+m"(*__xg(ptr))
++ : "r"(new), "0"(old)
+ : "memory");
+ return prev;
+ }
+diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
+index dc4f486..9f9fded 100644
+--- a/arch/x86/kernel/apic/io_apic.c
++++ b/arch/x86/kernel/apic/io_apic.c
+@@ -1484,7 +1484,7 @@ static struct {
+
+ static void __init setup_IO_APIC_irqs(void)
+ {
+- int apic_id = 0, pin, idx, irq;
++ int apic_id, pin, idx, irq;
+ int notcon = 0;
+ struct irq_desc *desc;
+ struct irq_cfg *cfg;
+@@ -1492,14 +1492,7 @@ static void __init setup_IO_APIC_irqs(void)
+
+ apic_printk(APIC_VERBOSE, KERN_DEBUG "init IO_APIC IRQs\n");
+
+-#ifdef CONFIG_ACPI
+- if (!acpi_disabled && acpi_ioapic) {
+- apic_id = mp_find_ioapic(0);
+- if (apic_id < 0)
+- apic_id = 0;
+- }
+-#endif
+-
++ for (apic_id = 0; apic_id < nr_ioapics; apic_id++)
+ for (pin = 0; pin < nr_ioapic_registers[apic_id]; pin++) {
+ idx = find_irq_entry(apic_id, pin, mp_INT);
+ if (idx == -1) {
+@@ -1521,6 +1514,9 @@ static void __init setup_IO_APIC_irqs(void)
+
+ irq = pin_2_irq(idx, apic_id, pin);
+
++ if ((apic_id > 0) && (irq > 16))
++ continue;
++
+ /*
+ * Skip the timer IRQ if there's a quirk handler
+ * installed and if it returns 1:
+@@ -4083,27 +4079,23 @@ int acpi_get_override_irq(int bus_irq, int *trigger, int *polarity)
+ #ifdef CONFIG_SMP
+ void __init setup_ioapic_dest(void)
+ {
+- int pin, ioapic = 0, irq, irq_entry;
++ int pin, ioapic, irq, irq_entry;
+ struct irq_desc *desc;
+ const struct cpumask *mask;
+
+ if (skip_ioapic_setup == 1)
+ return;
+
+-#ifdef CONFIG_ACPI
+- if (!acpi_disabled && acpi_ioapic) {
+- ioapic = mp_find_ioapic(0);
+- if (ioapic < 0)
+- ioapic = 0;
+- }
+-#endif
+-
++ for (ioapic = 0; ioapic < nr_ioapics; ioapic++)
+ for (pin = 0; pin < nr_ioapic_registers[ioapic]; pin++) {
+ irq_entry = find_irq_entry(ioapic, pin, mp_INT);
+ if (irq_entry == -1)
+ continue;
+ irq = pin_2_irq(irq_entry, ioapic, pin);
+
++ if ((ioapic > 0) && (irq > 16))
++ continue;
++
+ desc = irq_to_desc(irq);
+
+ /*
+diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c
+index 1cbed97..9580152 100644
+--- a/arch/x86/kernel/cpu/vmware.c
++++ b/arch/x86/kernel/cpu/vmware.c
+@@ -22,6 +22,7 @@
+ */
+
+ #include <linux/dmi.h>
++#include <linux/jiffies.h>
+ #include <asm/div64.h>
+ #include <asm/vmware.h>
+ #include <asm/x86_init.h>
+@@ -50,7 +51,7 @@ static inline int __vmware_platform(void)
+
+ static unsigned long vmware_get_tsc_khz(void)
+ {
+- uint64_t tsc_hz;
++ uint64_t tsc_hz, lpj;
+ uint32_t eax, ebx, ecx, edx;
+
+ VMWARE_PORT(GETHZ, eax, ebx, ecx, edx);
+@@ -61,6 +62,13 @@ static unsigned long vmware_get_tsc_khz(void)
+ printk(KERN_INFO "TSC freq read from hypervisor : %lu.%03lu MHz\n",
+ (unsigned long) tsc_hz / 1000,
+ (unsigned long) tsc_hz % 1000);
++
++ if (!preset_lpj) {
++ lpj = ((u64)tsc_hz * 1000);
++ do_div(lpj, HZ);
++ preset_lpj = lpj;
++ }
++
+ return tsc_hz;
+ }
+
+diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c
+index 8cb4974..62f39d7 100644
+--- a/arch/x86/kernel/vsyscall_64.c
++++ b/arch/x86/kernel/vsyscall_64.c
+@@ -73,7 +73,8 @@ void update_vsyscall_tz(void)
+ write_sequnlock_irqrestore(&vsyscall_gtod_data.lock, flags);
+ }
+
+-void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
++void update_vsyscall(struct timespec *wall_time, struct clocksource *clock,
++ u32 mult)
+ {
+ unsigned long flags;
+
+@@ -82,7 +83,7 @@ void update_vsyscall(struct timespec *wall_time, struct clocksource *clock)
+ vsyscall_gtod_data.clock.vread = clock->vread;
+ vsyscall_gtod_data.clock.cycle_last = clock->cycle_last;
+ vsyscall_gtod_data.clock.mask = clock->mask;
+- vsyscall_gtod_data.clock.mult = clock->mult;
++ vsyscall_gtod_data.clock.mult = mult;
+ vsyscall_gtod_data.clock.shift = clock->shift;
+ vsyscall_gtod_data.wall_time_sec = wall_time->tv_sec;
+ vsyscall_gtod_data.wall_time_nsec = wall_time->tv_nsec;
+diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
+index 5a4398a..7d095ad 100644
+--- a/arch/x86/mm/init_64.c
++++ b/arch/x86/mm/init_64.c
+@@ -49,6 +49,7 @@
+ #include <asm/numa.h>
+ #include <asm/cacheflush.h>
+ #include <asm/init.h>
++#include <linux/bootmem.h>
+
+ static unsigned long dma_reserve __initdata;
+
+@@ -615,6 +616,21 @@ void __init paging_init(void)
+ */
+ #ifdef CONFIG_MEMORY_HOTPLUG
+ /*
++ * After memory hotplug the variables max_pfn, max_low_pfn and high_memory need
++ * updating.
++ */
++static void update_end_of_memory_vars(u64 start, u64 size)
++{
++ unsigned long end_pfn = PFN_UP(start + size);
++
++ if (end_pfn > max_pfn) {
++ max_pfn = end_pfn;
++ max_low_pfn = end_pfn;
++ high_memory = (void *)__va(max_pfn * PAGE_SIZE - 1) + 1;
++ }
++}
++
++/*
+ * Memory is added always to NORMAL zone. This means you will never get
+ * additional DMA/DMA32 memory.
+ */
+@@ -633,6 +649,9 @@ int arch_add_memory(int nid, u64 start, u64 size)
+ ret = __add_pages(nid, zone, start_pfn, nr_pages);
+ WARN_ON_ONCE(ret);
+
++ /* update max_pfn, max_low_pfn and high_memory */
++ update_end_of_memory_vars(start, size);
++
+ return ret;
+ }
+ EXPORT_SYMBOL_GPL(arch_add_memory);
+diff --git a/crypto/testmgr.c b/crypto/testmgr.c
+index 6d5b746..2a4106d 100644
+--- a/crypto/testmgr.c
++++ b/crypto/testmgr.c
+@@ -1477,9 +1477,54 @@ static int alg_test_cprng(const struct alg_test_desc *desc, const char *driver,
+ return err;
+ }
+
++static int alg_test_null(const struct alg_test_desc *desc,
++ const char *driver, u32 type, u32 mask)
++{
++ return 0;
++}
++
+ /* Please keep this list sorted by algorithm name. */
+ static const struct alg_test_desc alg_test_descs[] = {
+ {
++ .alg = "__driver-cbc-aes-aesni",
++ .test = alg_test_null,
++ .suite = {
++ .cipher = {
++ .enc = {
++ .vecs = NULL,
++ .count = 0
++ },
++ .dec = {
++ .vecs = NULL,
++ .count = 0
++ }
++ }
++ }
++ }, {
++ .alg = "__driver-ecb-aes-aesni",
++ .test = alg_test_null,
++ .suite = {
++ .cipher = {
++ .enc = {
++ .vecs = NULL,
++ .count = 0
++ },
++ .dec = {
++ .vecs = NULL,
++ .count = 0
++ }
++ }
++ }
++ }, {
++ .alg = "__ghash-pclmulqdqni",
++ .test = alg_test_null,
++ .suite = {
++ .hash = {
++ .vecs = NULL,
++ .count = 0
++ }
++ }
++ }, {
+ .alg = "ansi_cprng",
+ .test = alg_test_cprng,
+ .fips_allowed = 1,
+@@ -1623,6 +1668,30 @@ static const struct alg_test_desc alg_test_descs[] = {
+ }
+ }
+ }, {
++ .alg = "cryptd(__driver-ecb-aes-aesni)",
++ .test = alg_test_null,
++ .suite = {
++ .cipher = {
++ .enc = {
++ .vecs = NULL,
++ .count = 0
++ },
++ .dec = {
++ .vecs = NULL,
++ .count = 0
++ }
++ }
++ }
++ }, {
++ .alg = "cryptd(__ghash-pclmulqdqni)",
++ .test = alg_test_null,
++ .suite = {
++ .hash = {
++ .vecs = NULL,
++ .count = 0
++ }
++ }
++ }, {
+ .alg = "ctr(aes)",
+ .test = alg_test_skcipher,
+ .fips_allowed = 1,
+@@ -1669,6 +1738,21 @@ static const struct alg_test_desc alg_test_descs[] = {
+ }
+ }
+ }, {
++ .alg = "ecb(__aes-aesni)",
++ .test = alg_test_null,
++ .suite = {
++ .cipher = {
++ .enc = {
++ .vecs = NULL,
++ .count = 0
++ },
++ .dec = {
++ .vecs = NULL,
++ .count = 0
++ }
++ }
++ }
++ }, {
+ .alg = "ecb(aes)",
+ .test = alg_test_skcipher,
+ .fips_allowed = 1,
+diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c
+index 8ba0ed0..40d395e 100644
+--- a/drivers/acpi/processor_perflib.c
++++ b/drivers/acpi/processor_perflib.c
+@@ -356,7 +356,11 @@ static int acpi_processor_get_performance_info(struct acpi_processor *pr)
+ if (result)
+ goto update_bios;
+
+- return 0;
++ /* We need to call _PPC once when cpufreq starts */
++ if (ignore_ppc != 1)
++ result = acpi_processor_get_platform_limit(pr);
++
++ return result;
+
+ /*
+ * Having _PPC but missing frequencies (_PSS, _PCT) is a very good hint that
+diff --git a/drivers/ata/ata_piix.c b/drivers/ata/ata_piix.c
+index 4f94e22..c33591d 100644
+--- a/drivers/ata/ata_piix.c
++++ b/drivers/ata/ata_piix.c
+@@ -157,6 +157,7 @@ struct piix_map_db {
+ struct piix_host_priv {
+ const int *map;
+ u32 saved_iocfg;
++ spinlock_t sidpr_lock; /* FIXME: remove once locking in EH is fixed */
+ void __iomem *sidpr;
+ };
+
+@@ -948,12 +949,15 @@ static int piix_sidpr_scr_read(struct ata_link *link,
+ unsigned int reg, u32 *val)
+ {
+ struct piix_host_priv *hpriv = link->ap->host->private_data;
++ unsigned long flags;
+
+ if (reg >= ARRAY_SIZE(piix_sidx_map))
+ return -EINVAL;
+
++ spin_lock_irqsave(&hpriv->sidpr_lock, flags);
+ piix_sidpr_sel(link, reg);
+ *val = ioread32(hpriv->sidpr + PIIX_SIDPR_DATA);
++ spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
+ return 0;
+ }
+
+@@ -961,12 +965,15 @@ static int piix_sidpr_scr_write(struct ata_link *link,
+ unsigned int reg, u32 val)
+ {
+ struct piix_host_priv *hpriv = link->ap->host->private_data;
++ unsigned long flags;
+
+ if (reg >= ARRAY_SIZE(piix_sidx_map))
+ return -EINVAL;
+
++ spin_lock_irqsave(&hpriv->sidpr_lock, flags);
+ piix_sidpr_sel(link, reg);
+ iowrite32(val, hpriv->sidpr + PIIX_SIDPR_DATA);
++ spin_unlock_irqrestore(&hpriv->sidpr_lock, flags);
+ return 0;
+ }
+
+@@ -1555,6 +1562,7 @@ static int __devinit piix_init_one(struct pci_dev *pdev,
+ hpriv = devm_kzalloc(dev, sizeof(*hpriv), GFP_KERNEL);
+ if (!hpriv)
+ return -ENOMEM;
++ spin_lock_init(&hpriv->sidpr_lock);
+
+ /* Save IOCFG, this will be used for cable detection, quirk
+ * detection and restoration on detach. This is necessary
+diff --git a/drivers/atm/solos-pci.c b/drivers/atm/solos-pci.c
+index c5f5186..a73f102 100644
+--- a/drivers/atm/solos-pci.c
++++ b/drivers/atm/solos-pci.c
+@@ -774,7 +774,8 @@ static struct atm_vcc *find_vcc(struct atm_dev *dev, short vpi, int vci)
+ sk_for_each(s, node, head) {
+ vcc = atm_sk(s);
+ if (vcc->dev == dev && vcc->vci == vci &&
+- vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE)
++ vcc->vpi == vpi && vcc->qos.rxtp.traffic_class != ATM_NONE &&
++ test_bit(ATM_VF_READY, &vcc->flags))
+ goto out;
+ }
+ vcc = NULL;
+@@ -900,6 +901,10 @@ static void pclose(struct atm_vcc *vcc)
+ clear_bit(ATM_VF_ADDR, &vcc->flags);
+ clear_bit(ATM_VF_READY, &vcc->flags);
+
++ /* Hold up vcc_destroy_socket() (our caller) until solos_bh() in the
++ tasklet has finished processing any incoming packets (and, more to
++ the point, using the vcc pointer). */
++ tasklet_unlock_wait(&card->tlet);
+ return;
+ }
+
+diff --git a/drivers/block/loop.c b/drivers/block/loop.c
+index bd112c8..1c21a3f 100644
+--- a/drivers/block/loop.c
++++ b/drivers/block/loop.c
+@@ -238,6 +238,8 @@ static int do_lo_send_aops(struct loop_device *lo, struct bio_vec *bvec,
+ if (ret)
+ goto fail;
+
++ file_update_time(file);
++
+ transfer_result = lo_do_transfer(lo, WRITE, page, offset,
+ bvec->bv_page, bv_offs, size, IV);
+ copied = size;
+diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
+index 1be7631..0e9c564 100644
+--- a/drivers/bluetooth/btusb.c
++++ b/drivers/bluetooth/btusb.c
+@@ -59,6 +59,9 @@ static struct usb_device_id btusb_table[] = {
+ /* Generic Bluetooth USB device */
+ { USB_DEVICE_INFO(0xe0, 0x01, 0x01) },
+
++ /* Apple iMac11,1 */
++ { USB_DEVICE(0x05ac, 0x8215) },
++
+ /* AVM BlueFRITZ! USB v2.0 */
+ { USB_DEVICE(0x057c, 0x3800) },
+
+diff --git a/drivers/char/nvram.c b/drivers/char/nvram.c
+index 88cee40..71f0d72 100644
+--- a/drivers/char/nvram.c
++++ b/drivers/char/nvram.c
+@@ -265,10 +265,16 @@ static ssize_t nvram_write(struct file *file, const char __user *buf,
+ unsigned char contents[NVRAM_BYTES];
+ unsigned i = *ppos;
+ unsigned char *tmp;
+- int len;
+
+- len = (NVRAM_BYTES - i) < count ? (NVRAM_BYTES - i) : count;
+- if (copy_from_user(contents, buf, len))
++ if (i >= NVRAM_BYTES)
++ return 0; /* Past EOF */
++
++ if (count > NVRAM_BYTES - i)
++ count = NVRAM_BYTES - i;
++ if (count > NVRAM_BYTES)
++ return -EFAULT; /* Can't happen, but prove it to gcc */
++
++ if (copy_from_user(contents, buf, count))
+ return -EFAULT;
+
+ spin_lock_irq(&rtc_lock);
+@@ -276,7 +282,7 @@ static ssize_t nvram_write(struct file *file, const char __user *buf,
+ if (!__nvram_check_checksum())
+ goto checksum_err;
+
+- for (tmp = contents; count-- > 0 && i < NVRAM_BYTES; ++i, ++tmp)
++ for (tmp = contents; count--; ++i, ++tmp)
+ __nvram_write_byte(*tmp, i);
+
+ __nvram_set_checksum();
+diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c
+index 259ec21..1097dec 100644
+diff --git a/drivers/gpu/drm/i915/dvo_tfp410.c b/drivers/gpu/drm/i915/dvo_tfp410.c
+index 9ecc907..16dce84 100644
+diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c
+index 64207df..2de76cc 100644
+--- a/drivers/ide/ide-cd.c
++++ b/drivers/ide/ide-cd.c
+@@ -506,15 +506,22 @@ int ide_cd_queue_pc(ide_drive_t *drive, const unsigned char *cmd,
+ return (flags & REQ_FAILED) ? -EIO : 0;
+ }
+
+-static void ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
++/*
++ * returns true if rq has been completed
++ */
++static bool ide_cd_error_cmd(ide_drive_t *drive, struct ide_cmd *cmd)
+ {
+ unsigned int nr_bytes = cmd->nbytes - cmd->nleft;
+
+ if (cmd->tf_flags & IDE_TFLAG_WRITE)
+ nr_bytes -= cmd->last_xfer_len;
+
+- if (nr_bytes > 0)
++ if (nr_bytes > 0) {
+ ide_complete_rq(drive, 0, nr_bytes);
++ return true;
++ }
++
++ return false;
+ }
+
+ static ide_startstop_t cdrom_newpc_intr(ide_drive_t *drive)
+@@ -679,7 +686,8 @@ out_end:
+ }
+
+ if (uptodate == 0 && rq->bio)
+- ide_cd_error_cmd(drive, cmd);
++ if (ide_cd_error_cmd(drive, cmd))
++ return ide_stopped;
+
+ /* make sure it's fully ended */
+ if (blk_fs_request(rq) == 0) {
+diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
+index a5e5f2f..4783153 100644
+--- a/drivers/md/bitmap.c
++++ b/drivers/md/bitmap.c
+@@ -1317,7 +1317,8 @@ void bitmap_endwrite(struct bitmap *bitmap, sector_t offset, unsigned long secto
+ {
+ if (!bitmap) return;
+ if (behind) {
+- atomic_dec(&bitmap->behind_writes);
++ if (atomic_dec_and_test(&bitmap->behind_writes))
++ wake_up(&bitmap->behind_wait);
+ PRINTK(KERN_DEBUG "dec write-behind count %d/%d\n",
+ atomic_read(&bitmap->behind_writes), bitmap->max_write_behind);
+ }
+@@ -1629,6 +1630,7 @@ int bitmap_create(mddev_t *mddev)
+ atomic_set(&bitmap->pending_writes, 0);
+ init_waitqueue_head(&bitmap->write_wait);
+ init_waitqueue_head(&bitmap->overflow_wait);
++ init_waitqueue_head(&bitmap->behind_wait);
+
+ bitmap->mddev = mddev;
+
+diff --git a/drivers/md/bitmap.h b/drivers/md/bitmap.h
+index 7e38d13..86950bc 100644
+--- a/drivers/md/bitmap.h
++++ b/drivers/md/bitmap.h
+@@ -254,6 +254,9 @@ struct bitmap {
+ wait_queue_head_t write_wait;
+ wait_queue_head_t overflow_wait;
+
++#ifndef __GENKSYMS__
++ wait_queue_head_t behind_wait;
++#endif
+ };
+
+ /* the bitmap API */
+diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
+index 5ccad28..791e195 100644
+--- a/drivers/md/raid1.c
++++ b/drivers/md/raid1.c
+@@ -845,6 +845,15 @@ static int make_request(struct request_queue *q, struct bio * bio)
+ }
+ mirror = conf->mirrors + rdisk;
+
++ if (test_bit(WriteMostly, &mirror->rdev->flags) &&
++ bitmap) {
++ /* Reading from a write-mostly device must
++ * take care not to over-take any writes
++ * that are 'behind'
++ */
++ wait_event(bitmap->behind_wait,
++ atomic_read(&bitmap->behind_writes) == 0);
++ }
+ r1_bio->read_disk = rdisk;
+
+ read_bio = bio_clone(bio, GFP_NOIO);
+@@ -922,9 +931,13 @@ static int make_request(struct request_queue *q, struct bio * bio)
+ set_bit(R1BIO_Degraded, &r1_bio->state);
+ }
+
+- /* do behind I/O ? */
++ /* do behind I/O ?
++ * Not if there are too many, or cannot allocate memory,
++ * or a reader on WriteMostly is waiting for behind writes
++ * to flush */
+ if (bitmap &&
+ atomic_read(&bitmap->behind_writes) < bitmap->max_write_behind &&
++ !waitqueue_active(&bitmap->behind_wait) &&
+ (behind_pages = alloc_behind_pages(bio)) != NULL)
+ set_bit(R1BIO_BehindIO, &r1_bio->state);
+
+@@ -2105,15 +2118,13 @@ static int stop(mddev_t *mddev)
+ {
+ conf_t *conf = mddev->private;
+ struct bitmap *bitmap = mddev->bitmap;
+- int behind_wait = 0;
+
+ /* wait for behind writes to complete */
+- while (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
+- behind_wait++;
+- printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop (%d)\n", mdname(mddev), behind_wait);
+- set_current_state(TASK_UNINTERRUPTIBLE);
+- schedule_timeout(HZ); /* wait a second */
++ if (bitmap && atomic_read(&bitmap->behind_writes) > 0) {
++ printk(KERN_INFO "raid1: behind writes in progress on device %s, waiting to stop.\n", mdname(mddev));
+ /* need to kick something here to make sure I/O goes? */
++ wait_event(bitmap->behind_wait,
++ atomic_read(&bitmap->behind_writes) == 0);
+ }
+
+ raise_barrier(conf);
+diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
+index 6c2a35b..1b4e232 100644
+--- a/drivers/md/raid10.c
++++ b/drivers/md/raid10.c
+@@ -824,11 +824,29 @@ static int make_request(struct request_queue *q, struct bio * bio)
+ */
+ bp = bio_split(bio,
+ chunk_sects - (bio->bi_sector & (chunk_sects - 1)) );
++
++ /* Each of these 'make_request' calls will call 'wait_barrier'.
++ * If the first succeeds but the second blocks due to the resync
++ * thread raising the barrier, we will deadlock because the
++ * IO to the underlying device will be queued in generic_make_request
++ * and will never complete, so will never reduce nr_pending.
++ * So increment nr_waiting here so no new raise_barriers will
++ * succeed, and so the second wait_barrier cannot block.
++ */
++ spin_lock_irq(&conf->resync_lock);
++ conf->nr_waiting++;
++ spin_unlock_irq(&conf->resync_lock);
++
+ if (make_request(q, &bp->bio1))
+ generic_make_request(&bp->bio1);
+ if (make_request(q, &bp->bio2))
+ generic_make_request(&bp->bio2);
+
++ spin_lock_irq(&conf->resync_lock);
++ conf->nr_waiting--;
++ wake_up(&conf->wait_barrier);
++ spin_unlock_irq(&conf->resync_lock);
++
+ bio_pair_release(bp);
+ return 0;
+ bad_map:
+diff --git a/drivers/net/smsc911x.c b/drivers/net/smsc911x.c
+index f9cdcbc..b496fa6 100644
+--- a/drivers/net/smsc911x.c
++++ b/drivers/net/smsc911x.c
+@@ -85,8 +85,7 @@ struct smsc911x_data {
+ */
+ spinlock_t mac_lock;
+
+- /* spinlock to ensure 16-bit accesses are serialised.
+- * unused with a 32-bit bus */
++ /* spinlock to ensure register accesses are serialised */
+ spinlock_t dev_lock;
+
+ struct phy_device *phy_dev;
+@@ -119,37 +118,33 @@ struct smsc911x_data {
+ unsigned int hashlo;
+ };
+
+-/* The 16-bit access functions are significantly slower, due to the locking
+- * necessary. If your bus hardware can be configured to do this for you
+- * (in response to a single 32-bit operation from software), you should use
+- * the 32-bit access functions instead. */
+-
+-static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
++static inline u32 __smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
+ {
+ if (pdata->config.flags & SMSC911X_USE_32BIT)
+ return readl(pdata->ioaddr + reg);
+
+- if (pdata->config.flags & SMSC911X_USE_16BIT) {
+- u32 data;
+- unsigned long flags;
+-
+- /* these two 16-bit reads must be performed consecutively, so
+- * must not be interrupted by our own ISR (which would start
+- * another read operation) */
+- spin_lock_irqsave(&pdata->dev_lock, flags);
+- data = ((readw(pdata->ioaddr + reg) & 0xFFFF) |
++ if (pdata->config.flags & SMSC911X_USE_16BIT)
++ return ((readw(pdata->ioaddr + reg) & 0xFFFF) |
+ ((readw(pdata->ioaddr + reg + 2) & 0xFFFF) << 16));
+- spin_unlock_irqrestore(&pdata->dev_lock, flags);
+-
+- return data;
+- }
+
+ BUG();
+ return 0;
+ }
+
+-static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
+- u32 val)
++static inline u32 smsc911x_reg_read(struct smsc911x_data *pdata, u32 reg)
++{
++ u32 data;
++ unsigned long flags;
++
++ spin_lock_irqsave(&pdata->dev_lock, flags);
++ data = __smsc911x_reg_read(pdata, reg);
++ spin_unlock_irqrestore(&pdata->dev_lock, flags);
++
++ return data;
++}
++
++static inline void __smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
++ u32 val)
+ {
+ if (pdata->config.flags & SMSC911X_USE_32BIT) {
+ writel(val, pdata->ioaddr + reg);
+@@ -157,44 +152,54 @@ static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
+ }
+
+ if (pdata->config.flags & SMSC911X_USE_16BIT) {
+- unsigned long flags;
+-
+- /* these two 16-bit writes must be performed consecutively, so
+- * must not be interrupted by our own ISR (which would start
+- * another read operation) */
+- spin_lock_irqsave(&pdata->dev_lock, flags);
+ writew(val & 0xFFFF, pdata->ioaddr + reg);
+ writew((val >> 16) & 0xFFFF, pdata->ioaddr + reg + 2);
+- spin_unlock_irqrestore(&pdata->dev_lock, flags);
+ return;
+ }
+
+ BUG();
+ }
+
++static inline void smsc911x_reg_write(struct smsc911x_data *pdata, u32 reg,
++ u32 val)
++{
++ unsigned long flags;
++
++ spin_lock_irqsave(&pdata->dev_lock, flags);
++ __smsc911x_reg_write(pdata, reg, val);
++ spin_unlock_irqrestore(&pdata->dev_lock, flags);
++}
++
+ /* Writes a packet to the TX_DATA_FIFO */
+ static inline void
+ smsc911x_tx_writefifo(struct smsc911x_data *pdata, unsigned int *buf,
+ unsigned int wordcount)
+ {
++ unsigned long flags;
++
++ spin_lock_irqsave(&pdata->dev_lock, flags);
++
+ if (pdata->config.flags & SMSC911X_SWAP_FIFO) {
+ while (wordcount--)
+- smsc911x_reg_write(pdata, TX_DATA_FIFO, swab32(*buf++));
+- return;
++ __smsc911x_reg_write(pdata, TX_DATA_FIFO,
++ swab32(*buf++));
++ goto out;
+ }
+
+ if (pdata->config.flags & SMSC911X_USE_32BIT) {
+ writesl(pdata->ioaddr + TX_DATA_FIFO, buf, wordcount);
+- return;
++ goto out;
+ }
+
+ if (pdata->config.flags & SMSC911X_USE_16BIT) {
+ while (wordcount--)
+- smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
+- return;
++ __smsc911x_reg_write(pdata, TX_DATA_FIFO, *buf++);
++ goto out;
+ }
+
+ BUG();
++out:
++ spin_unlock_irqrestore(&pdata->dev_lock, flags);
+ }
+
+ /* Reads a packet out of the RX_DATA_FIFO */
+@@ -202,24 +207,31 @@ static inline void
+ smsc911x_rx_readfifo(struct smsc911x_data *pdata, unsigned int *buf,
+ unsigned int wordcount)
+ {
++ unsigned long flags;
++
++ spin_lock_irqsave(&pdata->dev_lock, flags);
++
+ if (pdata->config.flags & SMSC911X_SWAP_FIFO) {
+ while (wordcount--)
+- *buf++ = swab32(smsc911x_reg_read(pdata, RX_DATA_FIFO));
+- return;
++ *buf++ = swab32(__smsc911x_reg_read(pdata,
++ RX_DATA_FIFO));
++ goto out;
+ }
+
+ if (pdata->config.flags & SMSC911X_USE_32BIT) {
+ readsl(pdata->ioaddr + RX_DATA_FIFO, buf, wordcount);
+- return;
++ goto out;
+ }
+
+ if (pdata->config.flags & SMSC911X_USE_16BIT) {
+ while (wordcount--)
+- *buf++ = smsc911x_reg_read(pdata, RX_DATA_FIFO);
+- return;
++ *buf++ = __smsc911x_reg_read(pdata, RX_DATA_FIFO);
++ goto out;
+ }
+
+ BUG();
++out:
++ spin_unlock_irqrestore(&pdata->dev_lock, flags);
+ }
+
+ /* waits for MAC not busy, with timeout. Only called by smsc911x_mac_read
+diff --git a/drivers/net/xen-netfront.c b/drivers/net/xen-netfront.c
+index baa051d..1a11d95 100644
+--- a/drivers/net/xen-netfront.c
++++ b/drivers/net/xen-netfront.c
+@@ -1619,6 +1619,7 @@ static void backend_changed(struct xenbus_device *dev,
+ if (xennet_connect(netdev) != 0)
+ break;
+ xenbus_switch_state(dev, XenbusStateConnected);
++ netif_notify_peers(netdev);
+ break;
+
+ case XenbusStateClosing:
+diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
+index c0de0b9..bf6bd67 100644
+--- a/drivers/pci/quirks.c
++++ b/drivers/pci/quirks.c
+@@ -2084,6 +2084,7 @@ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_ATI, PCI_DEVICE_ID_ATI_RS480, quirk_disabl
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3336, quirk_disable_all_msi);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3351, quirk_disable_all_msi);
+ DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_VT3364, quirk_disable_all_msi);
++DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_DEVICE_ID_VIA_8380_0, quirk_disable_all_msi);
+
+ /* Disable MSI on chipsets that are known to not support it */
+ static void __devinit quirk_disable_msi(struct pci_dev *dev)
+@@ -2356,6 +2357,9 @@ static void __devinit __nv_msi_ht_cap_quirk(struct pci_dev *dev, int all)
+ int pos;
+ int found;
+
++ if (!pci_msi_enabled())
++ return;
++
+ /* check if there is HT MSI cap or enabled on this device */
+ found = ht_check_msi_mapping(dev);
+
+diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c
+index 138124f..126f240 100644
+--- a/drivers/s390/cio/cio.c
++++ b/drivers/s390/cio/cio.c
+@@ -618,6 +618,7 @@ void __irq_entry do_IRQ(struct pt_regs *regs)
+ old_regs = set_irq_regs(regs);
+ s390_idle_check();
+ irq_enter();
++ __get_cpu_var(s390_idle).nohz_delay = 1;
+ if (S390_lowcore.int_clock >= S390_lowcore.clock_comparator)
+ /* Serve timer interrupts first. */
+ clock_comparator_work();
+diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c
+index 63b521d..3e89f8e 100644
+--- a/drivers/scsi/aic7xxx/aic79xx_core.c
++++ b/drivers/scsi/aic7xxx/aic79xx_core.c
+@@ -3171,13 +3171,16 @@ ahd_handle_nonpkt_busfree(struct ahd_softc *ahd)
+ tinfo->curr.transport_version = 2;
+ tinfo->goal.transport_version = 2;
+ tinfo->goal.ppr_options = 0;
+- /*
+- * Remove any SCBs in the waiting for selection
+- * queue that may also be for this target so
+- * that command ordering is preserved.
+- */
+- ahd_freeze_devq(ahd, scb);
+- ahd_qinfifo_requeue_tail(ahd, scb);
++ if (scb != NULL) {
++ /*
++ * Remove any SCBs in the waiting
++ * for selection queue that may
++ * also be for this target so that
++ * command ordering is preserved.
++ */
++ ahd_freeze_devq(ahd, scb);
++ ahd_qinfifo_requeue_tail(ahd, scb);
++ }
+ printerror = 0;
+ }
+ } else if (ahd_sent_msg(ahd, AHDMSG_EXT, MSG_EXT_WDTR, FALSE)
+@@ -3194,13 +3197,16 @@ ahd_handle_nonpkt_busfree(struct ahd_softc *ahd)
+ MSG_EXT_WDTR_BUS_8_BIT,
+ AHD_TRANS_CUR|AHD_TRANS_GOAL,
+ /*paused*/TRUE);
+- /*
+- * Remove any SCBs in the waiting for selection
+- * queue that may also be for this target so that
+- * command ordering is preserved.
+- */
+- ahd_freeze_devq(ahd, scb);
+- ahd_qinfifo_requeue_tail(ahd, scb);
++ if (scb != NULL) {
++ /*
++ * Remove any SCBs in the waiting for
++ * selection queue that may also be for
++ * this target so that command ordering
++ * is preserved.
++ */
++ ahd_freeze_devq(ahd, scb);
++ ahd_qinfifo_requeue_tail(ahd, scb);
++ }
+ printerror = 0;
+ } else if (ahd_sent_msg(ahd, AHDMSG_EXT, MSG_EXT_SDTR, FALSE)
+ && ppr_busfree == 0) {
+@@ -3217,13 +3223,16 @@ ahd_handle_nonpkt_busfree(struct ahd_softc *ahd)
+ /*ppr_options*/0,
+ AHD_TRANS_CUR|AHD_TRANS_GOAL,
+ /*paused*/TRUE);
+- /*
+- * Remove any SCBs in the waiting for selection
+- * queue that may also be for this target so that
+- * command ordering is preserved.
+- */
+- ahd_freeze_devq(ahd, scb);
+- ahd_qinfifo_requeue_tail(ahd, scb);
++ if (scb != NULL) {
++ /*
++ * Remove any SCBs in the waiting for
++ * selection queue that may also be for
++ * this target so that command ordering
++ * is preserved.
++ */
++ ahd_freeze_devq(ahd, scb);
++ ahd_qinfifo_requeue_tail(ahd, scb);
++ }
+ printerror = 0;
+ } else if ((ahd->msg_flags & MSG_FLAG_EXPECT_IDE_BUSFREE) != 0
+ && ahd_sent_msg(ahd, AHDMSG_1B,
+@@ -3251,7 +3260,7 @@ ahd_handle_nonpkt_busfree(struct ahd_softc *ahd)
+ * the message phases. We check it last in case we
+ * had to send some other message that caused a busfree.
+ */
+- if (printerror != 0
++ if (scb != NULL && printerror != 0
+ && (lastphase == P_MESGIN || lastphase == P_MESGOUT)
+ && ((ahd->msg_flags & MSG_FLAG_EXPECT_PPR_BUSFREE) != 0)) {
+
+diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
+index bb2c696..2d66fac 100644
+--- a/drivers/scsi/ibmvscsi/ibmvfc.c
++++ b/drivers/scsi/ibmvscsi/ibmvfc.c
+@@ -1969,7 +1969,7 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
+ DECLARE_COMPLETION_ONSTACK(comp);
+ int wait;
+ unsigned long flags;
+- signed long timeout = init_timeout * HZ;
++ signed long timeout = IBMVFC_ABORT_WAIT_TIMEOUT * HZ;
+
+ ENTER;
+ do {
+@@ -2720,6 +2720,7 @@ static struct ibmvfc_async_crq *ibmvfc_next_async_crq(struct ibmvfc_host *vhost)
+ if (crq->valid & 0x80) {
+ if (++async_crq->cur == async_crq->size)
+ async_crq->cur = 0;
++ rmb();
+ } else
+ crq = NULL;
+
+@@ -2742,6 +2743,7 @@ static struct ibmvfc_crq *ibmvfc_next_crq(struct ibmvfc_host *vhost)
+ if (crq->valid & 0x80) {
+ if (++queue->cur == queue->size)
+ queue->cur = 0;
++ rmb();
+ } else
+ crq = NULL;
+
+@@ -2790,12 +2792,14 @@ static void ibmvfc_tasklet(void *data)
+ while ((async = ibmvfc_next_async_crq(vhost)) != NULL) {
+ ibmvfc_handle_async(async, vhost);
+ async->valid = 0;
++ wmb();
+ }
+
+ /* Pull all the valid messages off the CRQ */
+ while ((crq = ibmvfc_next_crq(vhost)) != NULL) {
+ ibmvfc_handle_crq(crq, vhost);
+ crq->valid = 0;
++ wmb();
+ }
+
+ vio_enable_interrupts(vdev);
+@@ -2803,10 +2807,12 @@ static void ibmvfc_tasklet(void *data)
+ vio_disable_interrupts(vdev);
+ ibmvfc_handle_async(async, vhost);
+ async->valid = 0;
++ wmb();
+ } else if ((crq = ibmvfc_next_crq(vhost)) != NULL) {
+ vio_disable_interrupts(vdev);
+ ibmvfc_handle_crq(crq, vhost);
+ crq->valid = 0;
++ wmb();
+ } else
+ done = 1;
+ }
+diff --git a/drivers/scsi/ibmvscsi/ibmvfc.h b/drivers/scsi/ibmvscsi/ibmvfc.h
+index 007fa1c..ef8e9f8 100644
+--- a/drivers/scsi/ibmvscsi/ibmvfc.h
++++ b/drivers/scsi/ibmvscsi/ibmvfc.h
+@@ -38,6 +38,7 @@
+ #define IBMVFC_ADISC_PLUS_CANCEL_TIMEOUT \
+ (IBMVFC_ADISC_TIMEOUT + IBMVFC_ADISC_CANCEL_TIMEOUT)
+ #define IBMVFC_INIT_TIMEOUT 120
++#define IBMVFC_ABORT_WAIT_TIMEOUT 40
+ #define IBMVFC_MAX_REQUESTS_DEFAULT 100
+
+ #define IBMVFC_DEBUG 0
+diff --git a/drivers/staging/line6/Kconfig b/drivers/staging/line6/Kconfig
+index 7852d4a..bc1ffbe 100644
+--- a/drivers/staging/line6/Kconfig
++++ b/drivers/staging/line6/Kconfig
+@@ -2,6 +2,7 @@ config LINE6_USB
+ tristate "Line6 USB support"
+ depends on USB && SND
+ select SND_RAWMIDI
++ select SND_PCM
+ help
+ This is a driver for the guitar amp, cab, and effects modeller
+ PODxt Pro by Line6 (and similar devices), supporting the
+diff --git a/drivers/staging/panel/panel.c b/drivers/staging/panel/panel.c
+index 4ce399b..c39a25f 100644
+--- a/drivers/staging/panel/panel.c
++++ b/drivers/staging/panel/panel.c
+@@ -2181,6 +2181,7 @@ int panel_init(void)
+ if (pprt) {
+ parport_release(pprt);
+ parport_unregister_device(pprt);
++ pprt = NULL;
+ }
+ parport_unregister_driver(&panel_driver);
+ printk(KERN_ERR "Panel driver version " PANEL_VERSION
+@@ -2230,6 +2231,7 @@ static void __exit panel_cleanup_module(void)
+ /* TODO: free all input signals */
+ parport_release(pprt);
+ parport_unregister_device(pprt);
++ pprt = NULL;
+ }
+ parport_unregister_driver(&panel_driver);
+ }
+diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c
+index ed3aa7a..7456e29 100644
+--- a/drivers/usb/core/hub.c
++++ b/drivers/usb/core/hub.c
+@@ -22,6 +22,7 @@
+ #include <linux/kthread.h>
+ #include <linux/mutex.h>
+ #include <linux/freezer.h>
++#include <linux/usb/quirks.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/byteorder.h>
+@@ -1768,7 +1769,6 @@ int usb_new_device(struct usb_device *udev)
+ if (udev->parent)
+ usb_autoresume_device(udev->parent);
+
+- usb_detect_quirks(udev);
+ err = usb_enumerate_device(udev); /* Read descriptors */
+ if (err < 0)
+ goto fail;
+@@ -3063,6 +3063,10 @@ static void hub_port_connect_change(struct usb_hub *hub, int port1,
+ if (status < 0)
+ goto loop;
+
++ usb_detect_quirks(udev);
++ if (udev->quirks & USB_QUIRK_DELAY_INIT)
++ msleep(1000);
++
+ /* consecutive bus-powered hubs aren't reliable; they can
+ * violate the voltage drop budget. if the new child has
+ * a "powered" LED, users should notice we didn't enable it
+diff --git a/drivers/usb/core/quirks.c b/drivers/usb/core/quirks.c
+index a61f160..80b062b 100644
+--- a/drivers/usb/core/quirks.c
++++ b/drivers/usb/core/quirks.c
+@@ -38,6 +38,9 @@ static const struct usb_device_id usb_quirk_list[] = {
+ /* Creative SB Audigy 2 NX */
+ { USB_DEVICE(0x041e, 0x3020), .driver_info = USB_QUIRK_RESET_RESUME },
+
++ /* Logitech Harmony 700-series */
++ { USB_DEVICE(0x046d, 0xc122), .driver_info = USB_QUIRK_DELAY_INIT },
++
+ /* Philips PSC805 audio device */
+ { USB_DEVICE(0x0471, 0x0155), .driver_info = USB_QUIRK_RESET_RESUME },
+
+diff --git a/drivers/usb/core/urb.c b/drivers/usb/core/urb.c
+index 0885d4a..da9a2b8 100644
+--- a/drivers/usb/core/urb.c
++++ b/drivers/usb/core/urb.c
+@@ -137,6 +137,16 @@ void usb_anchor_urb(struct urb *urb, struct usb_anchor *anchor)
+ }
+ EXPORT_SYMBOL_GPL(usb_anchor_urb);
+
++/* Callers must hold anchor->lock */
++static void __usb_unanchor_urb(struct urb *urb, struct usb_anchor *anchor)
++{
++ urb->anchor = NULL;
++ list_del(&urb->anchor_list);
++ usb_put_urb(urb);
++ if (list_empty(&anchor->urb_list))
++ wake_up(&anchor->wait);
++}
++
+ /**
+ * usb_unanchor_urb - unanchors an URB
+ * @urb: pointer to the urb to anchor
+@@ -156,17 +166,14 @@ void usb_unanchor_urb(struct urb *urb)
+ return;
+
+ spin_lock_irqsave(&anchor->lock, flags);
+- if (unlikely(anchor != urb->anchor)) {
+- /* we've lost the race to another thread */
+- spin_unlock_irqrestore(&anchor->lock, flags);
+- return;
+- }
+- urb->anchor = NULL;
+- list_del(&urb->anchor_list);
++ /*
++ * At this point, we could be competing with another thread which
++ * has the same intention. To protect the urb from being unanchored
++ * twice, only the winner of the race gets the job.
++ */
++ if (likely(anchor == urb->anchor))
++ __usb_unanchor_urb(urb, anchor);
+ spin_unlock_irqrestore(&anchor->lock, flags);
+- usb_put_urb(urb);
+- if (list_empty(&anchor->urb_list))
+- wake_up(&anchor->wait);
+ }
+ EXPORT_SYMBOL_GPL(usb_unanchor_urb);
+
+@@ -725,20 +732,11 @@ EXPORT_SYMBOL_GPL(usb_unpoison_anchored_urbs);
+ void usb_unlink_anchored_urbs(struct usb_anchor *anchor)
+ {
+ struct urb *victim;
+- unsigned long flags;
+
+- spin_lock_irqsave(&anchor->lock, flags);
+- while (!list_empty(&anchor->urb_list)) {
+- victim = list_entry(anchor->urb_list.prev, struct urb,
+- anchor_list);
+- usb_get_urb(victim);
+- spin_unlock_irqrestore(&anchor->lock, flags);
+- /* this will unanchor the URB */
++ while ((victim = usb_get_from_anchor(anchor)) != NULL) {
+ usb_unlink_urb(victim);
+ usb_put_urb(victim);
+- spin_lock_irqsave(&anchor->lock, flags);
+ }
+- spin_unlock_irqrestore(&anchor->lock, flags);
+ }
+ EXPORT_SYMBOL_GPL(usb_unlink_anchored_urbs);
+
+@@ -775,12 +773,11 @@ struct urb *usb_get_from_anchor(struct usb_anchor *anchor)
+ victim = list_entry(anchor->urb_list.next, struct urb,
+ anchor_list);
+ usb_get_urb(victim);
+- spin_unlock_irqrestore(&anchor->lock, flags);
+- usb_unanchor_urb(victim);
++ __usb_unanchor_urb(victim, anchor);
+ } else {
+- spin_unlock_irqrestore(&anchor->lock, flags);
+ victim = NULL;
+ }
++ spin_unlock_irqrestore(&anchor->lock, flags);
+
+ return victim;
+ }
+@@ -802,12 +799,7 @@ void usb_scuttle_anchored_urbs(struct usb_anchor *anchor)
+ while (!list_empty(&anchor->urb_list)) {
+ victim = list_entry(anchor->urb_list.prev, struct urb,
+ anchor_list);
+- usb_get_urb(victim);
+- spin_unlock_irqrestore(&anchor->lock, flags);
+- /* this may free the URB */
+- usb_unanchor_urb(victim);
+- usb_put_urb(victim);
+- spin_lock_irqsave(&anchor->lock, flags);
++ __usb_unanchor_urb(victim, anchor);
+ }
+ spin_unlock_irqrestore(&anchor->lock, flags);
+ }
+diff --git a/drivers/usb/misc/usbtest.c b/drivers/usb/misc/usbtest.c
+index a9f06d7..d6a2ef3 100644
+--- a/drivers/usb/misc/usbtest.c
++++ b/drivers/usb/misc/usbtest.c
+@@ -1382,7 +1382,6 @@ static void iso_callback (struct urb *urb)
+ break;
+ }
+ }
+- simple_free_urb (urb);
+
+ ctx->pending--;
+ if (ctx->pending == 0) {
+@@ -1499,6 +1498,7 @@ test_iso_queue (struct usbtest_dev *dev, struct usbtest_param *param,
+ }
+
+ simple_free_urb (urbs [i]);
++ urbs[i] = NULL;
+ context.pending--;
+ context.submit_error = 1;
+ break;
+@@ -1508,6 +1508,10 @@ test_iso_queue (struct usbtest_dev *dev, struct usbtest_param *param,
+
+ wait_for_completion (&context.done);
+
++ for (i = 0; i < param->sglen; i++) {
++ if (urbs[i])
++ simple_free_urb(urbs[i]);
++ }
+ /*
+ * Isochronous transfers are expected to fail sometimes. As an
+ * arbitrary limit, we will report an error if any submissions
+diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c
+index 10f3205..9231b25 100644
+--- a/drivers/usb/mon/mon_bin.c
++++ b/drivers/usb/mon/mon_bin.c
+@@ -971,7 +971,7 @@ static int mon_bin_ioctl(struct inode *inode, struct file *file,
+
+ mutex_lock(&rp->fetch_lock);
+ spin_lock_irqsave(&rp->b_lock, flags);
+- mon_free_buff(rp->b_vec, size/CHUNK_SIZE);
++ mon_free_buff(rp->b_vec, rp->b_size/CHUNK_SIZE);
+ kfree(rp->b_vec);
+ rp->b_vec = vec;
+ rp->b_size = size;
+diff --git a/drivers/usb/serial/cp210x.c b/drivers/usb/serial/cp210x.c
+index e3e087e..ae4b181 100644
+--- a/drivers/usb/serial/cp210x.c
++++ b/drivers/usb/serial/cp210x.c
+@@ -128,6 +128,10 @@ static struct usb_device_id id_table [] = {
+ { USB_DEVICE(0x1843, 0x0200) }, /* Vaisala USB Instrument Cable */
+ { USB_DEVICE(0x18EF, 0xE00F) }, /* ELV USB-I2C-Interface */
+ { USB_DEVICE(0x413C, 0x9500) }, /* DW700 GPS USB interface */
++ { USB_DEVICE(0x16DC, 0x0010) }, /* W-IE-NE-R Plein & Baus GmbH PL512 Power Supply */
++ { USB_DEVICE(0x16DC, 0x0011) }, /* W-IE-NE-R Plein & Baus GmbH RCM Remote Control for MARATON Power Supply */
++ { USB_DEVICE(0x16DC, 0x0012) }, /* W-IE-NE-R Plein & Baus GmbH MPOD Multi Channel Power Supply */
++ { USB_DEVICE(0x16DC, 0x0015) }, /* W-IE-NE-R Plein & Baus GmbH CML Control, Monitoring and Data Logger */
+ { } /* Terminating Entry */
+ };
+
+diff --git a/drivers/usb/serial/ftdi_sio.c b/drivers/usb/serial/ftdi_sio.c
+index 8de8572..b2353a9 100644
+--- a/drivers/usb/serial/ftdi_sio.c
++++ b/drivers/usb/serial/ftdi_sio.c
+@@ -162,6 +162,9 @@ static struct usb_device_id id_table_combined [] = {
+ { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_5_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_6_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_SCS_DEVICE_7_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_USINT_CAT_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_USINT_WKEY_PID) },
++ { USB_DEVICE(FTDI_VID, FTDI_USINT_RS232_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_ACTZWAVE_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_IRTRANS_PID) },
+ { USB_DEVICE(FTDI_VID, FTDI_IPLUS_PID) },
+@@ -752,6 +755,7 @@ static struct usb_device_id id_table_combined [] = {
+ .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
+ { USB_DEVICE(FTDI_VID, XVERVE_SIGNALYZER_SH4_PID),
+ .driver_info = (kernel_ulong_t)&ftdi_jtag_quirk },
++ { USB_DEVICE(FTDI_VID, SEGWAY_RMP200_PID) },
+ { }, /* Optional parameter entry */
+ { } /* Terminating entry */
+ };
+diff --git a/drivers/usb/serial/ftdi_sio_ids.h b/drivers/usb/serial/ftdi_sio_ids.h
+index ffdcec7..a0a7796 100644
+--- a/drivers/usb/serial/ftdi_sio_ids.h
++++ b/drivers/usb/serial/ftdi_sio_ids.h
+@@ -40,6 +40,11 @@
+
+ #define FTDI_NXTCAM_PID 0xABB8 /* NXTCam for Mindstorms NXT */
+
++/* US Interface Navigator (http://www.usinterface.com/) */
++#define FTDI_USINT_CAT_PID 0xb810 /* Navigator CAT and 2nd PTT lines */
++#define FTDI_USINT_WKEY_PID 0xb811 /* Navigator WKEY and FSK lines */
++#define FTDI_USINT_RS232_PID 0xb812 /* Navigator RS232 and CONFIG lines */
++
+ /* OOCDlink by Joern Kaipf <joernk at web.de>
+ * (http://www.joernonline.de/dw/doku.php?id=start&idx=projects:oocdlink) */
+ #define FTDI_OOCDLINK_PID 0xbaf8 /* Amontec JTAGkey */
+@@ -1039,3 +1044,8 @@
+ #define XVERVE_SIGNALYZER_SH2_PID 0xBCA2
+ #define XVERVE_SIGNALYZER_SH4_PID 0xBCA4
+
++/*
++ * Segway Robotic Mobility Platform USB interface (using VID 0x0403)
++ * Submitted by John G. Rogers
++ */
++#define SEGWAY_RMP200_PID 0xe729
+diff --git a/drivers/usb/serial/option.c b/drivers/usb/serial/option.c
+index 2586023..e864052 100644
+--- a/drivers/usb/serial/option.c
++++ b/drivers/usb/serial/option.c
+@@ -165,7 +165,10 @@ static int option_resume(struct usb_serial *serial);
+ #define HUAWEI_PRODUCT_E143D 0x143D
+ #define HUAWEI_PRODUCT_E143E 0x143E
+ #define HUAWEI_PRODUCT_E143F 0x143F
++#define HUAWEI_PRODUCT_K4505 0x1464
++#define HUAWEI_PRODUCT_K3765 0x1465
+ #define HUAWEI_PRODUCT_E14AC 0x14AC
++#define HUAWEI_PRODUCT_ETS1220 0x1803
+
+ #define QUANTA_VENDOR_ID 0x0408
+ #define QUANTA_PRODUCT_Q101 0xEA02
+@@ -469,6 +472,9 @@ static struct usb_device_id option_ids[] = {
+ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143D, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143E, 0xff, 0xff, 0xff) },
+ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E143F, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K4505, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_K3765, 0xff, 0xff, 0xff) },
++ { USB_DEVICE_AND_INTERFACE_INFO(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_ETS1220, 0xff, 0xff, 0xff) },
+ { USB_DEVICE(HUAWEI_VENDOR_ID, HUAWEI_PRODUCT_E14AC) },
+ { USB_DEVICE(AMOI_VENDOR_ID, AMOI_PRODUCT_9508) },
+ { USB_DEVICE(NOVATELWIRELESS_VENDOR_ID, NOVATELWIRELESS_PRODUCT_V640) }, /* Novatel Merlin V640/XV620 */
+@@ -1007,6 +1013,13 @@ static int option_probe(struct usb_serial *serial,
+ serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff)
+ return -ENODEV;
+
++ /* Don't bind network interfaces on Huawei K3765 & K4505 */
++ if (serial->dev->descriptor.idVendor == HUAWEI_VENDOR_ID &&
++ (serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K3765 ||
++ serial->dev->descriptor.idProduct == HUAWEI_PRODUCT_K4505) &&
++ serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
++ return -ENODEV;
++
+ data = serial->private = kzalloc(sizeof(struct option_intf_private), GFP_KERNEL);
+ if (!data)
+ return -ENOMEM;
+diff --git a/drivers/video/w100fb.c b/drivers/video/w100fb.c
+index 2376f68..5a1dad2 100644
+--- a/drivers/video/w100fb.c
++++ b/drivers/video/w100fb.c
+@@ -857,9 +857,9 @@ unsigned long w100fb_gpio_read(int port)
+ void w100fb_gpio_write(int port, unsigned long value)
+ {
+ if (port==W100_GPIO_PORT_A)
+- value = writel(value, remapped_regs + mmGPIO_DATA);
++ writel(value, remapped_regs + mmGPIO_DATA);
+ else
+- value = writel(value, remapped_regs + mmGPIO_DATA2);
++ writel(value, remapped_regs + mmGPIO_DATA2);
+ }
+ EXPORT_SYMBOL(w100fb_gpio_read);
+ EXPORT_SYMBOL(w100fb_gpio_write);
+diff --git a/drivers/xen/events.c b/drivers/xen/events.c
+index ce602dd..30e0467 100644
+--- a/drivers/xen/events.c
++++ b/drivers/xen/events.c
+@@ -535,6 +535,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
+ if (irq < 0)
+ return irq;
+
++ irqflags |= IRQF_NO_SUSPEND;
+ retval = request_irq(irq, handler, irqflags, devname, dev_id);
+ if (retval != 0) {
+ unbind_from_irq(irq);
+diff --git a/fs/block_dev.c b/fs/block_dev.c
+index 9b9e3dc..e65efa2 100644
+--- a/fs/block_dev.c
++++ b/fs/block_dev.c
+@@ -1175,10 +1175,12 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part)
+ /*
+ * hooks: /n/, see "layering violations".
+ */
+- ret = devcgroup_inode_permission(bdev->bd_inode, perm);
+- if (ret != 0) {
+- bdput(bdev);
+- return ret;
++ if (!for_part) {
++ ret = devcgroup_inode_permission(bdev->bd_inode, perm);
++ if (ret != 0) {
++ bdput(bdev);
++ return ret;
++ }
+ }
+
+ lock_kernel();
+diff --git a/fs/btrfs/acl.c b/fs/btrfs/acl.c
+index 38ebe78..12d7be8 100644
+--- a/fs/btrfs/acl.c
++++ b/fs/btrfs/acl.c
+@@ -94,7 +94,8 @@ static int btrfs_xattr_get_acl(struct inode *inode, int type,
+ /*
+ * Needs to be called with fs_mutex held
+ */
+-static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
++static int btrfs_set_acl(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct posix_acl *acl, int type)
+ {
+ int ret, size = 0;
+ const char *name;
+@@ -111,12 +112,14 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ mode = inode->i_mode;
+- ret = posix_acl_equiv_mode(acl, &mode);
+- if (ret < 0)
+- return ret;
+- ret = 0;
+- inode->i_mode = mode;
+ name = POSIX_ACL_XATTR_ACCESS;
++ if (acl) {
++ ret = posix_acl_equiv_mode(acl, &mode);
++ if (ret < 0)
++ return ret;
++ inode->i_mode = mode;
++ }
++ ret = 0;
+ break;
+ case ACL_TYPE_DEFAULT:
+ if (!S_ISDIR(inode->i_mode))
+@@ -140,8 +143,7 @@ static int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ goto out;
+ }
+
+- ret = __btrfs_setxattr(inode, name, value, size, 0);
+-
++ ret = __btrfs_setxattr(trans, inode, name, value, size, 0);
+ out:
+ kfree(value);
+
+@@ -154,7 +156,7 @@ out:
+ static int btrfs_xattr_set_acl(struct inode *inode, int type,
+ const void *value, size_t size)
+ {
+- int ret = 0;
++ int ret;
+ struct posix_acl *acl = NULL;
+
+ if (!is_owner_or_cap(inode))
+@@ -170,7 +172,7 @@ static int btrfs_xattr_set_acl(struct inode *inode, int type,
+ }
+ }
+
+- ret = btrfs_set_acl(inode, acl, type);
++ ret = btrfs_set_acl(NULL, inode, acl, type);
+
+ posix_acl_release(acl);
+
+@@ -224,7 +226,8 @@ int btrfs_check_acl(struct inode *inode, int mask)
+ * stuff has been fixed to work with that. If the locking stuff changes, we
+ * need to re-evaluate the acl locking stuff.
+ */
+-int btrfs_init_acl(struct inode *inode, struct inode *dir)
++int btrfs_init_acl(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct inode *dir)
+ {
+ struct posix_acl *acl = NULL;
+ int ret = 0;
+@@ -249,7 +252,8 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
+ mode_t mode;
+
+ if (S_ISDIR(inode->i_mode)) {
+- ret = btrfs_set_acl(inode, acl, ACL_TYPE_DEFAULT);
++ ret = btrfs_set_acl(trans, inode, acl,
++ ACL_TYPE_DEFAULT);
+ if (ret)
+ goto failed;
+ }
+@@ -264,10 +268,11 @@ int btrfs_init_acl(struct inode *inode, struct inode *dir)
+ inode->i_mode = mode;
+ if (ret > 0) {
+ /* we need an acl */
+- ret = btrfs_set_acl(inode, clone,
++ ret = btrfs_set_acl(trans, inode, clone,
+ ACL_TYPE_ACCESS);
+ }
+ }
++ posix_acl_release(clone);
+ }
+ failed:
+ posix_acl_release(acl);
+@@ -297,7 +302,7 @@ int btrfs_acl_chmod(struct inode *inode)
+
+ ret = posix_acl_chmod_masq(clone, inode->i_mode);
+ if (!ret)
+- ret = btrfs_set_acl(inode, clone, ACL_TYPE_ACCESS);
++ ret = btrfs_set_acl(NULL, inode, clone, ACL_TYPE_ACCESS);
+
+ posix_acl_release(clone);
+
+@@ -323,7 +328,8 @@ int btrfs_acl_chmod(struct inode *inode)
+ return 0;
+ }
+
+-int btrfs_init_acl(struct inode *inode, struct inode *dir)
++int btrfs_init_acl(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct inode *dir)
+ {
+ return 0;
+ }
+diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
+index f6783a4..3f1f50d 100644
+--- a/fs/btrfs/btrfs_inode.h
++++ b/fs/btrfs/btrfs_inode.h
+@@ -44,9 +44,6 @@ struct btrfs_inode {
+ */
+ struct extent_io_tree io_failure_tree;
+
+- /* held while inesrting or deleting extents from files */
+- struct mutex extent_mutex;
+-
+ /* held while logging the inode in tree-log.c */
+ struct mutex log_mutex;
+
+@@ -166,7 +163,7 @@ static inline struct btrfs_inode *BTRFS_I(struct inode *inode)
+
+ static inline void btrfs_i_size_write(struct inode *inode, u64 size)
+ {
+- inode->i_size = size;
++ i_size_write(inode, size);
+ BTRFS_I(inode)->disk_i_size = size;
+ }
+
+diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
+index ec96f3a..c4bc570 100644
+--- a/fs/btrfs/ctree.c
++++ b/fs/btrfs/ctree.c
+@@ -37,6 +37,11 @@ static int balance_node_right(struct btrfs_trans_handle *trans,
+ struct extent_buffer *src_buf);
+ static int del_ptr(struct btrfs_trans_handle *trans, struct btrfs_root *root,
+ struct btrfs_path *path, int level, int slot);
++static int setup_items_for_insert(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root, struct btrfs_path *path,
++ struct btrfs_key *cpu_key, u32 *data_size,
++ u32 total_data, u32 total_size, int nr);
++
+
+ struct btrfs_path *btrfs_alloc_path(void)
+ {
+@@ -451,9 +456,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ extent_buffer_get(cow);
+ spin_unlock(&root->node_lock);
+
+- btrfs_free_extent(trans, root, buf->start, buf->len,
+- parent_start, root->root_key.objectid,
+- level, 0);
++ btrfs_free_tree_block(trans, root, buf->start, buf->len,
++ parent_start, root->root_key.objectid, level);
+ free_extent_buffer(buf);
+ add_root_to_dirty_list(root);
+ } else {
+@@ -468,9 +472,8 @@ static noinline int __btrfs_cow_block(struct btrfs_trans_handle *trans,
+ btrfs_set_node_ptr_generation(parent, parent_slot,
+ trans->transid);
+ btrfs_mark_buffer_dirty(parent);
+- btrfs_free_extent(trans, root, buf->start, buf->len,
+- parent_start, root->root_key.objectid,
+- level, 0);
++ btrfs_free_tree_block(trans, root, buf->start, buf->len,
++ parent_start, root->root_key.objectid, level);
+ }
+ if (unlock_orig)
+ btrfs_tree_unlock(buf);
+@@ -1030,8 +1033,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ btrfs_tree_unlock(mid);
+ /* once for the path */
+ free_extent_buffer(mid);
+- ret = btrfs_free_extent(trans, root, mid->start, mid->len,
+- 0, root->root_key.objectid, level, 1);
++ ret = btrfs_free_tree_block(trans, root, mid->start, mid->len,
++ 0, root->root_key.objectid, level);
+ /* once for the root ptr */
+ free_extent_buffer(mid);
+ return ret;
+@@ -1095,10 +1098,10 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ 1);
+ if (wret)
+ ret = wret;
+- wret = btrfs_free_extent(trans, root, bytenr,
+- blocksize, 0,
+- root->root_key.objectid,
+- level, 0);
++ wret = btrfs_free_tree_block(trans, root,
++ bytenr, blocksize, 0,
++ root->root_key.objectid,
++ level);
+ if (wret)
+ ret = wret;
+ } else {
+@@ -1143,9 +1146,8 @@ static noinline int balance_level(struct btrfs_trans_handle *trans,
+ wret = del_ptr(trans, root, path, level + 1, pslot);
+ if (wret)
+ ret = wret;
+- wret = btrfs_free_extent(trans, root, bytenr, blocksize,
+- 0, root->root_key.objectid,
+- level, 0);
++ wret = btrfs_free_tree_block(trans, root, bytenr, blocksize,
++ 0, root->root_key.objectid, level);
+ if (wret)
+ ret = wret;
+ } else {
+@@ -2997,75 +2999,85 @@ again:
+ return ret;
+ }
+
+-/*
+- * This function splits a single item into two items,
+- * giving 'new_key' to the new item and splitting the
+- * old one at split_offset (from the start of the item).
+- *
+- * The path may be released by this operation. After
+- * the split, the path is pointing to the old item. The
+- * new item is going to be in the same node as the old one.
+- *
+- * Note, the item being split must be smaller enough to live alone on
+- * a tree block with room for one extra struct btrfs_item
+- *
+- * This allows us to split the item in place, keeping a lock on the
+- * leaf the entire time.
+- */
+-int btrfs_split_item(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+- struct btrfs_path *path,
+- struct btrfs_key *new_key,
+- unsigned long split_offset)
++static noinline int setup_leaf_for_split(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct btrfs_path *path, int ins_len)
+ {
+- u32 item_size;
++ struct btrfs_key key;
+ struct extent_buffer *leaf;
+- struct btrfs_key orig_key;
+- struct btrfs_item *item;
+- struct btrfs_item *new_item;
+- int ret = 0;
+- int slot;
+- u32 nritems;
+- u32 orig_offset;
+- struct btrfs_disk_key disk_key;
+- char *buf;
++ struct btrfs_file_extent_item *fi;
++ u64 extent_len = 0;
++ u32 item_size;
++ int ret;
+
+ leaf = path->nodes[0];
+- btrfs_item_key_to_cpu(leaf, &orig_key, path->slots[0]);
+- if (btrfs_leaf_free_space(root, leaf) >= sizeof(struct btrfs_item))
+- goto split;
++ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
++
++ BUG_ON(key.type != BTRFS_EXTENT_DATA_KEY &&
++ key.type != BTRFS_EXTENT_CSUM_KEY);
++
++ if (btrfs_leaf_free_space(root, leaf) >= ins_len)
++ return 0;
+
+ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
++ if (key.type == BTRFS_EXTENT_DATA_KEY) {
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++ extent_len = btrfs_file_extent_num_bytes(leaf, fi);
++ }
+ btrfs_release_path(root, path);
+
+- path->search_for_split = 1;
+ path->keep_locks = 1;
+-
+- ret = btrfs_search_slot(trans, root, &orig_key, path, 0, 1);
++ path->search_for_split = 1;
++ ret = btrfs_search_slot(trans, root, &key, path, 0, 1);
+ path->search_for_split = 0;
++ if (ret < 0)
++ goto err;
+
++ ret = -EAGAIN;
++ leaf = path->nodes[0];
+ /* if our item isn't there or got smaller, return now */
+- if (ret != 0 || item_size != btrfs_item_size_nr(path->nodes[0],
+- path->slots[0])) {
+- path->keep_locks = 0;
+- return -EAGAIN;
++ if (ret > 0 || item_size != btrfs_item_size_nr(leaf, path->slots[0]))
++ goto err;
++
++ if (key.type == BTRFS_EXTENT_DATA_KEY) {
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++ if (extent_len != btrfs_file_extent_num_bytes(leaf, fi))
++ goto err;
+ }
+
+ btrfs_set_path_blocking(path);
+- ret = split_leaf(trans, root, &orig_key, path,
+- sizeof(struct btrfs_item), 1);
+- path->keep_locks = 0;
++ ret = split_leaf(trans, root, &key, path, ins_len, 1);
+ BUG_ON(ret);
+
++ path->keep_locks = 0;
+ btrfs_unlock_up_safe(path, 1);
++ return 0;
++err:
++ path->keep_locks = 0;
++ return ret;
++}
++
++static noinline int split_item(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct btrfs_path *path,
++ struct btrfs_key *new_key,
++ unsigned long split_offset)
++{
++ struct extent_buffer *leaf;
++ struct btrfs_item *item;
++ struct btrfs_item *new_item;
++ int slot;
++ char *buf;
++ u32 nritems;
++ u32 item_size;
++ u32 orig_offset;
++ struct btrfs_disk_key disk_key;
++
+ leaf = path->nodes[0];
+ BUG_ON(btrfs_leaf_free_space(root, leaf) < sizeof(struct btrfs_item));
+
+-split:
+- /*
+- * make sure any changes to the path from split_leaf leave it
+- * in a blocking state
+- */
+ btrfs_set_path_blocking(path);
+
+ item = btrfs_item_nr(leaf, path->slots[0]);
+@@ -3073,19 +3085,19 @@ split:
+ item_size = btrfs_item_size(leaf, item);
+
+ buf = kmalloc(item_size, GFP_NOFS);
++ if (!buf)
++ return -ENOMEM;
++
+ read_extent_buffer(leaf, buf, btrfs_item_ptr_offset(leaf,
+ path->slots[0]), item_size);
+- slot = path->slots[0] + 1;
+- leaf = path->nodes[0];
+
++ slot = path->slots[0] + 1;
+ nritems = btrfs_header_nritems(leaf);
+-
+ if (slot != nritems) {
+ /* shift the items */
+ memmove_extent_buffer(leaf, btrfs_item_nr_offset(slot + 1),
+- btrfs_item_nr_offset(slot),
+- (nritems - slot) * sizeof(struct btrfs_item));
+-
++ btrfs_item_nr_offset(slot),
++ (nritems - slot) * sizeof(struct btrfs_item));
+ }
+
+ btrfs_cpu_key_to_disk(&disk_key, new_key);
+@@ -3113,16 +3125,81 @@ split:
+ item_size - split_offset);
+ btrfs_mark_buffer_dirty(leaf);
+
+- ret = 0;
+- if (btrfs_leaf_free_space(root, leaf) < 0) {
+- btrfs_print_leaf(root, leaf);
+- BUG();
+- }
++ BUG_ON(btrfs_leaf_free_space(root, leaf) < 0);
+ kfree(buf);
++ return 0;
++}
++
++/*
++ * This function splits a single item into two items,
++ * giving 'new_key' to the new item and splitting the
++ * old one at split_offset (from the start of the item).
++ *
++ * The path may be released by this operation. After
++ * the split, the path is pointing to the old item. The
++ * new item is going to be in the same node as the old one.
++ *
++ * Note, the item being split must be small enough to live alone on
++ * a tree block with room for one extra struct btrfs_item
++ *
++ * This allows us to split the item in place, keeping a lock on the
++ * leaf the entire time.
++ */
++int btrfs_split_item(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct btrfs_path *path,
++ struct btrfs_key *new_key,
++ unsigned long split_offset)
++{
++ int ret;
++ ret = setup_leaf_for_split(trans, root, path,
++ sizeof(struct btrfs_item));
++ if (ret)
++ return ret;
++
++ ret = split_item(trans, root, path, new_key, split_offset);
+ return ret;
+ }
+
+ /*
++ * This function duplicates an item, giving 'new_key' to the new item.
++ * It guarantees both items live in the same tree leaf and the new item
++ * is contiguous with the original item.
++ *
++ * This allows us to split a file extent in place, keeping a lock on the
++ * leaf the entire time.
++ */
++int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct btrfs_path *path,
++ struct btrfs_key *new_key)
++{
++ struct extent_buffer *leaf;
++ int ret;
++ u32 item_size;
++
++ leaf = path->nodes[0];
++ item_size = btrfs_item_size_nr(leaf, path->slots[0]);
++ ret = setup_leaf_for_split(trans, root, path,
++ item_size + sizeof(struct btrfs_item));
++ if (ret)
++ return ret;
++
++ path->slots[0]++;
++ ret = setup_items_for_insert(trans, root, path, new_key, &item_size,
++ item_size, item_size +
++ sizeof(struct btrfs_item), 1);
++ BUG_ON(ret);
++
++ leaf = path->nodes[0];
++ memcpy_extent_buffer(leaf,
++ btrfs_item_ptr_offset(leaf, path->slots[0]),
++ btrfs_item_ptr_offset(leaf, path->slots[0] - 1),
++ item_size);
++ return 0;
++}
++
++/*
+ * make the item pointed to by the path smaller. new_size indicates
+ * how small to make it, and from_end tells us if we just chop bytes
+ * off the end of the item or if we shift the item to chop bytes off
+@@ -3714,8 +3791,8 @@ static noinline int btrfs_del_leaf(struct btrfs_trans_handle *trans,
+ */
+ btrfs_unlock_up_safe(path, 0);
+
+- ret = btrfs_free_extent(trans, root, leaf->start, leaf->len,
+- 0, root->root_key.objectid, 0, 0);
++ ret = btrfs_free_tree_block(trans, root, leaf->start, leaf->len,
++ 0, root->root_key.objectid, 0);
+ return ret;
+ }
+ /*
+diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
+index 444b3e9..9f806dd 100644
+--- a/fs/btrfs/ctree.h
++++ b/fs/btrfs/ctree.h
+@@ -310,6 +310,9 @@ struct btrfs_header {
+ #define BTRFS_MAX_INLINE_DATA_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
+ sizeof(struct btrfs_item) - \
+ sizeof(struct btrfs_file_extent_item))
++#define BTRFS_MAX_XATTR_SIZE(r) (BTRFS_LEAF_DATA_SIZE(r) - \
++ sizeof(struct btrfs_item) -\
++ sizeof(struct btrfs_dir_item))
+
+
+ /*
+@@ -859,8 +862,9 @@ struct btrfs_fs_info {
+ struct mutex ordered_operations_mutex;
+ struct rw_semaphore extent_commit_sem;
+
+- struct rw_semaphore subvol_sem;
++ struct rw_semaphore cleanup_work_sem;
+
++ struct rw_semaphore subvol_sem;
+ struct srcu_struct subvol_srcu;
+
+ struct list_head trans_list;
+@@ -868,6 +872,9 @@ struct btrfs_fs_info {
+ struct list_head dead_roots;
+ struct list_head caching_block_groups;
+
++ spinlock_t delayed_iput_lock;
++ struct list_head delayed_iputs;
++
+ atomic_t nr_async_submits;
+ atomic_t async_submit_draining;
+ atomic_t nr_async_bios;
+@@ -1034,12 +1041,12 @@ struct btrfs_root {
+ int ref_cows;
+ int track_dirty;
+ int in_radix;
++ int clean_orphans;
+
+ u64 defrag_trans_start;
+ struct btrfs_key defrag_progress;
+ struct btrfs_key defrag_max;
+ int defrag_running;
+- int defrag_level;
+ char *name;
+ int in_sysfs;
+
+@@ -1975,6 +1982,10 @@ struct extent_buffer *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
+ u64 parent, u64 root_objectid,
+ struct btrfs_disk_key *key, int level,
+ u64 hint, u64 empty_size);
++int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ u64 bytenr, u32 blocksize,
++ u64 parent, u64 root_objectid, int level);
+ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ u64 bytenr, u32 blocksize,
+@@ -2089,6 +2100,10 @@ int btrfs_split_item(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_key *new_key,
+ unsigned long split_offset);
++int btrfs_duplicate_item(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct btrfs_path *path,
++ struct btrfs_key *new_key);
+ int btrfs_search_slot(struct btrfs_trans_handle *trans, struct btrfs_root
+ *root, struct btrfs_key *key, struct btrfs_path *p, int
+ ins_len, int cow);
+@@ -2196,9 +2211,10 @@ int btrfs_delete_one_dir_name(struct btrfs_trans_handle *trans,
+ struct btrfs_path *path,
+ struct btrfs_dir_item *di);
+ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, const char *name,
+- u16 name_len, const void *data, u16 data_len,
+- u64 dir);
++ struct btrfs_root *root,
++ struct btrfs_path *path, u64 objectid,
++ const char *name, u16 name_len,
++ const void *data, u16 data_len);
+ struct btrfs_dir_item *btrfs_lookup_xattr(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct btrfs_path *path, u64 dir,
+@@ -2292,7 +2308,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 new_size,
+ u32 min_type);
+
+-int btrfs_start_delalloc_inodes(struct btrfs_root *root);
++int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput);
+ int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end);
+ int btrfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc);
+@@ -2332,6 +2348,8 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode);
+ void btrfs_orphan_cleanup(struct btrfs_root *root);
+ int btrfs_cont_expand(struct inode *inode, loff_t size);
+ int btrfs_invalidate_inodes(struct btrfs_root *root);
++void btrfs_add_delayed_iput(struct inode *inode);
++void btrfs_run_delayed_iputs(struct btrfs_root *root);
+ extern const struct dentry_operations btrfs_dentry_operations;
+
+ /* ioctl.c */
+@@ -2345,12 +2363,9 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+ int skip_pinned);
+ int btrfs_check_file(struct btrfs_root *root, struct inode *inode);
+ extern const struct file_operations btrfs_file_operations;
+-int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, struct inode *inode,
+- u64 start, u64 end, u64 locked_end,
+- u64 inline_limit, u64 *hint_block, int drop_cache);
++int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
++ u64 start, u64 end, u64 *hint_byte, int drop_cache);
+ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+ struct inode *inode, u64 start, u64 end);
+ int btrfs_release_file(struct inode *inode, struct file *file);
+
+@@ -2380,7 +2395,8 @@ int btrfs_check_acl(struct inode *inode, int mask);
+ #else
+ #define btrfs_check_acl NULL
+ #endif
+-int btrfs_init_acl(struct inode *inode, struct inode *dir);
++int btrfs_init_acl(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct inode *dir);
+ int btrfs_acl_chmod(struct inode *inode);
+
+ /* relocation.c */
+diff --git a/fs/btrfs/dir-item.c b/fs/btrfs/dir-item.c
+index f3a6075..e9103b3 100644
+--- a/fs/btrfs/dir-item.c
++++ b/fs/btrfs/dir-item.c
+@@ -68,12 +68,12 @@ static struct btrfs_dir_item *insert_with_overflow(struct btrfs_trans_handle
+ * into the tree
+ */
+ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, const char *name,
+- u16 name_len, const void *data, u16 data_len,
+- u64 dir)
++ struct btrfs_root *root,
++ struct btrfs_path *path, u64 objectid,
++ const char *name, u16 name_len,
++ const void *data, u16 data_len)
+ {
+ int ret = 0;
+- struct btrfs_path *path;
+ struct btrfs_dir_item *dir_item;
+ unsigned long name_ptr, data_ptr;
+ struct btrfs_key key, location;
+@@ -81,15 +81,11 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+ struct extent_buffer *leaf;
+ u32 data_size;
+
+- key.objectid = dir;
++ BUG_ON(name_len + data_len > BTRFS_MAX_XATTR_SIZE(root));
++
++ key.objectid = objectid;
+ btrfs_set_key_type(&key, BTRFS_XATTR_ITEM_KEY);
+ key.offset = btrfs_name_hash(name, name_len);
+- path = btrfs_alloc_path();
+- if (!path)
+- return -ENOMEM;
+- if (name_len + data_len + sizeof(struct btrfs_dir_item) >
+- BTRFS_LEAF_DATA_SIZE(root) - sizeof(struct btrfs_item))
+- return -ENOSPC;
+
+ data_size = sizeof(*dir_item) + name_len + data_len;
+ dir_item = insert_with_overflow(trans, root, path, &key, data_size,
+@@ -117,7 +113,6 @@ int btrfs_insert_xattr_item(struct btrfs_trans_handle *trans,
+ write_extent_buffer(leaf, data, data_ptr, data_len);
+ btrfs_mark_buffer_dirty(path->nodes[0]);
+
+- btrfs_free_path(path);
+ return ret;
+ }
+
+diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
+index 02b6afb..2b59201 100644
+--- a/fs/btrfs/disk-io.c
++++ b/fs/btrfs/disk-io.c
+@@ -892,6 +892,8 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+ root->stripesize = stripesize;
+ root->ref_cows = 0;
+ root->track_dirty = 0;
++ root->in_radix = 0;
++ root->clean_orphans = 0;
+
+ root->fs_info = fs_info;
+ root->objectid = objectid;
+@@ -928,7 +930,6 @@ static int __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
+ root->defrag_trans_start = fs_info->generation;
+ init_completion(&root->kobj_unregister);
+ root->defrag_running = 0;
+- root->defrag_level = 0;
+ root->root_key.objectid = objectid;
+ root->anon_super.s_root = NULL;
+ root->anon_super.s_dev = 0;
+@@ -980,12 +981,12 @@ int btrfs_free_log_root_tree(struct btrfs_trans_handle *trans,
+
+ while (1) {
+ ret = find_first_extent_bit(&log_root_tree->dirty_log_pages,
+- 0, &start, &end, EXTENT_DIRTY);
++ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
+ if (ret)
+ break;
+
+- clear_extent_dirty(&log_root_tree->dirty_log_pages,
+- start, end, GFP_NOFS);
++ clear_extent_bits(&log_root_tree->dirty_log_pages, start, end,
++ EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
+ }
+ eb = fs_info->log_root_tree->node;
+
+@@ -1210,8 +1211,10 @@ again:
+ ret = radix_tree_insert(&fs_info->fs_roots_radix,
+ (unsigned long)root->root_key.objectid,
+ root);
+- if (ret == 0)
++ if (ret == 0) {
+ root->in_radix = 1;
++ root->clean_orphans = 1;
++ }
+ spin_unlock(&fs_info->fs_roots_radix_lock);
+ radix_tree_preload_end();
+ if (ret) {
+@@ -1225,10 +1228,6 @@ again:
+ ret = btrfs_find_dead_roots(fs_info->tree_root,
+ root->root_key.objectid);
+ WARN_ON(ret);
+-
+- if (!(fs_info->sb->s_flags & MS_RDONLY))
+- btrfs_orphan_cleanup(root);
+-
+ return root;
+ fail:
+ free_fs_root(root);
+@@ -1477,6 +1476,7 @@ static int cleaner_kthread(void *arg)
+
+ if (!(root->fs_info->sb->s_flags & MS_RDONLY) &&
+ mutex_trylock(&root->fs_info->cleaner_mutex)) {
++ btrfs_run_delayed_iputs(root);
+ btrfs_clean_old_snapshots(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
+ }
+@@ -1606,6 +1606,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
+ INIT_RADIX_TREE(&fs_info->fs_roots_radix, GFP_ATOMIC);
+ INIT_LIST_HEAD(&fs_info->trans_list);
+ INIT_LIST_HEAD(&fs_info->dead_roots);
++ INIT_LIST_HEAD(&fs_info->delayed_iputs);
+ INIT_LIST_HEAD(&fs_info->hashers);
+ INIT_LIST_HEAD(&fs_info->delalloc_inodes);
+ INIT_LIST_HEAD(&fs_info->ordered_operations);
+@@ -1614,6 +1615,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
+ spin_lock_init(&fs_info->new_trans_lock);
+ spin_lock_init(&fs_info->ref_cache_lock);
+ spin_lock_init(&fs_info->fs_roots_radix_lock);
++ spin_lock_init(&fs_info->delayed_iput_lock);
+
+ init_completion(&fs_info->kobj_unregister);
+ fs_info->tree_root = tree_root;
+@@ -1689,6 +1691,7 @@ struct btrfs_root *open_ctree(struct super_block *sb,
+ mutex_init(&fs_info->cleaner_mutex);
+ mutex_init(&fs_info->volume_mutex);
+ init_rwsem(&fs_info->extent_commit_sem);
++ init_rwsem(&fs_info->cleanup_work_sem);
+ init_rwsem(&fs_info->subvol_sem);
+
+ btrfs_init_free_cluster(&fs_info->meta_alloc_cluster);
+@@ -1979,7 +1982,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
+
+ if (!(sb->s_flags & MS_RDONLY)) {
+ ret = btrfs_recover_relocation(tree_root);
+- BUG_ON(ret);
++ if (ret < 0) {
++ printk(KERN_WARNING
++ "btrfs: failed to recover relocation\n");
++ err = -EINVAL;
++ goto fail_trans_kthread;
++ }
+ }
+
+ location.objectid = BTRFS_FS_TREE_OBJECTID;
+@@ -1990,6 +1998,12 @@ struct btrfs_root *open_ctree(struct super_block *sb,
+ if (!fs_info->fs_root)
+ goto fail_trans_kthread;
+
++ if (!(sb->s_flags & MS_RDONLY)) {
++ down_read(&fs_info->cleanup_work_sem);
++ btrfs_orphan_cleanup(fs_info->fs_root);
++ up_read(&fs_info->cleanup_work_sem);
++ }
++
+ return tree_root;
+
+ fail_trans_kthread:
+@@ -2386,8 +2400,14 @@ int btrfs_commit_super(struct btrfs_root *root)
+ int ret;
+
+ mutex_lock(&root->fs_info->cleaner_mutex);
++ btrfs_run_delayed_iputs(root);
+ btrfs_clean_old_snapshots(root);
+ mutex_unlock(&root->fs_info->cleaner_mutex);
++
++	/* wait until ongoing cleanup work is done */
++ down_write(&root->fs_info->cleanup_work_sem);
++ up_write(&root->fs_info->cleanup_work_sem);
++
+ trans = btrfs_start_transaction(root, 1);
+ ret = btrfs_commit_transaction(trans, root);
+ BUG_ON(ret);
+diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
+index 94627c4..559f724 100644
+--- a/fs/btrfs/extent-tree.c
++++ b/fs/btrfs/extent-tree.c
+@@ -83,6 +83,17 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits)
+ return (cache->flags & bits) == bits;
+ }
+
++void btrfs_get_block_group(struct btrfs_block_group_cache *cache)
++{
++ atomic_inc(&cache->count);
++}
++
++void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
++{
++ if (atomic_dec_and_test(&cache->count))
++ kfree(cache);
++}
++
+ /*
+ * this adds the block group to the fs_info rb tree for the block group
+ * cache
+@@ -156,7 +167,7 @@ block_group_cache_tree_search(struct btrfs_fs_info *info, u64 bytenr,
+ }
+ }
+ if (ret)
+- atomic_inc(&ret->count);
++ btrfs_get_block_group(ret);
+ spin_unlock(&info->block_group_cache_lock);
+
+ return ret;
+@@ -195,6 +206,14 @@ static int exclude_super_stripes(struct btrfs_root *root,
+ int stripe_len;
+ int i, nr, ret;
+
++ if (cache->key.objectid < BTRFS_SUPER_INFO_OFFSET) {
++ stripe_len = BTRFS_SUPER_INFO_OFFSET - cache->key.objectid;
++ cache->bytes_super += stripe_len;
++ ret = add_excluded_extent(root, cache->key.objectid,
++ stripe_len);
++ BUG_ON(ret);
++ }
++
+ for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
+ bytenr = btrfs_sb_offset(i);
+ ret = btrfs_rmap_block(&root->fs_info->mapping_tree,
+@@ -255,7 +274,7 @@ static u64 add_new_free_space(struct btrfs_block_group_cache *block_group,
+ if (ret)
+ break;
+
+- if (extent_start == start) {
++ if (extent_start <= start) {
+ start = extent_end + 1;
+ } else if (extent_start > start && extent_start < end) {
+ size = extent_start - start;
+@@ -399,6 +418,8 @@ err:
+
+ put_caching_control(caching_ctl);
+ atomic_dec(&block_group->space_info->caching_threads);
++ btrfs_put_block_group(block_group);
++
+ return 0;
+ }
+
+@@ -439,6 +460,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache)
+ up_write(&fs_info->extent_commit_sem);
+
+ atomic_inc(&cache->space_info->caching_threads);
++ btrfs_get_block_group(cache);
+
+ tsk = kthread_run(caching_kthread, cache, "btrfs-cache-%llu\n",
+ cache->key.objectid);
+@@ -478,12 +500,6 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
+ return cache;
+ }
+
+-void btrfs_put_block_group(struct btrfs_block_group_cache *cache)
+-{
+- if (atomic_dec_and_test(&cache->count))
+- kfree(cache);
+-}
+-
+ static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
+ u64 flags)
+ {
+@@ -2574,7 +2590,7 @@ next_block_group(struct btrfs_root *root,
+ if (node) {
+ cache = rb_entry(node, struct btrfs_block_group_cache,
+ cache_node);
+- atomic_inc(&cache->count);
++ btrfs_get_block_group(cache);
+ } else
+ cache = NULL;
+ spin_unlock(&root->fs_info->block_group_cache_lock);
+@@ -2880,9 +2896,9 @@ static noinline void flush_delalloc_async(struct btrfs_work *work)
+ root = async->root;
+ info = async->info;
+
+- btrfs_start_delalloc_inodes(root);
++ btrfs_start_delalloc_inodes(root, 0);
+ wake_up(&info->flush_wait);
+- btrfs_wait_ordered_extents(root, 0);
++ btrfs_wait_ordered_extents(root, 0, 0);
+
+ spin_lock(&info->lock);
+ info->flushing = 0;
+@@ -2956,8 +2972,8 @@ static void flush_delalloc(struct btrfs_root *root,
+ return;
+
+ flush:
+- btrfs_start_delalloc_inodes(root);
+- btrfs_wait_ordered_extents(root, 0);
++ btrfs_start_delalloc_inodes(root, 0);
++ btrfs_wait_ordered_extents(root, 0, 0);
+
+ spin_lock(&info->lock);
+ info->flushing = 0;
+@@ -3454,14 +3470,6 @@ static int update_block_group(struct btrfs_trans_handle *trans,
+ else
+ old_val -= num_bytes;
+ btrfs_set_super_bytes_used(&info->super_copy, old_val);
+-
+- /* block accounting for root item */
+- old_val = btrfs_root_used(&root->root_item);
+- if (alloc)
+- old_val += num_bytes;
+- else
+- old_val -= num_bytes;
+- btrfs_set_root_used(&root->root_item, old_val);
+ spin_unlock(&info->delalloc_lock);
+
+ while (total) {
+@@ -4049,6 +4057,21 @@ int btrfs_free_extent(struct btrfs_trans_handle *trans,
+ return ret;
+ }
+
++int btrfs_free_tree_block(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ u64 bytenr, u32 blocksize,
++ u64 parent, u64 root_objectid, int level)
++{
++ u64 used;
++ spin_lock(&root->node_lock);
++ used = btrfs_root_used(&root->root_item) - blocksize;
++ btrfs_set_root_used(&root->root_item, used);
++ spin_unlock(&root->node_lock);
++
++ return btrfs_free_extent(trans, root, bytenr, blocksize,
++ parent, root_objectid, level, 0);
++}
++
+ static u64 stripe_align(struct btrfs_root *root, u64 val)
+ {
+ u64 mask = ((u64)root->stripesize - 1);
+@@ -4212,7 +4235,7 @@ search:
+ u64 offset;
+ int cached;
+
+- atomic_inc(&block_group->count);
++ btrfs_get_block_group(block_group);
+ search_start = block_group->key.objectid;
+
+ have_block_group:
+@@ -4300,7 +4323,7 @@ have_block_group:
+
+ btrfs_put_block_group(block_group);
+ block_group = last_ptr->block_group;
+- atomic_inc(&block_group->count);
++ btrfs_get_block_group(block_group);
+ spin_unlock(&last_ptr->lock);
+ spin_unlock(&last_ptr->refill_lock);
+
+@@ -4578,7 +4601,6 @@ int btrfs_reserve_extent(struct btrfs_trans_handle *trans,
+ {
+ int ret;
+ u64 search_start = 0;
+- struct btrfs_fs_info *info = root->fs_info;
+
+ data = btrfs_get_alloc_profile(root, data);
+ again:
+@@ -4586,17 +4608,9 @@ again:
+ * the only place that sets empty_size is btrfs_realloc_node, which
+ * is not called recursively on allocations
+ */
+- if (empty_size || root->ref_cows) {
+- if (!(data & BTRFS_BLOCK_GROUP_METADATA)) {
+- ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+- 2 * 1024 * 1024,
+- BTRFS_BLOCK_GROUP_METADATA |
+- (info->metadata_alloc_profile &
+- info->avail_metadata_alloc_bits), 0);
+- }
++ if (empty_size || root->ref_cows)
+ ret = do_chunk_alloc(trans, root->fs_info->extent_root,
+ num_bytes + 2 * 1024 * 1024, data, 0);
+- }
+
+ WARN_ON(num_bytes < root->sectorsize);
+ ret = find_free_extent(trans, root, num_bytes, empty_size,
+@@ -4897,6 +4911,14 @@ static int alloc_tree_block(struct btrfs_trans_handle *trans,
+ extent_op);
+ BUG_ON(ret);
+ }
++
++ if (root_objectid == root->root_key.objectid) {
++ u64 used;
++ spin_lock(&root->node_lock);
++ used = btrfs_root_used(&root->root_item) + num_bytes;
++ btrfs_set_root_used(&root->root_item, used);
++ spin_unlock(&root->node_lock);
++ }
+ return ret;
+ }
+
+@@ -4919,8 +4941,16 @@ struct extent_buffer *btrfs_init_new_buffer(struct btrfs_trans_handle *trans,
+ btrfs_set_buffer_uptodate(buf);
+
+ if (root->root_key.objectid == BTRFS_TREE_LOG_OBJECTID) {
+- set_extent_dirty(&root->dirty_log_pages, buf->start,
+- buf->start + buf->len - 1, GFP_NOFS);
++ /*
++ * we allow two log transactions at a time, use different
++ * EXTENT bit to differentiate dirty pages.
++ */
++ if (root->log_transid % 2 == 0)
++ set_extent_dirty(&root->dirty_log_pages, buf->start,
++ buf->start + buf->len - 1, GFP_NOFS);
++ else
++ set_extent_new(&root->dirty_log_pages, buf->start,
++ buf->start + buf->len - 1, GFP_NOFS);
+ } else {
+ set_extent_dirty(&trans->transaction->dirty_pages, buf->start,
+ buf->start + buf->len - 1, GFP_NOFS);
+@@ -5372,10 +5402,6 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+ int ret;
+
+ while (level >= 0) {
+- if (path->slots[level] >=
+- btrfs_header_nritems(path->nodes[level]))
+- break;
+-
+ ret = walk_down_proc(trans, root, path, wc, lookup_info);
+ if (ret > 0)
+ break;
+@@ -5383,6 +5409,10 @@ static noinline int walk_down_tree(struct btrfs_trans_handle *trans,
+ if (level == 0)
+ break;
+
++ if (path->slots[level] >=
++ btrfs_header_nritems(path->nodes[level]))
++ break;
++
+ ret = do_walk_down(trans, root, path, wc, &lookup_info);
+ if (ret > 0) {
+ path->slots[level]++;
+@@ -7373,9 +7403,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
+ wait_block_group_cache_done(block_group);
+
+ btrfs_remove_free_space_cache(block_group);
+-
+- WARN_ON(atomic_read(&block_group->count) != 1);
+- kfree(block_group);
++ btrfs_put_block_group(block_group);
+
+ spin_lock(&info->block_group_cache_lock);
+ }
+diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
+index 96577e8..b177ed3 100644
+--- a/fs/btrfs/extent_io.c
++++ b/fs/btrfs/extent_io.c
+@@ -3165,10 +3165,9 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree,
+ spin_unlock(&tree->buffer_lock);
+ goto free_eb;
+ }
+- spin_unlock(&tree->buffer_lock);
+-
+ /* add one reference for the tree */
+ atomic_inc(&eb->refs);
++ spin_unlock(&tree->buffer_lock);
+ return eb;
+
+ free_eb:
+diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
+index 06550af..a7fd9f3 100644
+--- a/fs/btrfs/file.c
++++ b/fs/btrfs/file.c
+@@ -179,18 +179,14 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+ }
+ flags = em->flags;
+ if (skip_pinned && test_bit(EXTENT_FLAG_PINNED, &em->flags)) {
+- if (em->start <= start &&
+- (!testend || em->start + em->len >= start + len)) {
++ if (testend && em->start + em->len >= start + len) {
+ free_extent_map(em);
+ write_unlock(&em_tree->lock);
+ break;
+ }
+- if (start < em->start) {
+- len = em->start - start;
+- } else {
++ start = em->start + em->len;
++ if (testend)
+ len = start + len - (em->start + em->len);
+- start = em->start + em->len;
+- }
+ free_extent_map(em);
+ write_unlock(&em_tree->lock);
+ continue;
+@@ -265,324 +261,253 @@ int btrfs_drop_extent_cache(struct inode *inode, u64 start, u64 end,
+ * If an extent intersects the range but is not entirely inside the range
+ * it is either truncated or split. Anything entirely inside the range
+ * is deleted from the tree.
+- *
+- * inline_limit is used to tell this code which offsets in the file to keep
+- * if they contain inline extents.
+ */
+-noinline int btrfs_drop_extents(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root, struct inode *inode,
+- u64 start, u64 end, u64 locked_end,
+- u64 inline_limit, u64 *hint_byte, int drop_cache)
++int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode,
++ u64 start, u64 end, u64 *hint_byte, int drop_cache)
+ {
+- u64 extent_end = 0;
+- u64 search_start = start;
+- u64 ram_bytes = 0;
+- u64 disk_bytenr = 0;
+- u64 orig_locked_end = locked_end;
+- u8 compression;
+- u8 encryption;
+- u16 other_encoding = 0;
++ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_buffer *leaf;
+- struct btrfs_file_extent_item *extent;
++ struct btrfs_file_extent_item *fi;
+ struct btrfs_path *path;
+ struct btrfs_key key;
+- struct btrfs_file_extent_item old;
+- int keep;
+- int slot;
+- int bookend;
+- int found_type = 0;
+- int found_extent;
+- int found_inline;
++ struct btrfs_key new_key;
++ u64 search_start = start;
++ u64 disk_bytenr = 0;
++ u64 num_bytes = 0;
++ u64 extent_offset = 0;
++ u64 extent_end = 0;
++ int del_nr = 0;
++ int del_slot = 0;
++ int extent_type;
+ int recow;
+ int ret;
+
+- inline_limit = 0;
+ if (drop_cache)
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
++
+ while (1) {
+ recow = 0;
+- btrfs_release_path(root, path);
+ ret = btrfs_lookup_file_extent(trans, root, path, inode->i_ino,
+ search_start, -1);
+ if (ret < 0)
+- goto out;
+- if (ret > 0) {
+- if (path->slots[0] == 0) {
+- ret = 0;
+- goto out;
+- }
+- path->slots[0]--;
++ break;
++ if (ret > 0 && path->slots[0] > 0 && search_start == start) {
++ leaf = path->nodes[0];
++ btrfs_item_key_to_cpu(leaf, &key, path->slots[0] - 1);
++ if (key.objectid == inode->i_ino &&
++ key.type == BTRFS_EXTENT_DATA_KEY)
++ path->slots[0]--;
+ }
++ ret = 0;
+ next_slot:
+- keep = 0;
+- bookend = 0;
+- found_extent = 0;
+- found_inline = 0;
+- compression = 0;
+- encryption = 0;
+- extent = NULL;
+ leaf = path->nodes[0];
+- slot = path->slots[0];
+- ret = 0;
+- btrfs_item_key_to_cpu(leaf, &key, slot);
+- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY &&
+- key.offset >= end) {
+- goto out;
+- }
+- if (btrfs_key_type(&key) > BTRFS_EXTENT_DATA_KEY ||
+- key.objectid != inode->i_ino) {
+- goto out;
+- }
+- if (recow) {
+- search_start = max(key.offset, start);
+- continue;
+- }
+- if (btrfs_key_type(&key) == BTRFS_EXTENT_DATA_KEY) {
+- extent = btrfs_item_ptr(leaf, slot,
+- struct btrfs_file_extent_item);
+- found_type = btrfs_file_extent_type(leaf, extent);
+- compression = btrfs_file_extent_compression(leaf,
+- extent);
+- encryption = btrfs_file_extent_encryption(leaf,
+- extent);
+- other_encoding = btrfs_file_extent_other_encoding(leaf,
+- extent);
+- if (found_type == BTRFS_FILE_EXTENT_REG ||
+- found_type == BTRFS_FILE_EXTENT_PREALLOC) {
+- extent_end =
+- btrfs_file_extent_disk_bytenr(leaf,
+- extent);
+- if (extent_end)
+- *hint_byte = extent_end;
+-
+- extent_end = key.offset +
+- btrfs_file_extent_num_bytes(leaf, extent);
+- ram_bytes = btrfs_file_extent_ram_bytes(leaf,
+- extent);
+- found_extent = 1;
+- } else if (found_type == BTRFS_FILE_EXTENT_INLINE) {
+- found_inline = 1;
+- extent_end = key.offset +
+- btrfs_file_extent_inline_len(leaf, extent);
++ if (path->slots[0] >= btrfs_header_nritems(leaf)) {
++ BUG_ON(del_nr > 0);
++ ret = btrfs_next_leaf(root, path);
++ if (ret < 0)
++ break;
++ if (ret > 0) {
++ ret = 0;
++ break;
+ }
++ leaf = path->nodes[0];
++ recow = 1;
++ }
++
++ btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
++ if (key.objectid > inode->i_ino ||
++ key.type > BTRFS_EXTENT_DATA_KEY || key.offset >= end)
++ break;
++
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++ extent_type = btrfs_file_extent_type(leaf, fi);
++
++ if (extent_type == BTRFS_FILE_EXTENT_REG ||
++ extent_type == BTRFS_FILE_EXTENT_PREALLOC) {
++ disk_bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
++ num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
++ extent_offset = btrfs_file_extent_offset(leaf, fi);
++ extent_end = key.offset +
++ btrfs_file_extent_num_bytes(leaf, fi);
++ } else if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
++ extent_end = key.offset +
++ btrfs_file_extent_inline_len(leaf, fi);
+ } else {
++ WARN_ON(1);
+ extent_end = search_start;
+ }
+
+- /* we found nothing we can drop */
+- if ((!found_extent && !found_inline) ||
+- search_start >= extent_end) {
+- int nextret;
+- u32 nritems;
+- nritems = btrfs_header_nritems(leaf);
+- if (slot >= nritems - 1) {
+- nextret = btrfs_next_leaf(root, path);
+- if (nextret)
+- goto out;
+- recow = 1;
+- } else {
+- path->slots[0]++;
+- }
++ if (extent_end <= search_start) {
++ path->slots[0]++;
+ goto next_slot;
+ }
+
+- if (end <= extent_end && start >= key.offset && found_inline)
+- *hint_byte = EXTENT_MAP_INLINE;
+-
+- if (found_extent) {
+- read_extent_buffer(leaf, &old, (unsigned long)extent,
+- sizeof(old));
+- }
+-
+- if (end < extent_end && end >= key.offset) {
+- bookend = 1;
+- if (found_inline && start <= key.offset)
+- keep = 1;
++ search_start = max(key.offset, start);
++ if (recow) {
++ btrfs_release_path(root, path);
++ continue;
+ }
+
+- if (bookend && found_extent) {
+- if (locked_end < extent_end) {
+- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
+- locked_end, extent_end - 1,
+- GFP_NOFS);
+- if (!ret) {
+- btrfs_release_path(root, path);
+- lock_extent(&BTRFS_I(inode)->io_tree,
+- locked_end, extent_end - 1,
+- GFP_NOFS);
+- locked_end = extent_end;
+- continue;
+- }
+- locked_end = extent_end;
++ /*
++ * | - range to drop - |
++ * | -------- extent -------- |
++ */
++ if (start > key.offset && end < extent_end) {
++ BUG_ON(del_nr > 0);
++ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
++
++ memcpy(&new_key, &key, sizeof(new_key));
++ new_key.offset = start;
++ ret = btrfs_duplicate_item(trans, root, path,
++ &new_key);
++ if (ret == -EAGAIN) {
++ btrfs_release_path(root, path);
++ continue;
+ }
+- disk_bytenr = le64_to_cpu(old.disk_bytenr);
+- if (disk_bytenr != 0) {
++ if (ret < 0)
++ break;
++
++ leaf = path->nodes[0];
++ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
++ struct btrfs_file_extent_item);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ start - key.offset);
++
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++
++ extent_offset += start - key.offset;
++ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ extent_end - start);
++ btrfs_mark_buffer_dirty(leaf);
++
++ if (disk_bytenr > 0) {
+ ret = btrfs_inc_extent_ref(trans, root,
+- disk_bytenr,
+- le64_to_cpu(old.disk_num_bytes), 0,
+- root->root_key.objectid,
+- key.objectid, key.offset -
+- le64_to_cpu(old.offset));
++ disk_bytenr, num_bytes, 0,
++ root->root_key.objectid,
++ new_key.objectid,
++ start - extent_offset);
+ BUG_ON(ret);
++ *hint_byte = disk_bytenr;
+ }
++ key.offset = start;
+ }
++ /*
++ * | ---- range to drop ----- |
++ * | -------- extent -------- |
++ */
++ if (start <= key.offset && end < extent_end) {
++ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
+
+- if (found_inline) {
+- u64 mask = root->sectorsize - 1;
+- search_start = (extent_end + mask) & ~mask;
+- } else
+- search_start = extent_end;
+-
+- /* truncate existing extent */
+- if (start > key.offset) {
+- u64 new_num;
+- u64 old_num;
+- keep = 1;
+- WARN_ON(start & (root->sectorsize - 1));
+- if (found_extent) {
+- new_num = start - key.offset;
+- old_num = btrfs_file_extent_num_bytes(leaf,
+- extent);
+- *hint_byte =
+- btrfs_file_extent_disk_bytenr(leaf,
+- extent);
+- if (btrfs_file_extent_disk_bytenr(leaf,
+- extent)) {
+- inode_sub_bytes(inode, old_num -
+- new_num);
+- }
+- btrfs_set_file_extent_num_bytes(leaf,
+- extent, new_num);
+- btrfs_mark_buffer_dirty(leaf);
+- } else if (key.offset < inline_limit &&
+- (end > extent_end) &&
+- (inline_limit < extent_end)) {
+- u32 new_size;
+- new_size = btrfs_file_extent_calc_inline_size(
+- inline_limit - key.offset);
+- inode_sub_bytes(inode, extent_end -
+- inline_limit);
+- btrfs_set_file_extent_ram_bytes(leaf, extent,
+- new_size);
+- if (!compression && !encryption) {
+- btrfs_truncate_item(trans, root, path,
+- new_size, 1);
+- }
++ memcpy(&new_key, &key, sizeof(new_key));
++ new_key.offset = end;
++ btrfs_set_item_key_safe(trans, root, path, &new_key);
++
++ extent_offset += end - key.offset;
++ btrfs_set_file_extent_offset(leaf, fi, extent_offset);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ extent_end - end);
++ btrfs_mark_buffer_dirty(leaf);
++ if (disk_bytenr > 0) {
++ inode_sub_bytes(inode, end - key.offset);
++ *hint_byte = disk_bytenr;
+ }
++ break;
+ }
+- /* delete the entire extent */
+- if (!keep) {
+- if (found_inline)
+- inode_sub_bytes(inode, extent_end -
+- key.offset);
+- ret = btrfs_del_item(trans, root, path);
+- /* TODO update progress marker and return */
+- BUG_ON(ret);
+- extent = NULL;
+- btrfs_release_path(root, path);
+- /* the extent will be freed later */
+- }
+- if (bookend && found_inline && start <= key.offset) {
+- u32 new_size;
+- new_size = btrfs_file_extent_calc_inline_size(
+- extent_end - end);
+- inode_sub_bytes(inode, end - key.offset);
+- btrfs_set_file_extent_ram_bytes(leaf, extent,
+- new_size);
+- if (!compression && !encryption)
+- ret = btrfs_truncate_item(trans, root, path,
+- new_size, 0);
+- BUG_ON(ret);
+- }
+- /* create bookend, splitting the extent in two */
+- if (bookend && found_extent) {
+- struct btrfs_key ins;
+- ins.objectid = inode->i_ino;
+- ins.offset = end;
+- btrfs_set_key_type(&ins, BTRFS_EXTENT_DATA_KEY);
+
+- btrfs_release_path(root, path);
+- path->leave_spinning = 1;
+- ret = btrfs_insert_empty_item(trans, root, path, &ins,
+- sizeof(*extent));
+- BUG_ON(ret);
++ search_start = extent_end;
++ /*
++ * | ---- range to drop ----- |
++ * | -------- extent -------- |
++ */
++ if (start > key.offset && end >= extent_end) {
++ BUG_ON(del_nr > 0);
++ BUG_ON(extent_type == BTRFS_FILE_EXTENT_INLINE);
+
+- leaf = path->nodes[0];
+- extent = btrfs_item_ptr(leaf, path->slots[0],
+- struct btrfs_file_extent_item);
+- write_extent_buffer(leaf, &old,
+- (unsigned long)extent, sizeof(old));
+-
+- btrfs_set_file_extent_compression(leaf, extent,
+- compression);
+- btrfs_set_file_extent_encryption(leaf, extent,
+- encryption);
+- btrfs_set_file_extent_other_encoding(leaf, extent,
+- other_encoding);
+- btrfs_set_file_extent_offset(leaf, extent,
+- le64_to_cpu(old.offset) + end - key.offset);
+- WARN_ON(le64_to_cpu(old.num_bytes) <
+- (extent_end - end));
+- btrfs_set_file_extent_num_bytes(leaf, extent,
+- extent_end - end);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ start - key.offset);
++ btrfs_mark_buffer_dirty(leaf);
++ if (disk_bytenr > 0) {
++ inode_sub_bytes(inode, extent_end - start);
++ *hint_byte = disk_bytenr;
++ }
++ if (end == extent_end)
++ break;
+
+- /*
+- * set the ram bytes to the size of the full extent
+- * before splitting. This is a worst case flag,
+- * but its the best we can do because we don't know
+- * how splitting affects compression
+- */
+- btrfs_set_file_extent_ram_bytes(leaf, extent,
+- ram_bytes);
+- btrfs_set_file_extent_type(leaf, extent, found_type);
+-
+- btrfs_unlock_up_safe(path, 1);
+- btrfs_mark_buffer_dirty(path->nodes[0]);
+- btrfs_set_lock_blocking(path->nodes[0]);
+-
+- path->leave_spinning = 0;
+- btrfs_release_path(root, path);
+- if (disk_bytenr != 0)
+- inode_add_bytes(inode, extent_end - end);
++ path->slots[0]++;
++ goto next_slot;
+ }
+
+- if (found_extent && !keep) {
+- u64 old_disk_bytenr = le64_to_cpu(old.disk_bytenr);
++ /*
++ * | ---- range to drop ----- |
++ * | ------ extent ------ |
++ */
++ if (start <= key.offset && end >= extent_end) {
++ if (del_nr == 0) {
++ del_slot = path->slots[0];
++ del_nr = 1;
++ } else {
++ BUG_ON(del_slot + del_nr != path->slots[0]);
++ del_nr++;
++ }
+
+- if (old_disk_bytenr != 0) {
++ if (extent_type == BTRFS_FILE_EXTENT_INLINE) {
+ inode_sub_bytes(inode,
+- le64_to_cpu(old.num_bytes));
++ extent_end - key.offset);
++ extent_end = ALIGN(extent_end,
++ root->sectorsize);
++ } else if (disk_bytenr > 0) {
+ ret = btrfs_free_extent(trans, root,
+- old_disk_bytenr,
+- le64_to_cpu(old.disk_num_bytes),
+- 0, root->root_key.objectid,
++ disk_bytenr, num_bytes, 0,
++ root->root_key.objectid,
+ key.objectid, key.offset -
+- le64_to_cpu(old.offset));
++ extent_offset);
+ BUG_ON(ret);
+- *hint_byte = old_disk_bytenr;
++ inode_sub_bytes(inode,
++ extent_end - key.offset);
++ *hint_byte = disk_bytenr;
+ }
+- }
+
+- if (search_start >= end) {
+- ret = 0;
+- goto out;
++ if (end == extent_end)
++ break;
++
++ if (path->slots[0] + 1 < btrfs_header_nritems(leaf)) {
++ path->slots[0]++;
++ goto next_slot;
++ }
++
++ ret = btrfs_del_items(trans, root, path, del_slot,
++ del_nr);
++ BUG_ON(ret);
++
++ del_nr = 0;
++ del_slot = 0;
++
++ btrfs_release_path(root, path);
++ continue;
+ }
++
++ BUG_ON(1);
+ }
+-out:
+- btrfs_free_path(path);
+- if (locked_end > orig_locked_end) {
+- unlock_extent(&BTRFS_I(inode)->io_tree, orig_locked_end,
+- locked_end - 1, GFP_NOFS);
++
++ if (del_nr > 0) {
++ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
++ BUG_ON(ret);
+ }
++
++ btrfs_free_path(path);
+ return ret;
+ }
+
+ static int extent_mergeable(struct extent_buffer *leaf, int slot,
+- u64 objectid, u64 bytenr, u64 *start, u64 *end)
++ u64 objectid, u64 bytenr, u64 orig_offset,
++ u64 *start, u64 *end)
+ {
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
+@@ -598,6 +523,7 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
+ fi = btrfs_item_ptr(leaf, slot, struct btrfs_file_extent_item);
+ if (btrfs_file_extent_type(leaf, fi) != BTRFS_FILE_EXTENT_REG ||
+ btrfs_file_extent_disk_bytenr(leaf, fi) != bytenr ||
++ btrfs_file_extent_offset(leaf, fi) != key.offset - orig_offset ||
+ btrfs_file_extent_compression(leaf, fi) ||
+ btrfs_file_extent_encryption(leaf, fi) ||
+ btrfs_file_extent_other_encoding(leaf, fi))
+@@ -620,23 +546,24 @@ static int extent_mergeable(struct extent_buffer *leaf, int slot,
+ * two or three.
+ */
+ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+ struct inode *inode, u64 start, u64 end)
+ {
++ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct extent_buffer *leaf;
+ struct btrfs_path *path;
+ struct btrfs_file_extent_item *fi;
+ struct btrfs_key key;
++ struct btrfs_key new_key;
+ u64 bytenr;
+ u64 num_bytes;
+ u64 extent_end;
+ u64 orig_offset;
+ u64 other_start;
+ u64 other_end;
+- u64 split = start;
+- u64 locked_end = end;
+- int extent_type;
+- int split_end = 1;
++ u64 split;
++ int del_nr = 0;
++ int del_slot = 0;
++ int recow;
+ int ret;
+
+ btrfs_drop_extent_cache(inode, start, end - 1, 0);
+@@ -644,12 +571,11 @@ int btrfs_mark_extent_written(struct btrfs_trans_handle *trans,
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ again:
++ recow = 0;
++ split = start;
+ key.objectid = inode->i_ino;
+ key.type = BTRFS_EXTENT_DATA_KEY;
+- if (split == start)
+- key.offset = split;
+- else
+- key.offset = split - 1;
++ key.offset = split;
+
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+ if (ret > 0 && path->slots[0] > 0)
+@@ -661,159 +587,158 @@ again:
+ key.type != BTRFS_EXTENT_DATA_KEY);
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+- extent_type = btrfs_file_extent_type(leaf, fi);
+- BUG_ON(extent_type != BTRFS_FILE_EXTENT_PREALLOC);
++ BUG_ON(btrfs_file_extent_type(leaf, fi) !=
++ BTRFS_FILE_EXTENT_PREALLOC);
+ extent_end = key.offset + btrfs_file_extent_num_bytes(leaf, fi);
+ BUG_ON(key.offset > start || extent_end < end);
+
+ bytenr = btrfs_file_extent_disk_bytenr(leaf, fi);
+ num_bytes = btrfs_file_extent_disk_num_bytes(leaf, fi);
+ orig_offset = key.offset - btrfs_file_extent_offset(leaf, fi);
++ memcpy(&new_key, &key, sizeof(new_key));
+
+- if (key.offset == start)
+- split = end;
+-
+- if (key.offset == start && extent_end == end) {
+- int del_nr = 0;
+- int del_slot = 0;
+- other_start = end;
+- other_end = 0;
+- if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
+- bytenr, &other_start, &other_end)) {
+- extent_end = other_end;
+- del_slot = path->slots[0] + 1;
+- del_nr++;
+- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+- 0, root->root_key.objectid,
+- inode->i_ino, orig_offset);
+- BUG_ON(ret);
+- }
++ if (start == key.offset && end < extent_end) {
+ other_start = 0;
+ other_end = start;
+- if (extent_mergeable(leaf, path->slots[0] - 1, inode->i_ino,
+- bytenr, &other_start, &other_end)) {
+- key.offset = other_start;
+- del_slot = path->slots[0];
+- del_nr++;
+- ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
+- 0, root->root_key.objectid,
+- inode->i_ino, orig_offset);
+- BUG_ON(ret);
+- }
+- split_end = 0;
+- if (del_nr == 0) {
+- btrfs_set_file_extent_type(leaf, fi,
+- BTRFS_FILE_EXTENT_REG);
+- goto done;
+- }
+-
+- fi = btrfs_item_ptr(leaf, del_slot - 1,
+- struct btrfs_file_extent_item);
+- btrfs_set_file_extent_type(leaf, fi, BTRFS_FILE_EXTENT_REG);
+- btrfs_set_file_extent_num_bytes(leaf, fi,
+- extent_end - key.offset);
+- btrfs_mark_buffer_dirty(leaf);
+-
+- ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
+- BUG_ON(ret);
+- goto release;
+- } else if (split == start) {
+- if (locked_end < extent_end) {
+- ret = try_lock_extent(&BTRFS_I(inode)->io_tree,
+- locked_end, extent_end - 1, GFP_NOFS);
+- if (!ret) {
+- btrfs_release_path(root, path);
+- lock_extent(&BTRFS_I(inode)->io_tree,
+- locked_end, extent_end - 1, GFP_NOFS);
+- locked_end = extent_end;
+- goto again;
+- }
+- locked_end = extent_end;
++ if (extent_mergeable(leaf, path->slots[0] - 1,
++ inode->i_ino, bytenr, orig_offset,
++ &other_start, &other_end)) {
++ new_key.offset = end;
++ btrfs_set_item_key_safe(trans, root, path, &new_key);
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ extent_end - end);
++ btrfs_set_file_extent_offset(leaf, fi,
++ end - orig_offset);
++ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
++ struct btrfs_file_extent_item);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ end - other_start);
++ btrfs_mark_buffer_dirty(leaf);
++ goto out;
+ }
+- btrfs_set_file_extent_num_bytes(leaf, fi, split - key.offset);
+- } else {
+- BUG_ON(key.offset != start);
+- key.offset = split;
+- btrfs_set_file_extent_offset(leaf, fi, key.offset -
+- orig_offset);
+- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - split);
+- btrfs_set_item_key_safe(trans, root, path, &key);
+- extent_end = split;
+ }
+
+- if (extent_end == end) {
+- split_end = 0;
+- extent_type = BTRFS_FILE_EXTENT_REG;
+- }
+- if (extent_end == end && split == start) {
++ if (start > key.offset && end == extent_end) {
+ other_start = end;
+ other_end = 0;
+- if (extent_mergeable(leaf, path->slots[0] + 1, inode->i_ino,
+- bytenr, &other_start, &other_end)) {
+- path->slots[0]++;
++ if (extent_mergeable(leaf, path->slots[0] + 1,
++ inode->i_ino, bytenr, orig_offset,
++ &other_start, &other_end)) {
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+- key.offset = split;
+- btrfs_set_item_key_safe(trans, root, path, &key);
+- btrfs_set_file_extent_offset(leaf, fi, key.offset -
+- orig_offset);
+ btrfs_set_file_extent_num_bytes(leaf, fi,
+- other_end - split);
+- goto done;
+- }
+- }
+- if (extent_end == end && split == end) {
+- other_start = 0;
+- other_end = start;
+- if (extent_mergeable(leaf, path->slots[0] - 1 , inode->i_ino,
+- bytenr, &other_start, &other_end)) {
+- path->slots[0]--;
++ start - key.offset);
++ path->slots[0]++;
++ new_key.offset = start;
++ btrfs_set_item_key_safe(trans, root, path, &new_key);
++
+ fi = btrfs_item_ptr(leaf, path->slots[0],
+ struct btrfs_file_extent_item);
+- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end -
+- other_start);
+- goto done;
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ other_end - start);
++ btrfs_set_file_extent_offset(leaf, fi,
++ start - orig_offset);
++ btrfs_mark_buffer_dirty(leaf);
++ goto out;
+ }
+ }
+
+- btrfs_mark_buffer_dirty(leaf);
++ while (start > key.offset || end < extent_end) {
++ if (key.offset == start)
++ split = end;
+
+- ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
+- root->root_key.objectid,
+- inode->i_ino, orig_offset);
+- BUG_ON(ret);
+- btrfs_release_path(root, path);
++ new_key.offset = split;
++ ret = btrfs_duplicate_item(trans, root, path, &new_key);
++ if (ret == -EAGAIN) {
++ btrfs_release_path(root, path);
++ goto again;
++ }
++ BUG_ON(ret < 0);
+
+- key.offset = start;
+- ret = btrfs_insert_empty_item(trans, root, path, &key, sizeof(*fi));
+- BUG_ON(ret);
++ leaf = path->nodes[0];
++ fi = btrfs_item_ptr(leaf, path->slots[0] - 1,
++ struct btrfs_file_extent_item);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ split - key.offset);
+
+- leaf = path->nodes[0];
+- fi = btrfs_item_ptr(leaf, path->slots[0],
+- struct btrfs_file_extent_item);
+- btrfs_set_file_extent_generation(leaf, fi, trans->transid);
+- btrfs_set_file_extent_type(leaf, fi, extent_type);
+- btrfs_set_file_extent_disk_bytenr(leaf, fi, bytenr);
+- btrfs_set_file_extent_disk_num_bytes(leaf, fi, num_bytes);
+- btrfs_set_file_extent_offset(leaf, fi, key.offset - orig_offset);
+- btrfs_set_file_extent_num_bytes(leaf, fi, extent_end - key.offset);
+- btrfs_set_file_extent_ram_bytes(leaf, fi, num_bytes);
+- btrfs_set_file_extent_compression(leaf, fi, 0);
+- btrfs_set_file_extent_encryption(leaf, fi, 0);
+- btrfs_set_file_extent_other_encoding(leaf, fi, 0);
+-done:
+- btrfs_mark_buffer_dirty(leaf);
+-
+-release:
+- btrfs_release_path(root, path);
+- if (split_end && split == start) {
+- split = end;
+- goto again;
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++
++ btrfs_set_file_extent_offset(leaf, fi, split - orig_offset);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ extent_end - split);
++ btrfs_mark_buffer_dirty(leaf);
++
++ ret = btrfs_inc_extent_ref(trans, root, bytenr, num_bytes, 0,
++ root->root_key.objectid,
++ inode->i_ino, orig_offset);
++ BUG_ON(ret);
++
++ if (split == start) {
++ key.offset = start;
++ } else {
++ BUG_ON(start != key.offset);
++ path->slots[0]--;
++ extent_end = end;
++ }
++ recow = 1;
++ }
++
++ other_start = end;
++ other_end = 0;
++ if (extent_mergeable(leaf, path->slots[0] + 1,
++ inode->i_ino, bytenr, orig_offset,
++ &other_start, &other_end)) {
++ if (recow) {
++ btrfs_release_path(root, path);
++ goto again;
++ }
++ extent_end = other_end;
++ del_slot = path->slots[0] + 1;
++ del_nr++;
++ ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
++ 0, root->root_key.objectid,
++ inode->i_ino, orig_offset);
++ BUG_ON(ret);
++ }
++ other_start = 0;
++ other_end = start;
++ if (extent_mergeable(leaf, path->slots[0] - 1,
++ inode->i_ino, bytenr, orig_offset,
++ &other_start, &other_end)) {
++ if (recow) {
++ btrfs_release_path(root, path);
++ goto again;
++ }
++ key.offset = other_start;
++ del_slot = path->slots[0];
++ del_nr++;
++ ret = btrfs_free_extent(trans, root, bytenr, num_bytes,
++ 0, root->root_key.objectid,
++ inode->i_ino, orig_offset);
++ BUG_ON(ret);
+ }
+- if (locked_end > end) {
+- unlock_extent(&BTRFS_I(inode)->io_tree, end, locked_end - 1,
+- GFP_NOFS);
++ if (del_nr == 0) {
++ fi = btrfs_item_ptr(leaf, path->slots[0],
++ struct btrfs_file_extent_item);
++ btrfs_set_file_extent_type(leaf, fi,
++ BTRFS_FILE_EXTENT_REG);
++ btrfs_mark_buffer_dirty(leaf);
++ } else {
++ fi = btrfs_item_ptr(leaf, del_slot - 1,
++ struct btrfs_file_extent_item);
++ btrfs_set_file_extent_type(leaf, fi,
++ BTRFS_FILE_EXTENT_REG);
++ btrfs_set_file_extent_num_bytes(leaf, fi,
++ extent_end - key.offset);
++ btrfs_mark_buffer_dirty(leaf);
++
++ ret = btrfs_del_items(trans, root, path, del_slot, del_nr);
++ BUG_ON(ret);
+ }
++out:
+ btrfs_free_path(path);
+ return 0;
+ }
+@@ -1210,7 +1135,7 @@ int btrfs_sync_file(struct file *file, struct dentry *dentry, int datasync)
+ }
+ mutex_lock(&dentry->d_inode->i_mutex);
+ out:
+- return ret > 0 ? EIO : ret;
++ return ret > 0 ? -EIO : ret;
+ }
+
+ static const struct vm_operations_struct btrfs_file_vm_ops = {
+diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
+index b3ad168..e03a836 100644
+--- a/fs/btrfs/inode.c
++++ b/fs/btrfs/inode.c
+@@ -88,13 +88,14 @@ static noinline int cow_file_range(struct inode *inode,
+ u64 start, u64 end, int *page_started,
+ unsigned long *nr_written, int unlock);
+
+-static int btrfs_init_inode_security(struct inode *inode, struct inode *dir)
++static int btrfs_init_inode_security(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct inode *dir)
+ {
+ int err;
+
+- err = btrfs_init_acl(inode, dir);
++ err = btrfs_init_acl(trans, inode, dir);
+ if (!err)
+- err = btrfs_xattr_security_init(inode, dir);
++ err = btrfs_xattr_security_init(trans, inode, dir);
+ return err;
+ }
+
+@@ -188,8 +189,18 @@ static noinline int insert_inline_extent(struct btrfs_trans_handle *trans,
+ btrfs_mark_buffer_dirty(leaf);
+ btrfs_free_path(path);
+
++ /*
++ * we're an inline extent, so nobody can
++ * extend the file past i_size without locking
++ * a page we already have locked.
++ *
++ * We must do any isize and inode updates
++ * before we unlock the pages. Otherwise we
++ * could end up racing with unlink.
++ */
+ BTRFS_I(inode)->disk_i_size = inode->i_size;
+ btrfs_update_inode(trans, root, inode);
++
+ return 0;
+ fail:
+ btrfs_free_path(path);
+@@ -230,8 +241,7 @@ static noinline int cow_file_range_inline(struct btrfs_trans_handle *trans,
+ return 1;
+ }
+
+- ret = btrfs_drop_extents(trans, root, inode, start,
+- aligned_end, aligned_end, start,
++ ret = btrfs_drop_extents(trans, inode, start, aligned_end,
+ &hint_byte, 1);
+ BUG_ON(ret);
+
+@@ -416,7 +426,6 @@ again:
+ start, end,
+ total_compressed, pages);
+ }
+- btrfs_end_transaction(trans, root);
+ if (ret == 0) {
+ /*
+ * inline extent creation worked, we don't need
+@@ -430,9 +439,11 @@ again:
+ EXTENT_CLEAR_DELALLOC |
+ EXTENT_CLEAR_ACCOUNTING |
+ EXTENT_SET_WRITEBACK | EXTENT_END_WRITEBACK);
+- ret = 0;
++
++ btrfs_end_transaction(trans, root);
+ goto free_pages_out;
+ }
++ btrfs_end_transaction(trans, root);
+ }
+
+ if (will_compress) {
+@@ -543,7 +554,6 @@ static noinline int submit_compressed_extents(struct inode *inode,
+ if (list_empty(&async_cow->extents))
+ return 0;
+
+- trans = btrfs_join_transaction(root, 1);
+
+ while (!list_empty(&async_cow->extents)) {
+ async_extent = list_entry(async_cow->extents.next,
+@@ -590,19 +600,15 @@ retry:
+ lock_extent(io_tree, async_extent->start,
+ async_extent->start + async_extent->ram_size - 1,
+ GFP_NOFS);
+- /*
+- * here we're doing allocation and writeback of the
+- * compressed pages
+- */
+- btrfs_drop_extent_cache(inode, async_extent->start,
+- async_extent->start +
+- async_extent->ram_size - 1, 0);
+
++ trans = btrfs_join_transaction(root, 1);
+ ret = btrfs_reserve_extent(trans, root,
+ async_extent->compressed_size,
+ async_extent->compressed_size,
+ 0, alloc_hint,
+ (u64)-1, &ins, 1);
++ btrfs_end_transaction(trans, root);
++
+ if (ret) {
+ int i;
+ for (i = 0; i < async_extent->nr_pages; i++) {
+@@ -618,6 +624,14 @@ retry:
+ goto retry;
+ }
+
++ /*
++ * here we're doing allocation and writeback of the
++ * compressed pages
++ */
++ btrfs_drop_extent_cache(inode, async_extent->start,
++ async_extent->start +
++ async_extent->ram_size - 1, 0);
++
+ em = alloc_extent_map(GFP_NOFS);
+ em->start = async_extent->start;
+ em->len = async_extent->ram_size;
+@@ -649,8 +663,6 @@ retry:
+ BTRFS_ORDERED_COMPRESSED);
+ BUG_ON(ret);
+
+- btrfs_end_transaction(trans, root);
+-
+ /*
+ * clear dirty, set writeback and unlock the pages.
+ */
+@@ -672,13 +684,11 @@ retry:
+ async_extent->nr_pages);
+
+ BUG_ON(ret);
+- trans = btrfs_join_transaction(root, 1);
+ alloc_hint = ins.objectid + ins.offset;
+ kfree(async_extent);
+ cond_resched();
+ }
+
+- btrfs_end_transaction(trans, root);
+ return 0;
+ }
+
+@@ -742,6 +752,7 @@ static noinline int cow_file_range(struct inode *inode,
+ EXTENT_CLEAR_DIRTY |
+ EXTENT_SET_WRITEBACK |
+ EXTENT_END_WRITEBACK);
++
+ *nr_written = *nr_written +
+ (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
+ *page_started = 1;
+@@ -1596,7 +1607,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
+ struct inode *inode, u64 file_pos,
+ u64 disk_bytenr, u64 disk_num_bytes,
+ u64 num_bytes, u64 ram_bytes,
+- u64 locked_end,
+ u8 compression, u8 encryption,
+ u16 other_encoding, int extent_type)
+ {
+@@ -1622,9 +1632,8 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
+ * the caller is expected to unpin it and allow it to be merged
+ * with the others.
+ */
+- ret = btrfs_drop_extents(trans, root, inode, file_pos,
+- file_pos + num_bytes, locked_end,
+- file_pos, &hint, 0);
++ ret = btrfs_drop_extents(trans, inode, file_pos, file_pos + num_bytes,
++ &hint, 0);
+ BUG_ON(ret);
+
+ ins.objectid = inode->i_ino;
+@@ -1671,24 +1680,6 @@ static int insert_reserved_file_extent(struct btrfs_trans_handle *trans,
+ * before we start the transaction. It limits the amount of btree
+ * reads required while inside the transaction.
+ */
+-static noinline void reada_csum(struct btrfs_root *root,
+- struct btrfs_path *path,
+- struct btrfs_ordered_extent *ordered_extent)
+-{
+- struct btrfs_ordered_sum *sum;
+- u64 bytenr;
+-
+- sum = list_entry(ordered_extent->list.next, struct btrfs_ordered_sum,
+- list);
+- bytenr = sum->sums[0].bytenr;
+-
+- /*
+- * we don't care about the results, the point of this search is
+- * just to get the btree leaves into ram
+- */
+- btrfs_lookup_csum(NULL, root->fs_info->csum_root, path, bytenr, 0);
+-}
+-
+ /* as ordered data IO finishes, this gets called so we can finish
+ * an ordered extent if the range of bytes in the file it covers are
+ * fully written.
+@@ -1699,7 +1690,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+ struct btrfs_trans_handle *trans;
+ struct btrfs_ordered_extent *ordered_extent = NULL;
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+- struct btrfs_path *path;
+ int compressed = 0;
+ int ret;
+
+@@ -1707,46 +1697,32 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+ if (!ret)
+ return 0;
+
+- /*
+- * before we join the transaction, try to do some of our IO.
+- * This will limit the amount of IO that we have to do with
+- * the transaction running. We're unlikely to need to do any
+- * IO if the file extents are new, the disk_i_size checks
+- * covers the most common case.
+- */
+- if (start < BTRFS_I(inode)->disk_i_size) {
+- path = btrfs_alloc_path();
+- if (path) {
+- ret = btrfs_lookup_file_extent(NULL, root, path,
+- inode->i_ino,
+- start, 0);
+- ordered_extent = btrfs_lookup_ordered_extent(inode,
+- start);
+- if (!list_empty(&ordered_extent->list)) {
+- btrfs_release_path(root, path);
+- reada_csum(root, path, ordered_extent);
+- }
+- btrfs_free_path(path);
++ ordered_extent = btrfs_lookup_ordered_extent(inode, start);
++ BUG_ON(!ordered_extent);
++
++ if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags)) {
++ BUG_ON(!list_empty(&ordered_extent->list));
++ ret = btrfs_ordered_update_i_size(inode, 0, ordered_extent);
++ if (!ret) {
++ trans = btrfs_join_transaction(root, 1);
++ ret = btrfs_update_inode(trans, root, inode);
++ BUG_ON(ret);
++ btrfs_end_transaction(trans, root);
+ }
++ goto out;
+ }
+
+- trans = btrfs_join_transaction(root, 1);
+-
+- if (!ordered_extent)
+- ordered_extent = btrfs_lookup_ordered_extent(inode, start);
+- BUG_ON(!ordered_extent);
+- if (test_bit(BTRFS_ORDERED_NOCOW, &ordered_extent->flags))
+- goto nocow;
+-
+ lock_extent(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ GFP_NOFS);
+
++ trans = btrfs_join_transaction(root, 1);
++
+ if (test_bit(BTRFS_ORDERED_COMPRESSED, &ordered_extent->flags))
+ compressed = 1;
+ if (test_bit(BTRFS_ORDERED_PREALLOC, &ordered_extent->flags)) {
+ BUG_ON(compressed);
+- ret = btrfs_mark_extent_written(trans, root, inode,
++ ret = btrfs_mark_extent_written(trans, inode,
+ ordered_extent->file_offset,
+ ordered_extent->file_offset +
+ ordered_extent->len);
+@@ -1758,8 +1734,6 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+ ordered_extent->disk_len,
+ ordered_extent->len,
+ ordered_extent->len,
+- ordered_extent->file_offset +
+- ordered_extent->len,
+ compressed, 0, 0,
+ BTRFS_FILE_EXTENT_REG);
+ unpin_extent_cache(&BTRFS_I(inode)->extent_tree,
+@@ -1770,22 +1744,20 @@ static int btrfs_finish_ordered_io(struct inode *inode, u64 start, u64 end)
+ unlock_extent(io_tree, ordered_extent->file_offset,
+ ordered_extent->file_offset + ordered_extent->len - 1,
+ GFP_NOFS);
+-nocow:
+ add_pending_csums(trans, inode, ordered_extent->file_offset,
+ &ordered_extent->list);
+
+- mutex_lock(&BTRFS_I(inode)->extent_mutex);
+- btrfs_ordered_update_i_size(inode, ordered_extent);
+- btrfs_update_inode(trans, root, inode);
+- btrfs_remove_ordered_extent(inode, ordered_extent);
+- mutex_unlock(&BTRFS_I(inode)->extent_mutex);
+-
++ /* this also removes the ordered extent from the tree */
++ btrfs_ordered_update_i_size(inode, 0, ordered_extent);
++ ret = btrfs_update_inode(trans, root, inode);
++ BUG_ON(ret);
++ btrfs_end_transaction(trans, root);
++out:
+ /* once for us */
+ btrfs_put_ordered_extent(ordered_extent);
+ /* once for the tree */
+ btrfs_put_ordered_extent(ordered_extent);
+
+- btrfs_end_transaction(trans, root);
+ return 0;
+ }
+
+@@ -2008,6 +1980,54 @@ zeroit:
+ return -EIO;
+ }
+
++struct delayed_iput {
++ struct list_head list;
++ struct inode *inode;
++};
++
++void btrfs_add_delayed_iput(struct inode *inode)
++{
++ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
++ struct delayed_iput *delayed;
++
++ if (atomic_add_unless(&inode->i_count, -1, 1))
++ return;
++
++ delayed = kmalloc(sizeof(*delayed), GFP_NOFS | __GFP_NOFAIL);
++ delayed->inode = inode;
++
++ spin_lock(&fs_info->delayed_iput_lock);
++ list_add_tail(&delayed->list, &fs_info->delayed_iputs);
++ spin_unlock(&fs_info->delayed_iput_lock);
++}
++
++void btrfs_run_delayed_iputs(struct btrfs_root *root)
++{
++ LIST_HEAD(list);
++ struct btrfs_fs_info *fs_info = root->fs_info;
++ struct delayed_iput *delayed;
++ int empty;
++
++ spin_lock(&fs_info->delayed_iput_lock);
++ empty = list_empty(&fs_info->delayed_iputs);
++ spin_unlock(&fs_info->delayed_iput_lock);
++ if (empty)
++ return;
++
++ down_read(&root->fs_info->cleanup_work_sem);
++ spin_lock(&fs_info->delayed_iput_lock);
++ list_splice_init(&fs_info->delayed_iputs, &list);
++ spin_unlock(&fs_info->delayed_iput_lock);
++
++ while (!list_empty(&list)) {
++ delayed = list_entry(list.next, struct delayed_iput, list);
++ list_del(&delayed->list);
++ iput(delayed->inode);
++ kfree(delayed);
++ }
++ up_read(&root->fs_info->cleanup_work_sem);
++}
++
+ /*
+ * This creates an orphan entry for the given inode in case something goes
+ * wrong in the middle of an unlink/truncate.
+@@ -2080,16 +2100,17 @@ void btrfs_orphan_cleanup(struct btrfs_root *root)
+ struct inode *inode;
+ int ret = 0, nr_unlink = 0, nr_truncate = 0;
+
+- path = btrfs_alloc_path();
+- if (!path)
++ if (!xchg(&root->clean_orphans, 0))
+ return;
++
++ path = btrfs_alloc_path();
++ BUG_ON(!path);
+ path->reada = -1;
+
+ key.objectid = BTRFS_ORPHAN_OBJECTID;
+ btrfs_set_key_type(&key, BTRFS_ORPHAN_ITEM_KEY);
+ key.offset = (u64)-1;
+
+-
+ while (1) {
+ ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
+ if (ret < 0) {
+@@ -2834,37 +2855,40 @@ out:
+ * min_type is the minimum key type to truncate down to. If set to 0, this
+ * will kill all the items on this inode, including the INODE_ITEM_KEY.
+ */
+-noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+- struct btrfs_root *root,
+- struct inode *inode,
+- u64 new_size, u32 min_type)
++int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root,
++ struct inode *inode,
++ u64 new_size, u32 min_type)
+ {
+- int ret;
+ struct btrfs_path *path;
+- struct btrfs_key key;
+- struct btrfs_key found_key;
+- u32 found_type = (u8)-1;
+ struct extent_buffer *leaf;
+ struct btrfs_file_extent_item *fi;
++ struct btrfs_key key;
++ struct btrfs_key found_key;
+ u64 extent_start = 0;
+ u64 extent_num_bytes = 0;
+ u64 extent_offset = 0;
+ u64 item_end = 0;
++ u64 mask = root->sectorsize - 1;
++ u32 found_type = (u8)-1;
+ int found_extent;
+ int del_item;
+ int pending_del_nr = 0;
+ int pending_del_slot = 0;
+ int extent_type = -1;
+ int encoding;
+- u64 mask = root->sectorsize - 1;
++ int ret;
++ int err = 0;
++
++ BUG_ON(new_size > 0 && min_type != BTRFS_EXTENT_DATA_KEY);
+
+ if (root->ref_cows)
+ btrfs_drop_extent_cache(inode, new_size & (~mask), (u64)-1, 0);
++
+ path = btrfs_alloc_path();
+ BUG_ON(!path);
+ path->reada = -1;
+
+- /* FIXME, add redo link to tree so we don't leak on crash */
+ key.objectid = inode->i_ino;
+ key.offset = (u64)-1;
+ key.type = (u8)-1;
+@@ -2872,17 +2896,17 @@ noinline int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
+ search_again:
+ path->leave_spinning = 1;
+ ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+- if (ret < 0)
+- goto error;
++ if (ret < 0) {
++ err = ret;
++ goto out;
++ }
+
+ if (ret > 0) {
+ /* there are no items in the tree for us to truncate, we're
+ * done
+ */
+- if (path->slots[0] == 0) {
+- ret = 0;
+- goto error;
+- }
++ if (path->slots[0] == 0)
++ goto out;
+ path->slots[0]--;
+ }
+
+@@ -2917,28 +2941,17 @@ search_again:
+ }
+ item_end--;
+ }
+- if (item_end < new_size) {
+- if (found_type == BTRFS_DIR_ITEM_KEY)
+- found_type = BTRFS_INODE_ITEM_KEY;
+- else if (found_type == BTRFS_EXTENT_ITEM_KEY)
+- found_type = BTRFS_EXTENT_DATA_KEY;
+- else if (found_type == BTRFS_EXTENT_DATA_KEY)
+- found_type = BTRFS_XATTR_ITEM_KEY;
+- else if (found_type == BTRFS_XATTR_ITEM_KEY)
+- found_type = BTRFS_INODE_REF_KEY;
+- else if (found_type)
+- found_type--;
+- else
++ if (found_type > min_type) {
++ del_item = 1;
++ } else {
++ if (item_end < new_size)
+ break;
+- btrfs_set_key_type(&key, found_type);
+- goto next;
++ if (found_key.offset >= new_size)
++ del_item = 1;
++ else
++ del_item = 0;
+ }
+- if (found_key.offset >= new_size)
+- del_item = 1;
+- else
+- del_item = 0;
+ found_extent = 0;
+-
+ /* FIXME, shrink the extent if the ref count is only 1 */
+ if (found_type != BTRFS_EXTENT_DATA_KEY)
+ goto delete;
+@@ -3025,42 +3038,36 @@ delete:
+ inode->i_ino, extent_offset);
+ BUG_ON(ret);
+ }
+-next:
+- if (path->slots[0] == 0) {
+- if (pending_del_nr)
+- goto del_pending;
+- btrfs_release_path(root, path);
+- if (found_type == BTRFS_INODE_ITEM_KEY)
+- break;
+- goto search_again;
+- }
+
+- path->slots[0]--;
+- if (pending_del_nr &&
+- path->slots[0] + 1 != pending_del_slot) {
+- struct btrfs_key debug;
+-del_pending:
+- btrfs_item_key_to_cpu(path->nodes[0], &debug,
+- pending_del_slot);
+- ret = btrfs_del_items(trans, root, path,
+- pending_del_slot,
+- pending_del_nr);
+- BUG_ON(ret);
+- pending_del_nr = 0;
++ if (found_type == BTRFS_INODE_ITEM_KEY)
++ break;
++
++ if (path->slots[0] == 0 ||
++ path->slots[0] != pending_del_slot) {
++ if (root->ref_cows) {
++ err = -EAGAIN;
++ goto out;
++ }
++ if (pending_del_nr) {
++ ret = btrfs_del_items(trans, root, path,
++ pending_del_slot,
++ pending_del_nr);
++ BUG_ON(ret);
++ pending_del_nr = 0;
++ }
+ btrfs_release_path(root, path);
+- if (found_type == BTRFS_INODE_ITEM_KEY)
+- break;
+ goto search_again;
++ } else {
++ path->slots[0]--;
+ }
+ }
+- ret = 0;
+-error:
++out:
+ if (pending_del_nr) {
+ ret = btrfs_del_items(trans, root, path, pending_del_slot,
+ pending_del_nr);
+ }
+ btrfs_free_path(path);
+- return ret;
++ return err;
+ }
+
+ /*
+@@ -3180,10 +3187,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
+ if (size <= hole_start)
+ return 0;
+
+- err = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+- if (err)
+- return err;
+-
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+ btrfs_wait_ordered_range(inode, hole_start,
+@@ -3196,9 +3199,6 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
+ btrfs_put_ordered_extent(ordered);
+ }
+
+- trans = btrfs_start_transaction(root, 1);
+- btrfs_set_trans_block_group(trans, inode);
+-
+ cur_offset = hole_start;
+ while (1) {
+ em = btrfs_get_extent(inode, NULL, 0, cur_offset,
+@@ -3206,40 +3206,120 @@ int btrfs_cont_expand(struct inode *inode, loff_t size)
+ BUG_ON(IS_ERR(em) || !em);
+ last_byte = min(extent_map_end(em), block_end);
+ last_byte = (last_byte + mask) & ~mask;
+- if (test_bit(EXTENT_FLAG_VACANCY, &em->flags)) {
++ if (!test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ u64 hint_byte = 0;
+ hole_size = last_byte - cur_offset;
+- err = btrfs_drop_extents(trans, root, inode,
+- cur_offset,
+- cur_offset + hole_size,
+- block_end,
+- cur_offset, &hint_byte, 1);
+- if (err)
+- break;
+
+- err = btrfs_reserve_metadata_space(root, 1);
++ err = btrfs_reserve_metadata_space(root, 2);
+ if (err)
+ break;
+
++ trans = btrfs_start_transaction(root, 1);
++ btrfs_set_trans_block_group(trans, inode);
++
++ err = btrfs_drop_extents(trans, inode, cur_offset,
++ cur_offset + hole_size,
++ &hint_byte, 1);
++ BUG_ON(err);
++
+ err = btrfs_insert_file_extent(trans, root,
+ inode->i_ino, cur_offset, 0,
+ 0, hole_size, 0, hole_size,
+ 0, 0, 0);
++ BUG_ON(err);
++
+ btrfs_drop_extent_cache(inode, hole_start,
+ last_byte - 1, 0);
+- btrfs_unreserve_metadata_space(root, 1);
++
++ btrfs_end_transaction(trans, root);
++ btrfs_unreserve_metadata_space(root, 2);
+ }
+ free_extent_map(em);
+ cur_offset = last_byte;
+- if (err || cur_offset >= block_end)
++ if (cur_offset >= block_end)
+ break;
+ }
+
+- btrfs_end_transaction(trans, root);
+ unlock_extent(io_tree, hole_start, block_end - 1, GFP_NOFS);
+ return err;
+ }
+
++static int btrfs_setattr_size(struct inode *inode, struct iattr *attr)
++{
++ struct btrfs_root *root = BTRFS_I(inode)->root;
++ struct btrfs_trans_handle *trans;
++ unsigned long nr;
++ int ret;
++
++ if (attr->ia_size == inode->i_size)
++ return 0;
++
++ if (attr->ia_size > inode->i_size) {
++ unsigned long limit;
++ limit = current->signal->rlim[RLIMIT_FSIZE].rlim_cur;
++ if (attr->ia_size > inode->i_sb->s_maxbytes)
++ return -EFBIG;
++ if (limit != RLIM_INFINITY && attr->ia_size > limit) {
++ send_sig(SIGXFSZ, current, 0);
++ return -EFBIG;
++ }
++ }
++
++ ret = btrfs_reserve_metadata_space(root, 1);
++ if (ret)
++ return ret;
++
++ trans = btrfs_start_transaction(root, 1);
++ btrfs_set_trans_block_group(trans, inode);
++
++ ret = btrfs_orphan_add(trans, inode);
++ BUG_ON(ret);
++
++ nr = trans->blocks_used;
++ btrfs_end_transaction(trans, root);
++ btrfs_unreserve_metadata_space(root, 1);
++ btrfs_btree_balance_dirty(root, nr);
++
++ if (attr->ia_size > inode->i_size) {
++ ret = btrfs_cont_expand(inode, attr->ia_size);
++ if (ret) {
++ btrfs_truncate(inode);
++ return ret;
++ }
++
++ i_size_write(inode, attr->ia_size);
++ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
++
++ trans = btrfs_start_transaction(root, 1);
++ btrfs_set_trans_block_group(trans, inode);
++
++ ret = btrfs_update_inode(trans, root, inode);
++ BUG_ON(ret);
++ if (inode->i_nlink > 0) {
++ ret = btrfs_orphan_del(trans, inode);
++ BUG_ON(ret);
++ }
++ nr = trans->blocks_used;
++ btrfs_end_transaction(trans, root);
++ btrfs_btree_balance_dirty(root, nr);
++ return 0;
++ }
++
++ /*
++ * We're truncating a file that used to have good data down to
++ * zero. Make sure it gets into the ordered flush list so that
++ * any new writes get down to disk quickly.
++ */
++ if (attr->ia_size == 0)
++ BTRFS_I(inode)->ordered_data_close = 1;
++
++ /* we don't support swapfiles, so vmtruncate shouldn't fail */
++ ret = vmtruncate(inode, attr->ia_size);
++ BUG_ON(ret);
++
++ return 0;
++}
++
+ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
+ {
+ struct inode *inode = dentry->d_inode;
+@@ -3250,23 +3330,14 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
+ return err;
+
+ if (S_ISREG(inode->i_mode) && (attr->ia_valid & ATTR_SIZE)) {
+- if (attr->ia_size > inode->i_size) {
+- err = btrfs_cont_expand(inode, attr->ia_size);
+- if (err)
+- return err;
+- } else if (inode->i_size > 0 &&
+- attr->ia_size == 0) {
+-
+- /* we're truncating a file that used to have good
+- * data down to zero. Make sure it gets into
+- * the ordered flush list so that any new writes
+- * get down to disk quickly.
+- */
+- BTRFS_I(inode)->ordered_data_close = 1;
+- }
++ err = btrfs_setattr_size(inode, attr);
++ if (err)
++ return err;
+ }
++ attr->ia_valid &= ~ATTR_SIZE;
+
+- err = inode_setattr(inode, attr);
++ if (attr->ia_valid)
++ err = inode_setattr(inode, attr);
+
+ if (!err && ((attr->ia_valid & ATTR_MODE)))
+ err = btrfs_acl_chmod(inode);
+@@ -3287,36 +3358,43 @@ void btrfs_delete_inode(struct inode *inode)
+ }
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+
++ if (root->fs_info->log_root_recovering) {
++ BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
++ goto no_delete;
++ }
++
+ if (inode->i_nlink > 0) {
+ BUG_ON(btrfs_root_refs(&root->root_item) != 0);
+ goto no_delete;
+ }
+
+ btrfs_i_size_write(inode, 0);
+- trans = btrfs_join_transaction(root, 1);
+
+- btrfs_set_trans_block_group(trans, inode);
+- ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size, 0);
+- if (ret) {
+- btrfs_orphan_del(NULL, inode);
+- goto no_delete_lock;
+- }
++ while (1) {
++ trans = btrfs_start_transaction(root, 1);
++ btrfs_set_trans_block_group(trans, inode);
++ ret = btrfs_truncate_inode_items(trans, root, inode, 0, 0);
+
+- btrfs_orphan_del(trans, inode);
++ if (ret != -EAGAIN)
++ break;
+
+- nr = trans->blocks_used;
+- clear_inode(inode);
++ nr = trans->blocks_used;
++ btrfs_end_transaction(trans, root);
++ trans = NULL;
++ btrfs_btree_balance_dirty(root, nr);
++ }
+
+- btrfs_end_transaction(trans, root);
+- btrfs_btree_balance_dirty(root, nr);
+- return;
++ if (ret == 0) {
++ ret = btrfs_orphan_del(trans, inode);
++ BUG_ON(ret);
++ }
+
+-no_delete_lock:
+ nr = trans->blocks_used;
+ btrfs_end_transaction(trans, root);
+ btrfs_btree_balance_dirty(root, nr);
+ no_delete:
+ clear_inode(inode);
++ return;
+ }
+
+ /*
+@@ -3569,7 +3647,6 @@ static noinline void init_btrfs_i(struct inode *inode)
+ INIT_LIST_HEAD(&BTRFS_I(inode)->ordered_operations);
+ RB_CLEAR_NODE(&BTRFS_I(inode)->rb_node);
+ btrfs_ordered_inode_tree_init(&BTRFS_I(inode)->ordered_tree);
+- mutex_init(&BTRFS_I(inode)->extent_mutex);
+ mutex_init(&BTRFS_I(inode)->log_mutex);
+ }
+
+@@ -3695,6 +3772,13 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry)
+ }
+ srcu_read_unlock(&root->fs_info->subvol_srcu, index);
+
++ if (root != sub_root) {
++ down_read(&root->fs_info->cleanup_work_sem);
++ if (!(inode->i_sb->s_flags & MS_RDONLY))
++ btrfs_orphan_cleanup(sub_root);
++ up_read(&root->fs_info->cleanup_work_sem);
++ }
++
+ return inode;
+ }
+
+@@ -3869,7 +3953,11 @@ skip:
+
+ /* Reached end of directory/root. Bump pos past the last item. */
+ if (key_type == BTRFS_DIR_INDEX_KEY)
+- filp->f_pos = INT_LIMIT(off_t);
++ /*
++ * 32-bit glibc will use getdents64, but then strtol -
++ * so the last number we can serve is this.
++ */
++ filp->f_pos = 0x7fffffff;
+ else
+ filp->f_pos++;
+ nopos:
+@@ -4219,7 +4307,7 @@ static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
+ if (IS_ERR(inode))
+ goto out_unlock;
+
+- err = btrfs_init_inode_security(inode, dir);
++ err = btrfs_init_inode_security(trans, inode, dir);
+ if (err) {
+ drop_inode = 1;
+ goto out_unlock;
+@@ -4290,7 +4378,7 @@ static int btrfs_create(struct inode *dir, struct dentry *dentry,
+ if (IS_ERR(inode))
+ goto out_unlock;
+
+- err = btrfs_init_inode_security(inode, dir);
++ err = btrfs_init_inode_security(trans, inode, dir);
+ if (err) {
+ drop_inode = 1;
+ goto out_unlock;
+@@ -4336,6 +4424,10 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
+ if (inode->i_nlink == 0)
+ return -ENOENT;
+
++ /* do not allow sys_link's with other subvols of the same device */
++ if (root->objectid != BTRFS_I(inode)->root->objectid)
++ return -EPERM;
++
+ /*
+ * 1 item for inode ref
+ * 2 items for dir items
+@@ -4423,7 +4515,7 @@ static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
+
+ drop_on_err = 1;
+
+- err = btrfs_init_inode_security(inode, dir);
++ err = btrfs_init_inode_security(trans, inode, dir);
+ if (err)
+ goto out_fail;
+
+@@ -5074,17 +5166,20 @@ static void btrfs_truncate(struct inode *inode)
+ unsigned long nr;
+ u64 mask = root->sectorsize - 1;
+
+- if (!S_ISREG(inode->i_mode))
+- return;
+- if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
++ if (!S_ISREG(inode->i_mode)) {
++ WARN_ON(1);
+ return;
++ }
+
+ ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
+ if (ret)
+ return;
++
+ btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
++ btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
+
+ trans = btrfs_start_transaction(root, 1);
++ btrfs_set_trans_block_group(trans, inode);
+
+ /*
+ * setattr is responsible for setting the ordered_data_close flag,
+@@ -5106,21 +5201,32 @@ static void btrfs_truncate(struct inode *inode)
+ if (inode->i_size == 0 && BTRFS_I(inode)->ordered_data_close)
+ btrfs_add_ordered_operation(trans, root, inode);
+
+- btrfs_set_trans_block_group(trans, inode);
+- btrfs_i_size_write(inode, inode->i_size);
++ while (1) {
++ ret = btrfs_truncate_inode_items(trans, root, inode,
++ inode->i_size,
++ BTRFS_EXTENT_DATA_KEY);
++ if (ret != -EAGAIN)
++ break;
+
+- ret = btrfs_orphan_add(trans, inode);
+- if (ret)
+- goto out;
+- /* FIXME, add redo link to tree so we don't leak on crash */
+- ret = btrfs_truncate_inode_items(trans, root, inode, inode->i_size,
+- BTRFS_EXTENT_DATA_KEY);
+- btrfs_update_inode(trans, root, inode);
++ ret = btrfs_update_inode(trans, root, inode);
++ BUG_ON(ret);
+
+- ret = btrfs_orphan_del(trans, inode);
++ nr = trans->blocks_used;
++ btrfs_end_transaction(trans, root);
++ btrfs_btree_balance_dirty(root, nr);
++
++ trans = btrfs_start_transaction(root, 1);
++ btrfs_set_trans_block_group(trans, inode);
++ }
++
++ if (ret == 0 && inode->i_nlink > 0) {
++ ret = btrfs_orphan_del(trans, inode);
++ BUG_ON(ret);
++ }
++
++ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
+
+-out:
+ nr = trans->blocks_used;
+ ret = btrfs_end_transaction_throttle(trans, root);
+ BUG_ON(ret);
+@@ -5217,9 +5323,9 @@ void btrfs_destroy_inode(struct inode *inode)
+
+ spin_lock(&root->list_lock);
+ if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+- printk(KERN_ERR "BTRFS: inode %lu: inode still on the orphan"
+- " list\n", inode->i_ino);
+- dump_stack();
++ printk(KERN_INFO "BTRFS: inode %lu still on the orphan list\n",
++ inode->i_ino);
++ list_del_init(&BTRFS_I(inode)->i_orphan);
+ }
+ spin_unlock(&root->list_lock);
+
+@@ -5476,7 +5582,7 @@ out_fail:
+ * some fairly slow code that needs optimization. This walks the list
+ * of all the inodes with pending delalloc and forces them to disk.
+ */
+-int btrfs_start_delalloc_inodes(struct btrfs_root *root)
++int btrfs_start_delalloc_inodes(struct btrfs_root *root, int delay_iput)
+ {
+ struct list_head *head = &root->fs_info->delalloc_inodes;
+ struct btrfs_inode *binode;
+@@ -5495,7 +5601,10 @@ int btrfs_start_delalloc_inodes(struct btrfs_root *root)
+ spin_unlock(&root->fs_info->delalloc_lock);
+ if (inode) {
+ filemap_flush(inode->i_mapping);
+- iput(inode);
++ if (delay_iput)
++ btrfs_add_delayed_iput(inode);
++ else
++ iput(inode);
+ }
+ cond_resched();
+ spin_lock(&root->fs_info->delalloc_lock);
+@@ -5569,7 +5678,7 @@ static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
+ if (IS_ERR(inode))
+ goto out_unlock;
+
+- err = btrfs_init_inode_security(inode, dir);
++ err = btrfs_init_inode_security(trans, inode, dir);
+ if (err) {
+ drop_inode = 1;
+ goto out_unlock;
+@@ -5641,57 +5750,77 @@ out_fail:
+ return err;
+ }
+
+-static int prealloc_file_range(struct btrfs_trans_handle *trans,
+- struct inode *inode, u64 start, u64 end,
+- u64 locked_end, u64 alloc_hint, int mode)
++static int prealloc_file_range(struct inode *inode, u64 start, u64 end,
++ u64 alloc_hint, int mode, loff_t actual_len)
+ {
++ struct btrfs_trans_handle *trans;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+ struct btrfs_key ins;
+ u64 alloc_size;
+ u64 cur_offset = start;
+ u64 num_bytes = end - start;
+ int ret = 0;
++ u64 i_size;
+
+ while (num_bytes > 0) {
+ alloc_size = min(num_bytes, root->fs_info->max_extent);
+
+- ret = btrfs_reserve_metadata_space(root, 1);
+- if (ret)
+- goto out;
++ trans = btrfs_start_transaction(root, 1);
+
+ ret = btrfs_reserve_extent(trans, root, alloc_size,
+ root->sectorsize, 0, alloc_hint,
+ (u64)-1, &ins, 1);
+ if (ret) {
+ WARN_ON(1);
+- goto out;
++ goto stop_trans;
+ }
++
++ ret = btrfs_reserve_metadata_space(root, 3);
++ if (ret) {
++ btrfs_free_reserved_extent(root, ins.objectid,
++ ins.offset);
++ goto stop_trans;
++ }
++
+ ret = insert_reserved_file_extent(trans, inode,
+ cur_offset, ins.objectid,
+ ins.offset, ins.offset,
+- ins.offset, locked_end,
+- 0, 0, 0,
++ ins.offset, 0, 0, 0,
+ BTRFS_FILE_EXTENT_PREALLOC);
+ BUG_ON(ret);
+ btrfs_drop_extent_cache(inode, cur_offset,
+ cur_offset + ins.offset -1, 0);
++
+ num_bytes -= ins.offset;
+ cur_offset += ins.offset;
+ alloc_hint = ins.objectid + ins.offset;
+- btrfs_unreserve_metadata_space(root, 1);
+- }
+-out:
+- if (cur_offset > start) {
++
+ inode->i_ctime = CURRENT_TIME;
+ BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
+ if (!(mode & FALLOC_FL_KEEP_SIZE) &&
+- cur_offset > i_size_read(inode))
+- btrfs_i_size_write(inode, cur_offset);
++ (actual_len > inode->i_size) &&
++ (cur_offset > inode->i_size)) {
++
++ if (cur_offset > actual_len)
++ i_size = actual_len;
++ else
++ i_size = cur_offset;
++ i_size_write(inode, i_size);
++ btrfs_ordered_update_i_size(inode, i_size, NULL);
++ }
++
+ ret = btrfs_update_inode(trans, root, inode);
+ BUG_ON(ret);
++
++ btrfs_end_transaction(trans, root);
++ btrfs_unreserve_metadata_space(root, 3);
+ }
++ return ret;
+
++stop_trans:
++ btrfs_end_transaction(trans, root);
+ return ret;
++
+ }
+
+ static long btrfs_fallocate(struct inode *inode, int mode,
+@@ -5705,8 +5834,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
+ u64 locked_end;
+ u64 mask = BTRFS_I(inode)->root->sectorsize - 1;
+ struct extent_map *em;
+- struct btrfs_trans_handle *trans;
+- struct btrfs_root *root;
+ int ret;
+
+ alloc_start = offset & ~mask;
+@@ -5725,9 +5852,7 @@ static long btrfs_fallocate(struct inode *inode, int mode,
+ goto out;
+ }
+
+- root = BTRFS_I(inode)->root;
+-
+- ret = btrfs_check_data_free_space(root, inode,
++ ret = btrfs_check_data_free_space(BTRFS_I(inode)->root, inode,
+ alloc_end - alloc_start);
+ if (ret)
+ goto out;
+@@ -5736,12 +5861,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
+ while (1) {
+ struct btrfs_ordered_extent *ordered;
+
+- trans = btrfs_start_transaction(BTRFS_I(inode)->root, 1);
+- if (!trans) {
+- ret = -EIO;
+- goto out_free;
+- }
+-
+ /* the extent lock is ordered inside the running
+ * transaction
+ */
+@@ -5755,8 +5874,6 @@ static long btrfs_fallocate(struct inode *inode, int mode,
+ btrfs_put_ordered_extent(ordered);
+ unlock_extent(&BTRFS_I(inode)->io_tree,
+ alloc_start, locked_end, GFP_NOFS);
+- btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+-
+ /*
+ * we can't wait on the range with the transaction
+ * running or with the extent lock held
+@@ -5777,10 +5894,12 @@ static long btrfs_fallocate(struct inode *inode, int mode,
+ BUG_ON(IS_ERR(em) || !em);
+ last_byte = min(extent_map_end(em), alloc_end);
+ last_byte = (last_byte + mask) & ~mask;
+- if (em->block_start == EXTENT_MAP_HOLE) {
+- ret = prealloc_file_range(trans, inode, cur_offset,
+- last_byte, locked_end + 1,
+- alloc_hint, mode);
++ if (em->block_start == EXTENT_MAP_HOLE ||
++ (cur_offset >= inode->i_size &&
++ !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
++ ret = prealloc_file_range(inode,
++ cur_offset, last_byte,
++ alloc_hint, mode, offset+len);
+ if (ret < 0) {
+ free_extent_map(em);
+ break;
+@@ -5799,9 +5918,8 @@ static long btrfs_fallocate(struct inode *inode, int mode,
+ unlock_extent(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
+ GFP_NOFS);
+
+- btrfs_end_transaction(trans, BTRFS_I(inode)->root);
+-out_free:
+- btrfs_free_reserved_data_space(root, inode, alloc_end - alloc_start);
++ btrfs_free_reserved_data_space(BTRFS_I(inode)->root, inode,
++ alloc_end - alloc_start);
+ out:
+ mutex_unlock(&inode->i_mutex);
+ return ret;
+diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
+index b9840fa..0bc5776 100644
+--- a/fs/btrfs/ioctl.c
++++ b/fs/btrfs/ioctl.c
+@@ -237,7 +237,6 @@ static noinline int create_subvol(struct btrfs_root *root,
+ u64 objectid;
+ u64 new_dirid = BTRFS_FIRST_FREE_OBJECTID;
+ u64 index = 0;
+- unsigned long nr = 1;
+
+ /*
+ * 1 - inode item
+@@ -290,7 +289,7 @@ static noinline int create_subvol(struct btrfs_root *root,
+ btrfs_set_root_generation(&root_item, trans->transid);
+ btrfs_set_root_level(&root_item, 0);
+ btrfs_set_root_refs(&root_item, 1);
+- btrfs_set_root_used(&root_item, 0);
++ btrfs_set_root_used(&root_item, leaf->len);
+ btrfs_set_root_last_snapshot(&root_item, 0);
+
+ memset(&root_item.drop_progress, 0, sizeof(root_item.drop_progress));
+@@ -342,24 +341,21 @@ static noinline int create_subvol(struct btrfs_root *root,
+
+ d_instantiate(dentry, btrfs_lookup_dentry(dir, dentry));
+ fail:
+- nr = trans->blocks_used;
+ err = btrfs_commit_transaction(trans, root);
+ if (err && !ret)
+ ret = err;
+
+ btrfs_unreserve_metadata_space(root, 6);
+- btrfs_btree_balance_dirty(root, nr);
+ return ret;
+ }
+
+ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+ char *name, int namelen)
+ {
++ struct inode *inode;
+ struct btrfs_pending_snapshot *pending_snapshot;
+ struct btrfs_trans_handle *trans;
+- int ret = 0;
+- int err;
+- unsigned long nr = 0;
++ int ret;
+
+ if (!root->ref_cows)
+ return -EINVAL;
+@@ -372,20 +368,20 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+ */
+ ret = btrfs_reserve_metadata_space(root, 6);
+ if (ret)
+- goto fail_unlock;
++ goto fail;
+
+ pending_snapshot = kzalloc(sizeof(*pending_snapshot), GFP_NOFS);
+ if (!pending_snapshot) {
+ ret = -ENOMEM;
+ btrfs_unreserve_metadata_space(root, 6);
+- goto fail_unlock;
++ goto fail;
+ }
+ pending_snapshot->name = kmalloc(namelen + 1, GFP_NOFS);
+ if (!pending_snapshot->name) {
+ ret = -ENOMEM;
+ kfree(pending_snapshot);
+ btrfs_unreserve_metadata_space(root, 6);
+- goto fail_unlock;
++ goto fail;
+ }
+ memcpy(pending_snapshot->name, name, namelen);
+ pending_snapshot->name[namelen] = '\0';
+@@ -395,10 +391,19 @@ static int create_snapshot(struct btrfs_root *root, struct dentry *dentry,
+ pending_snapshot->root = root;
+ list_add(&pending_snapshot->list,
+ &trans->transaction->pending_snapshots);
+- err = btrfs_commit_transaction(trans, root);
++ ret = btrfs_commit_transaction(trans, root);
++ BUG_ON(ret);
++ btrfs_unreserve_metadata_space(root, 6);
+
+-fail_unlock:
+- btrfs_btree_balance_dirty(root, nr);
++ inode = btrfs_lookup_dentry(dentry->d_parent->d_inode, dentry);
++ if (IS_ERR(inode)) {
++ ret = PTR_ERR(inode);
++ goto fail;
++ }
++ BUG_ON(!inode);
++ d_instantiate(dentry, inode);
++ ret = 0;
++fail:
+ return ret;
+ }
+
+@@ -1032,8 +1037,7 @@ static noinline long btrfs_ioctl_clone(struct file *file, unsigned long srcfd,
+ BUG_ON(!trans);
+
+ /* punch hole in destination first */
+- btrfs_drop_extents(trans, root, inode, off, off + len,
+- off + len, 0, &hint_byte, 1);
++ btrfs_drop_extents(trans, inode, off, off + len, &hint_byte, 1);
+
+ /* clone data */
+ key.objectid = src->i_ino;
+diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
+index 5799bc4..5c2a9e7 100644
+--- a/fs/btrfs/ordered-data.c
++++ b/fs/btrfs/ordered-data.c
+@@ -291,16 +291,16 @@ int btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry)
+
+ /*
+ * remove an ordered extent from the tree. No references are dropped
+- * but, anyone waiting on this extent is woken up.
++ * and you must wake_up entry->wait. You must hold the tree mutex
++ * while you call this function.
+ */
+-int btrfs_remove_ordered_extent(struct inode *inode,
++static int __btrfs_remove_ordered_extent(struct inode *inode,
+ struct btrfs_ordered_extent *entry)
+ {
+ struct btrfs_ordered_inode_tree *tree;
+ struct rb_node *node;
+
+ tree = &BTRFS_I(inode)->ordered_tree;
+- mutex_lock(&tree->mutex);
+ node = &entry->rb_node;
+ rb_erase(node, &tree->tree);
+ tree->last = NULL;
+@@ -326,16 +326,34 @@ int btrfs_remove_ordered_extent(struct inode *inode,
+ }
+ spin_unlock(&BTRFS_I(inode)->root->fs_info->ordered_extent_lock);
+
++ return 0;
++}
++
++/*
++ * remove an ordered extent from the tree. No references are dropped
++ * but any waiters are woken.
++ */
++int btrfs_remove_ordered_extent(struct inode *inode,
++ struct btrfs_ordered_extent *entry)
++{
++ struct btrfs_ordered_inode_tree *tree;
++ int ret;
++
++ tree = &BTRFS_I(inode)->ordered_tree;
++ mutex_lock(&tree->mutex);
++ ret = __btrfs_remove_ordered_extent(inode, entry);
+ mutex_unlock(&tree->mutex);
+ wake_up(&entry->wait);
+- return 0;
++
++ return ret;
+ }
+
+ /*
+ * wait for all the ordered extents in a root. This is done when balancing
+ * space between drives.
+ */
+-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
++int btrfs_wait_ordered_extents(struct btrfs_root *root,
++ int nocow_only, int delay_iput)
+ {
+ struct list_head splice;
+ struct list_head *cur;
+@@ -372,7 +390,10 @@ int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only)
+ if (inode) {
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ btrfs_put_ordered_extent(ordered);
+- iput(inode);
++ if (delay_iput)
++ btrfs_add_delayed_iput(inode);
++ else
++ iput(inode);
+ } else {
+ btrfs_put_ordered_extent(ordered);
+ }
+@@ -430,7 +451,7 @@ again:
+ btrfs_wait_ordered_range(inode, 0, (u64)-1);
+ else
+ filemap_flush(inode->i_mapping);
+- iput(inode);
++ btrfs_add_delayed_iput(inode);
+ }
+
+ cond_resched();
+@@ -589,7 +610,7 @@ out:
+ * After an extent is done, call this to conditionally update the on disk
+ * i_size. i_size is updated to cover any fully written part of the file.
+ */
+-int btrfs_ordered_update_i_size(struct inode *inode,
++int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
+ struct btrfs_ordered_extent *ordered)
+ {
+ struct btrfs_ordered_inode_tree *tree = &BTRFS_I(inode)->ordered_tree;
+@@ -597,18 +618,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
+ u64 disk_i_size;
+ u64 new_i_size;
+ u64 i_size_test;
++ u64 i_size = i_size_read(inode);
+ struct rb_node *node;
++ struct rb_node *prev = NULL;
+ struct btrfs_ordered_extent *test;
++ int ret = 1;
++
++ if (ordered)
++ offset = entry_end(ordered);
++ else
++ offset = ALIGN(offset, BTRFS_I(inode)->root->sectorsize);
+
+ mutex_lock(&tree->mutex);
+ disk_i_size = BTRFS_I(inode)->disk_i_size;
+
++ /* truncate file */
++ if (disk_i_size > i_size) {
++ BTRFS_I(inode)->disk_i_size = i_size;
++ ret = 0;
++ goto out;
++ }
++
+ /*
+ * if the disk i_size is already at the inode->i_size, or
+ * this ordered extent is inside the disk i_size, we're done
+ */
+- if (disk_i_size >= inode->i_size ||
+- ordered->file_offset + ordered->len <= disk_i_size) {
++ if (disk_i_size == i_size || offset <= disk_i_size) {
+ goto out;
+ }
+
+@@ -616,8 +651,7 @@ int btrfs_ordered_update_i_size(struct inode *inode,
+ * we can't update the disk_isize if there are delalloc bytes
+ * between disk_i_size and this ordered extent
+ */
+- if (test_range_bit(io_tree, disk_i_size,
+- ordered->file_offset + ordered->len - 1,
++ if (test_range_bit(io_tree, disk_i_size, offset - 1,
+ EXTENT_DELALLOC, 0, NULL)) {
+ goto out;
+ }
+@@ -626,20 +660,32 @@ int btrfs_ordered_update_i_size(struct inode *inode,
+ * if we find an ordered extent then we can't update disk i_size
+ * yet
+ */
+- node = &ordered->rb_node;
+- while (1) {
+- node = rb_prev(node);
+- if (!node)
+- break;
++ if (ordered) {
++ node = rb_prev(&ordered->rb_node);
++ } else {
++ prev = tree_search(tree, offset);
++ /*
++ * we insert file extents without involving ordered struct,
++ * so there should be no ordered struct covering this offset
++ */
++ if (prev) {
++ test = rb_entry(prev, struct btrfs_ordered_extent,
++ rb_node);
++ BUG_ON(offset_in_entry(test, offset));
++ }
++ node = prev;
++ }
++ while (node) {
+ test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+ if (test->file_offset + test->len <= disk_i_size)
+ break;
+- if (test->file_offset >= inode->i_size)
++ if (test->file_offset >= i_size)
+ break;
+ if (test->file_offset >= disk_i_size)
+ goto out;
++ node = rb_prev(node);
+ }
+- new_i_size = min_t(u64, entry_end(ordered), i_size_read(inode));
++ new_i_size = min_t(u64, offset, i_size);
+
+ /*
+ * at this point, we know we can safely update i_size to at least
+@@ -647,7 +693,14 @@ int btrfs_ordered_update_i_size(struct inode *inode,
+ * walk forward and see if ios from higher up in the file have
+ * finished.
+ */
+- node = rb_next(&ordered->rb_node);
++ if (ordered) {
++ node = rb_next(&ordered->rb_node);
++ } else {
++ if (prev)
++ node = rb_next(prev);
++ else
++ node = rb_first(&tree->tree);
++ }
+ i_size_test = 0;
+ if (node) {
+ /*
+@@ -655,10 +708,10 @@ int btrfs_ordered_update_i_size(struct inode *inode,
+ * between our ordered extent and the next one.
+ */
+ test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
+- if (test->file_offset > entry_end(ordered))
++ if (test->file_offset > offset)
+ i_size_test = test->file_offset;
+ } else {
+- i_size_test = i_size_read(inode);
++ i_size_test = i_size;
+ }
+
+ /*
+@@ -667,15 +720,25 @@ int btrfs_ordered_update_i_size(struct inode *inode,
+ * are no delalloc bytes in this area, it is safe to update
+ * disk_i_size to the end of the region.
+ */
+- if (i_size_test > entry_end(ordered) &&
+- !test_range_bit(io_tree, entry_end(ordered), i_size_test - 1,
+- EXTENT_DELALLOC, 0, NULL)) {
+- new_i_size = min_t(u64, i_size_test, i_size_read(inode));
++ if (i_size_test > offset &&
++ !test_range_bit(io_tree, offset, i_size_test - 1,
++ EXTENT_DELALLOC, 0, NULL)) {
++ new_i_size = min_t(u64, i_size_test, i_size);
+ }
+ BTRFS_I(inode)->disk_i_size = new_i_size;
++ ret = 0;
+ out:
++ /*
++ * we need to remove the ordered extent with the tree lock held
++ * so that other people calling this function don't find our fully
++ * processed ordered entry and skip updating the i_size
++ */
++ if (ordered)
++ __btrfs_remove_ordered_extent(inode, ordered);
+ mutex_unlock(&tree->mutex);
+- return 0;
++ if (ordered)
++ wake_up(&ordered->wait);
++ return ret;
+ }
+
+ /*
+diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
+index f82e874..1fe1282 100644
+--- a/fs/btrfs/ordered-data.h
++++ b/fs/btrfs/ordered-data.h
+@@ -150,12 +150,13 @@ void btrfs_start_ordered_extent(struct inode *inode,
+ int btrfs_wait_ordered_range(struct inode *inode, u64 start, u64 len);
+ struct btrfs_ordered_extent *
+ btrfs_lookup_first_ordered_extent(struct inode * inode, u64 file_offset);
+-int btrfs_ordered_update_i_size(struct inode *inode,
++int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
+ struct btrfs_ordered_extent *ordered);
+ int btrfs_find_ordered_sum(struct inode *inode, u64 offset, u64 disk_bytenr, u32 *sum);
+-int btrfs_wait_ordered_extents(struct btrfs_root *root, int nocow_only);
+ int btrfs_run_ordered_operations(struct btrfs_root *root, int wait);
+ int btrfs_add_ordered_operation(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root,
+ struct inode *inode);
++int btrfs_wait_ordered_extents(struct btrfs_root *root,
++ int nocow_only, int delay_iput);
+ #endif
+diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
+index cfcc93c..ab7ab53 100644
+--- a/fs/btrfs/relocation.c
++++ b/fs/btrfs/relocation.c
+@@ -1561,6 +1561,20 @@ static int invalidate_extent_cache(struct btrfs_root *root,
+ return 0;
+ }
+
++static void put_inodes(struct list_head *list)
++{
++ struct inodevec *ivec;
++ while (!list_empty(list)) {
++ ivec = list_entry(list->next, struct inodevec, list);
++ list_del(&ivec->list);
++ while (ivec->nr > 0) {
++ ivec->nr--;
++ iput(ivec->inode[ivec->nr]);
++ }
++ kfree(ivec);
++ }
++}
++
+ static int find_next_key(struct btrfs_path *path, int level,
+ struct btrfs_key *key)
+
+@@ -1723,6 +1737,11 @@ static noinline_for_stack int merge_reloc_root(struct reloc_control *rc,
+
+ btrfs_btree_balance_dirty(root, nr);
+
++ /*
++ * put inodes outside transaction, otherwise we may deadlock.
++ */
++ put_inodes(&inode_list);
++
+ if (replaced && rc->stage == UPDATE_DATA_PTRS)
+ invalidate_extent_cache(root, &key, &next_key);
+ }
+@@ -1752,19 +1771,7 @@ out:
+
+ btrfs_btree_balance_dirty(root, nr);
+
+- /*
+- * put inodes while we aren't holding the tree locks
+- */
+- while (!list_empty(&inode_list)) {
+- struct inodevec *ivec;
+- ivec = list_entry(inode_list.next, struct inodevec, list);
+- list_del(&ivec->list);
+- while (ivec->nr > 0) {
+- ivec->nr--;
+- iput(ivec->inode[ivec->nr]);
+- }
+- kfree(ivec);
+- }
++ put_inodes(&inode_list);
+
+ if (replaced && rc->stage == UPDATE_DATA_PTRS)
+ invalidate_extent_cache(root, &key, &next_key);
+@@ -3274,8 +3281,10 @@ static noinline_for_stack int relocate_block_group(struct reloc_control *rc)
+ return -ENOMEM;
+
+ path = btrfs_alloc_path();
+- if (!path)
++ if (!path) {
++ kfree(cluster);
+ return -ENOMEM;
++ }
+
+ rc->extents_found = 0;
+ rc->extents_skipped = 0;
+@@ -3534,8 +3543,8 @@ int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
+ (unsigned long long)rc->block_group->key.objectid,
+ (unsigned long long)rc->block_group->flags);
+
+- btrfs_start_delalloc_inodes(fs_info->tree_root);
+- btrfs_wait_ordered_extents(fs_info->tree_root, 0);
++ btrfs_start_delalloc_inodes(fs_info->tree_root, 0);
++ btrfs_wait_ordered_extents(fs_info->tree_root, 0, 0);
+
+ while (1) {
+ rc->extents_found = 0;
+@@ -3755,6 +3764,8 @@ out:
+ BTRFS_DATA_RELOC_TREE_OBJECTID);
+ if (IS_ERR(fs_root))
+ err = PTR_ERR(fs_root);
++ else
++ btrfs_orphan_cleanup(fs_root);
+ }
+ return err;
+ }
+diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
+index 752a546..a649305 100644
+--- a/fs/btrfs/super.c
++++ b/fs/btrfs/super.c
+@@ -126,8 +126,9 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
+ {
+ struct btrfs_fs_info *info = root->fs_info;
+ substring_t args[MAX_OPT_ARGS];
+- char *p, *num;
++ char *p, *num, *orig;
+ int intarg;
++ int ret = 0;
+
+ if (!options)
+ return 0;
+@@ -140,6 +141,7 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
+ if (!options)
+ return -ENOMEM;
+
++ orig = options;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+@@ -262,12 +264,18 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
+ case Opt_discard:
+ btrfs_set_opt(info->mount_opt, DISCARD);
+ break;
++ case Opt_err:
++ printk(KERN_INFO "btrfs: unrecognized mount option "
++ "'%s'\n", p);
++ ret = -EINVAL;
++ goto out;
+ default:
+ break;
+ }
+ }
+- kfree(options);
+- return 0;
++out:
++ kfree(orig);
++ return ret;
+ }
+
+ /*
+@@ -405,8 +413,8 @@ int btrfs_sync_fs(struct super_block *sb, int wait)
+ return 0;
+ }
+
+- btrfs_start_delalloc_inodes(root);
+- btrfs_wait_ordered_extents(root, 0);
++ btrfs_start_delalloc_inodes(root, 0);
++ btrfs_wait_ordered_extents(root, 0, 0);
+
+ trans = btrfs_start_transaction(root, 1);
+ ret = btrfs_commit_transaction(trans, root);
+@@ -450,6 +458,8 @@ static int btrfs_show_options(struct seq_file *seq, struct vfsmount *vfs)
+ seq_puts(seq, ",notreelog");
+ if (btrfs_test_opt(root, FLUSHONCOMMIT))
+ seq_puts(seq, ",flushoncommit");
++ if (btrfs_test_opt(root, DISCARD))
++ seq_puts(seq, ",discard");
+ if (!(root->fs_info->sb->s_flags & MS_POSIXACL))
+ seq_puts(seq, ",noacl");
+ return 0;
+diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
+index c207e8c..b2acc79 100644
+--- a/fs/btrfs/transaction.c
++++ b/fs/btrfs/transaction.c
+@@ -333,6 +333,9 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans,
+ memset(trans, 0, sizeof(*trans));
+ kmem_cache_free(btrfs_trans_handle_cachep, trans);
+
++ if (throttle)
++ btrfs_run_delayed_iputs(root);
++
+ return 0;
+ }
+
+@@ -354,7 +357,7 @@ int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
+ * those extents are sent to disk but does not wait on them
+ */
+ int btrfs_write_marked_extents(struct btrfs_root *root,
+- struct extent_io_tree *dirty_pages)
++ struct extent_io_tree *dirty_pages, int mark)
+ {
+ int ret;
+ int err = 0;
+@@ -367,7 +370,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
+
+ while (1) {
+ ret = find_first_extent_bit(dirty_pages, start, &start, &end,
+- EXTENT_DIRTY);
++ mark);
+ if (ret)
+ break;
+ while (start <= end) {
+@@ -413,7 +416,7 @@ int btrfs_write_marked_extents(struct btrfs_root *root,
+ * on all the pages and clear them from the dirty pages state tree
+ */
+ int btrfs_wait_marked_extents(struct btrfs_root *root,
+- struct extent_io_tree *dirty_pages)
++ struct extent_io_tree *dirty_pages, int mark)
+ {
+ int ret;
+ int err = 0;
+@@ -425,12 +428,12 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
+ unsigned long index;
+
+ while (1) {
+- ret = find_first_extent_bit(dirty_pages, 0, &start, &end,
+- EXTENT_DIRTY);
++ ret = find_first_extent_bit(dirty_pages, start, &start, &end,
++ mark);
+ if (ret)
+ break;
+
+- clear_extent_dirty(dirty_pages, start, end, GFP_NOFS);
++ clear_extent_bits(dirty_pages, start, end, mark, GFP_NOFS);
+ while (start <= end) {
+ index = start >> PAGE_CACHE_SHIFT;
+ start = (u64)(index + 1) << PAGE_CACHE_SHIFT;
+@@ -460,13 +463,13 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
+ * those extents are on disk for transaction or log commit
+ */
+ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+- struct extent_io_tree *dirty_pages)
++ struct extent_io_tree *dirty_pages, int mark)
+ {
+ int ret;
+ int ret2;
+
+- ret = btrfs_write_marked_extents(root, dirty_pages);
+- ret2 = btrfs_wait_marked_extents(root, dirty_pages);
++ ret = btrfs_write_marked_extents(root, dirty_pages, mark);
++ ret2 = btrfs_wait_marked_extents(root, dirty_pages, mark);
+ return ret || ret2;
+ }
+
+@@ -479,7 +482,8 @@ int btrfs_write_and_wait_transaction(struct btrfs_trans_handle *trans,
+ return filemap_write_and_wait(btree_inode->i_mapping);
+ }
+ return btrfs_write_and_wait_marked_extents(root,
+- &trans->transaction->dirty_pages);
++ &trans->transaction->dirty_pages,
++ EXTENT_DIRTY);
+ }
+
+ /*
+@@ -497,13 +501,16 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
+ {
+ int ret;
+ u64 old_root_bytenr;
++ u64 old_root_used;
+ struct btrfs_root *tree_root = root->fs_info->tree_root;
+
++ old_root_used = btrfs_root_used(&root->root_item);
+ btrfs_write_dirty_block_groups(trans, root);
+
+ while (1) {
+ old_root_bytenr = btrfs_root_bytenr(&root->root_item);
+- if (old_root_bytenr == root->node->start)
++ if (old_root_bytenr == root->node->start &&
++ old_root_used == btrfs_root_used(&root->root_item))
+ break;
+
+ btrfs_set_root_node(&root->root_item, root->node);
+@@ -512,6 +519,7 @@ static int update_cowonly_root(struct btrfs_trans_handle *trans,
+ &root->root_item);
+ BUG_ON(ret);
+
++ old_root_used = btrfs_root_used(&root->root_item);
+ ret = btrfs_write_dirty_block_groups(trans, root);
+ BUG_ON(ret);
+ }
+@@ -795,7 +803,6 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
+ memcpy(&pending->root_key, &key, sizeof(key));
+ fail:
+ kfree(new_root_item);
+- btrfs_unreserve_metadata_space(root, 6);
+ return ret;
+ }
+
+@@ -807,7 +814,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
+ u64 index = 0;
+ struct btrfs_trans_handle *trans;
+ struct inode *parent_inode;
+- struct inode *inode;
+ struct btrfs_root *parent_root;
+
+ parent_inode = pending->dentry->d_parent->d_inode;
+@@ -839,8 +845,6 @@ static noinline int finish_pending_snapshot(struct btrfs_fs_info *fs_info,
+
+ BUG_ON(ret);
+
+- inode = btrfs_lookup_dentry(parent_inode, pending->dentry);
+- d_instantiate(pending->dentry, inode);
+ fail:
+ btrfs_end_transaction(trans, fs_info->fs_root);
+ return ret;
+@@ -994,11 +998,11 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+ mutex_unlock(&root->fs_info->trans_mutex);
+
+ if (flush_on_commit) {
+- btrfs_start_delalloc_inodes(root);
+- ret = btrfs_wait_ordered_extents(root, 0);
++ btrfs_start_delalloc_inodes(root, 1);
++ ret = btrfs_wait_ordered_extents(root, 0, 1);
+ BUG_ON(ret);
+ } else if (snap_pending) {
+- ret = btrfs_wait_ordered_extents(root, 1);
++ ret = btrfs_wait_ordered_extents(root, 0, 1);
+ BUG_ON(ret);
+ }
+
+@@ -1116,6 +1120,10 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans,
+ current->journal_info = NULL;
+
+ kmem_cache_free(btrfs_trans_handle_cachep, trans);
++
++ if (current != root->fs_info->transaction_kthread)
++ btrfs_run_delayed_iputs(root);
++
+ return ret;
+ }
+
+diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
+index d4e3e7a..93c7ccb 100644
+--- a/fs/btrfs/transaction.h
++++ b/fs/btrfs/transaction.h
+@@ -107,10 +107,10 @@ void btrfs_throttle(struct btrfs_root *root);
+ int btrfs_record_root_in_trans(struct btrfs_trans_handle *trans,
+ struct btrfs_root *root);
+ int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
+- struct extent_io_tree *dirty_pages);
++ struct extent_io_tree *dirty_pages, int mark);
+ int btrfs_write_marked_extents(struct btrfs_root *root,
+- struct extent_io_tree *dirty_pages);
++ struct extent_io_tree *dirty_pages, int mark);
+ int btrfs_wait_marked_extents(struct btrfs_root *root,
+- struct extent_io_tree *dirty_pages);
++ struct extent_io_tree *dirty_pages, int mark);
+ int btrfs_transaction_in_commit(struct btrfs_fs_info *info);
+ #endif
+diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
+index 741666a..4a9434b 100644
+--- a/fs/btrfs/tree-log.c
++++ b/fs/btrfs/tree-log.c
+@@ -542,8 +542,8 @@ static noinline int replay_one_extent(struct btrfs_trans_handle *trans,
+
+ saved_nbytes = inode_get_bytes(inode);
+ /* drop any overlapping extents */
+- ret = btrfs_drop_extents(trans, root, inode,
+- start, extent_end, extent_end, start, &alloc_hint, 1);
++ ret = btrfs_drop_extents(trans, inode, start, extent_end,
++ &alloc_hint, 1);
+ BUG_ON(ret);
+
+ if (found_type == BTRFS_FILE_EXTENT_REG ||
+@@ -930,6 +930,17 @@ out_nowrite:
+ return 0;
+ }
+
++static int insert_orphan_item(struct btrfs_trans_handle *trans,
++ struct btrfs_root *root, u64 offset)
++{
++ int ret;
++ ret = btrfs_find_orphan_item(root, offset);
++ if (ret > 0)
++ ret = btrfs_insert_orphan_item(trans, root, offset);
++ return ret;
++}
++
++
+ /*
+ * There are a few corners where the link count of the file can't
+ * be properly maintained during replay. So, instead of adding
+@@ -997,9 +1008,13 @@ static noinline int fixup_inode_link_count(struct btrfs_trans_handle *trans,
+ }
+ BTRFS_I(inode)->index_cnt = (u64)-1;
+
+- if (inode->i_nlink == 0 && S_ISDIR(inode->i_mode)) {
+- ret = replay_dir_deletes(trans, root, NULL, path,
+- inode->i_ino, 1);
++ if (inode->i_nlink == 0) {
++ if (S_ISDIR(inode->i_mode)) {
++ ret = replay_dir_deletes(trans, root, NULL, path,
++ inode->i_ino, 1);
++ BUG_ON(ret);
++ }
++ ret = insert_orphan_item(trans, root, inode->i_ino);
+ BUG_ON(ret);
+ }
+ btrfs_free_path(path);
+@@ -1587,7 +1602,6 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+ /* inode keys are done during the first stage */
+ if (key.type == BTRFS_INODE_ITEM_KEY &&
+ wc->stage == LOG_WALK_REPLAY_INODES) {
+- struct inode *inode;
+ struct btrfs_inode_item *inode_item;
+ u32 mode;
+
+@@ -1603,31 +1617,16 @@ static int replay_one_buffer(struct btrfs_root *log, struct extent_buffer *eb,
+ eb, i, &key);
+ BUG_ON(ret);
+
+- /* for regular files, truncate away
+- * extents past the new EOF
++ /* for regular files, make sure corresponding
++ * orphan item exists. extents past the new EOF
++ * will be truncated later by orphan cleanup.
+ */
+ if (S_ISREG(mode)) {
+- inode = read_one_inode(root,
+- key.objectid);
+- BUG_ON(!inode);
+-
+- ret = btrfs_truncate_inode_items(wc->trans,
+- root, inode, inode->i_size,
+- BTRFS_EXTENT_DATA_KEY);
++ ret = insert_orphan_item(wc->trans, root,
++ key.objectid);
+ BUG_ON(ret);
+-
+- /* if the nlink count is zero here, the iput
+- * will free the inode. We bump it to make
+- * sure it doesn't get freed until the link
+- * count fixup is done
+- */
+- if (inode->i_nlink == 0) {
+- btrfs_inc_nlink(inode);
+- btrfs_update_inode(wc->trans,
+- root, inode);
+- }
+- iput(inode);
+ }
++
+ ret = link_to_fixup_dir(wc->trans, root,
+ path, key.objectid);
+ BUG_ON(ret);
+@@ -1977,10 +1976,11 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+ {
+ int index1;
+ int index2;
++ int mark;
+ int ret;
+ struct btrfs_root *log = root->log_root;
+ struct btrfs_root *log_root_tree = root->fs_info->log_root_tree;
+- u64 log_transid = 0;
++ unsigned long log_transid = 0;
+
+ mutex_lock(&root->log_mutex);
+ index1 = root->log_transid % 2;
+@@ -2014,24 +2014,29 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+ goto out;
+ }
+
++ log_transid = root->log_transid;
++ if (log_transid % 2 == 0)
++ mark = EXTENT_DIRTY;
++ else
++ mark = EXTENT_NEW;
++
+ /* we start IO on all the marked extents here, but we don't actually
+ * wait for them until later.
+ */
+- ret = btrfs_write_marked_extents(log, &log->dirty_log_pages);
++ ret = btrfs_write_marked_extents(log, &log->dirty_log_pages, mark);
+ BUG_ON(ret);
+
+ btrfs_set_root_node(&log->root_item, log->node);
+
+ root->log_batch = 0;
+- log_transid = root->log_transid;
+ root->log_transid++;
+ log->log_transid = root->log_transid;
+ root->log_start_pid = 0;
+ smp_mb();
+ /*
+- * log tree has been flushed to disk, new modifications of
+- * the log will be written to new positions. so it's safe to
+- * allow log writers to go in.
++ * IO has been started, blocks of the log tree have WRITTEN flag set
++ * in their headers. new modifications of the log will be written to
++ * new positions. so it's safe to allow log writers to go in.
+ */
+ mutex_unlock(&root->log_mutex);
+
+@@ -2052,7 +2057,7 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+
+ index2 = log_root_tree->log_transid % 2;
+ if (atomic_read(&log_root_tree->log_commit[index2])) {
+- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
++ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ wait_log_commit(trans, log_root_tree,
+ log_root_tree->log_transid);
+ mutex_unlock(&log_root_tree->log_mutex);
+@@ -2072,16 +2077,17 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
+ * check the full commit flag again
+ */
+ if (root->fs_info->last_trans_log_full_commit == trans->transid) {
+- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
++ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+ mutex_unlock(&log_root_tree->log_mutex);
+ ret = -EAGAIN;
+ goto out_wake_log_root;
+ }
+
+ ret = btrfs_write_and_wait_marked_extents(log_root_tree,
+- &log_root_tree->dirty_log_pages);
++ &log_root_tree->dirty_log_pages,
++ EXTENT_DIRTY | EXTENT_NEW);
+ BUG_ON(ret);
+- btrfs_wait_marked_extents(log, &log->dirty_log_pages);
++ btrfs_wait_marked_extents(log, &log->dirty_log_pages, mark);
+
+ btrfs_set_super_log_root(&root->fs_info->super_for_commit,
+ log_root_tree->node->start);
+@@ -2147,12 +2153,12 @@ int btrfs_free_log(struct btrfs_trans_handle *trans, struct btrfs_root *root)
+
+ while (1) {
+ ret = find_first_extent_bit(&log->dirty_log_pages,
+- 0, &start, &end, EXTENT_DIRTY);
++ 0, &start, &end, EXTENT_DIRTY | EXTENT_NEW);
+ if (ret)
+ break;
+
+- clear_extent_dirty(&log->dirty_log_pages,
+- start, end, GFP_NOFS);
++ clear_extent_bits(&log->dirty_log_pages, start, end,
++ EXTENT_DIRTY | EXTENT_NEW, GFP_NOFS);
+ }
+
+ if (log->log_transid > 0) {
+diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
+index 7eda483..41ecbb2 100644
+--- a/fs/btrfs/volumes.c
++++ b/fs/btrfs/volumes.c
+@@ -1135,7 +1135,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+ root->fs_info->avail_metadata_alloc_bits;
+
+ if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) &&
+- root->fs_info->fs_devices->rw_devices <= 4) {
++ root->fs_info->fs_devices->num_devices <= 4) {
+ printk(KERN_ERR "btrfs: unable to go below four devices "
+ "on raid10\n");
+ ret = -EINVAL;
+@@ -1143,7 +1143,7 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
+ }
+
+ if ((all_avail & BTRFS_BLOCK_GROUP_RAID1) &&
+- root->fs_info->fs_devices->rw_devices <= 2) {
++ root->fs_info->fs_devices->num_devices <= 2) {
+ printk(KERN_ERR "btrfs: unable to go below two "
+ "devices on raid1\n");
+ ret = -EINVAL;
+@@ -1434,8 +1434,8 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
+ return -EINVAL;
+
+ bdev = open_bdev_exclusive(device_path, 0, root->fs_info->bdev_holder);
+- if (!bdev)
+- return -EIO;
++ if (IS_ERR(bdev))
++ return PTR_ERR(bdev);
+
+ if (root->fs_info->fs_devices->seeding) {
+ seeding_dev = 1;
+@@ -2209,7 +2209,7 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans,
+ max_chunk_size = 10 * calc_size;
+ min_stripe_size = 64 * 1024 * 1024;
+ } else if (type & BTRFS_BLOCK_GROUP_METADATA) {
+- max_chunk_size = 4 * calc_size;
++ max_chunk_size = 256 * 1024 * 1024;
+ min_stripe_size = 32 * 1024 * 1024;
+ } else if (type & BTRFS_BLOCK_GROUP_SYSTEM) {
+ calc_size = 8 * 1024 * 1024;
+@@ -2538,6 +2538,11 @@ int btrfs_chunk_readonly(struct btrfs_root *root, u64 chunk_offset)
+ if (!em)
+ return 1;
+
++ if (btrfs_test_opt(root, DEGRADED)) {
++ free_extent_map(em);
++ return 0;
++ }
++
+ map = (struct map_lookup *)em->bdev;
+ for (i = 0; i < map->num_stripes; i++) {
+ if (!map->stripes[i].dev->writeable) {
+@@ -2649,8 +2654,10 @@ again:
+ em = lookup_extent_mapping(em_tree, logical, *length);
+ read_unlock(&em_tree->lock);
+
+- if (!em && unplug_page)
++ if (!em && unplug_page) {
++ kfree(multi);
+ return 0;
++ }
+
+ if (!em) {
+ printk(KERN_CRIT "unable to find logical %llu len %llu\n",
+diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
+index b6dd596..193b58f 100644
+--- a/fs/btrfs/xattr.c
++++ b/fs/btrfs/xattr.c
+@@ -85,22 +85,23 @@ out:
+ return ret;
+ }
+
+-int __btrfs_setxattr(struct inode *inode, const char *name,
+- const void *value, size_t size, int flags)
++static int do_setxattr(struct btrfs_trans_handle *trans,
++ struct inode *inode, const char *name,
++ const void *value, size_t size, int flags)
+ {
+ struct btrfs_dir_item *di;
+ struct btrfs_root *root = BTRFS_I(inode)->root;
+- struct btrfs_trans_handle *trans;
+ struct btrfs_path *path;
+- int ret = 0, mod = 0;
++ size_t name_len = strlen(name);
++ int ret = 0;
++
++ if (name_len + size > BTRFS_MAX_XATTR_SIZE(root))
++ return -ENOSPC;
+
+ path = btrfs_alloc_path();
+ if (!path)
+ return -ENOMEM;
+
+- trans = btrfs_join_transaction(root, 1);
+- btrfs_set_trans_block_group(trans, inode);
+-
+ /* first lets see if we already have this xattr */
+ di = btrfs_lookup_xattr(trans, root, path, inode->i_ino, name,
+ strlen(name), -1);
+@@ -118,15 +119,12 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
+ }
+
+ ret = btrfs_delete_one_dir_name(trans, root, path, di);
+- if (ret)
+- goto out;
++ BUG_ON(ret);
+ btrfs_release_path(root, path);
+
+ /* if we don't have a value then we are removing the xattr */
+- if (!value) {
+- mod = 1;
++ if (!value)
+ goto out;
+- }
+ } else {
+ btrfs_release_path(root, path);
+
+@@ -138,20 +136,45 @@ int __btrfs_setxattr(struct inode *inode, const char *name,
+ }
+
+ /* ok we have to create a completely new xattr */
+- ret = btrfs_insert_xattr_item(trans, root, name, strlen(name),
+- value, size, inode->i_ino);
++ ret = btrfs_insert_xattr_item(trans, root, path, inode->i_ino,
++ name, name_len, value, size);
++ BUG_ON(ret);
++out:
++ btrfs_free_path(path);
++ return ret;
++}
++
++int __btrfs_setxattr(struct btrfs_trans_handle *trans,
++ struct inode *inode, const char *name,
++ const void *value, size_t size, int flags)
++{
++ struct btrfs_root *root = BTRFS_I(inode)->root;
++ int ret;
++
++ if (trans)
++ return do_setxattr(trans, inode, name, value, size, flags);
++
++ ret = btrfs_reserve_metadata_space(root, 2);
+ if (ret)
+- goto out;
+- mod = 1;
++ return ret;
+
+-out:
+- if (mod) {
+- inode->i_ctime = CURRENT_TIME;
+- ret = btrfs_update_inode(trans, root, inode);
++ trans = btrfs_start_transaction(root, 1);
++ if (!trans) {
++ ret = -ENOMEM;
++ goto out;
+ }
++ btrfs_set_trans_block_group(trans, inode);
+
+- btrfs_end_transaction(trans, root);
+- btrfs_free_path(path);
++ ret = do_setxattr(trans, inode, name, value, size, flags);
++ if (ret)
++ goto out;
++
++ inode->i_ctime = CURRENT_TIME;
++ ret = btrfs_update_inode(trans, root, inode);
++ BUG_ON(ret);
++out:
++ btrfs_end_transaction_throttle(trans, root);
++ btrfs_unreserve_metadata_space(root, 2);
+ return ret;
+ }
+
+@@ -314,7 +337,9 @@ int btrfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+
+ if (size == 0)
+ value = ""; /* empty EA, do not remove */
+- return __btrfs_setxattr(dentry->d_inode, name, value, size, flags);
++
++ return __btrfs_setxattr(NULL, dentry->d_inode, name, value, size,
++ flags);
+ }
+
+ int btrfs_removexattr(struct dentry *dentry, const char *name)
+@@ -329,10 +354,13 @@ int btrfs_removexattr(struct dentry *dentry, const char *name)
+
+ if (!btrfs_is_valid_xattr(name))
+ return -EOPNOTSUPP;
+- return __btrfs_setxattr(dentry->d_inode, name, NULL, 0, XATTR_REPLACE);
++
++ return __btrfs_setxattr(NULL, dentry->d_inode, name, NULL, 0,
++ XATTR_REPLACE);
+ }
+
+-int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
++int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct inode *dir)
+ {
+ int err;
+ size_t len;
+@@ -354,7 +382,7 @@ int btrfs_xattr_security_init(struct inode *inode, struct inode *dir)
+ } else {
+ strcpy(name, XATTR_SECURITY_PREFIX);
+ strcpy(name + XATTR_SECURITY_PREFIX_LEN, suffix);
+- err = __btrfs_setxattr(inode, name, value, len, 0);
++ err = __btrfs_setxattr(trans, inode, name, value, len, 0);
+ kfree(name);
+ }
+
+diff --git a/fs/btrfs/xattr.h b/fs/btrfs/xattr.h
+index c71e9c3..721efa0 100644
+--- a/fs/btrfs/xattr.h
++++ b/fs/btrfs/xattr.h
+@@ -27,15 +27,16 @@ extern struct xattr_handler *btrfs_xattr_handlers[];
+
+ extern ssize_t __btrfs_getxattr(struct inode *inode, const char *name,
+ void *buffer, size_t size);
+-extern int __btrfs_setxattr(struct inode *inode, const char *name,
+- const void *value, size_t size, int flags);
+-
++extern int __btrfs_setxattr(struct btrfs_trans_handle *trans,
++ struct inode *inode, const char *name,
++ const void *value, size_t size, int flags);
+ extern ssize_t btrfs_getxattr(struct dentry *dentry, const char *name,
+ void *buffer, size_t size);
+ extern int btrfs_setxattr(struct dentry *dentry, const char *name,
+ const void *value, size_t size, int flags);
+ extern int btrfs_removexattr(struct dentry *dentry, const char *name);
+
+-extern int btrfs_xattr_security_init(struct inode *inode, struct inode *dir);
++extern int btrfs_xattr_security_init(struct btrfs_trans_handle *trans,
++ struct inode *inode, struct inode *dir);
+
+ #endif /* __XATTR__ */
+diff --git a/fs/dlm/ast.c b/fs/dlm/ast.c
+index dc2ad60..4314f0d 100644
+--- a/fs/dlm/ast.c
++++ b/fs/dlm/ast.c
+@@ -2,7 +2,7 @@
+ *******************************************************************************
+ **
+ ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
++** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ **
+ ** This copyrighted material is made available to anyone wishing to use,
+ ** modify, copy, or redistribute it subject to the terms and conditions
+@@ -33,10 +33,10 @@ void dlm_del_ast(struct dlm_lkb *lkb)
+ spin_unlock(&ast_queue_lock);
+ }
+
+-void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
++void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode)
+ {
+ if (lkb->lkb_flags & DLM_IFL_USER) {
+- dlm_user_add_ast(lkb, type, bastmode);
++ dlm_user_add_ast(lkb, type, mode);
+ return;
+ }
+
+@@ -44,10 +44,21 @@ void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
+ if (!(lkb->lkb_ast_type & (AST_COMP | AST_BAST))) {
+ kref_get(&lkb->lkb_ref);
+ list_add_tail(&lkb->lkb_astqueue, &ast_queue);
++ lkb->lkb_ast_first = type;
+ }
++
++ /* sanity check, this should not happen */
++
++ if ((type == AST_COMP) && (lkb->lkb_ast_type & AST_COMP))
++ log_print("repeat cast %d castmode %d lock %x %s",
++ mode, lkb->lkb_castmode,
++ lkb->lkb_id, lkb->lkb_resource->res_name);
++
+ lkb->lkb_ast_type |= type;
+- if (bastmode)
+- lkb->lkb_bastmode = bastmode;
++ if (type == AST_BAST)
++ lkb->lkb_bastmode = mode;
++ else
++ lkb->lkb_castmode = mode;
+ spin_unlock(&ast_queue_lock);
+
+ set_bit(WAKE_ASTS, &astd_wakeflags);
+@@ -59,9 +70,9 @@ static void process_asts(void)
+ struct dlm_ls *ls = NULL;
+ struct dlm_rsb *r = NULL;
+ struct dlm_lkb *lkb;
+- void (*cast) (void *astparam);
+- void (*bast) (void *astparam, int mode);
+- int type = 0, bastmode;
++ void (*castfn) (void *astparam);
++ void (*bastfn) (void *astparam, int mode);
++ int type, first, bastmode, castmode, do_bast, do_cast, last_castmode;
+
+ repeat:
+ spin_lock(&ast_queue_lock);
+@@ -75,17 +86,48 @@ repeat:
+ list_del(&lkb->lkb_astqueue);
+ type = lkb->lkb_ast_type;
+ lkb->lkb_ast_type = 0;
++ first = lkb->lkb_ast_first;
++ lkb->lkb_ast_first = 0;
+ bastmode = lkb->lkb_bastmode;
+-
++ castmode = lkb->lkb_castmode;
++ castfn = lkb->lkb_astfn;
++ bastfn = lkb->lkb_bastfn;
+ spin_unlock(&ast_queue_lock);
+- cast = lkb->lkb_astfn;
+- bast = lkb->lkb_bastfn;
+-
+- if ((type & AST_COMP) && cast)
+- cast(lkb->lkb_astparam);
+
+- if ((type & AST_BAST) && bast)
+- bast(lkb->lkb_astparam, bastmode);
++ do_cast = (type & AST_COMP) && castfn;
++ do_bast = (type & AST_BAST) && bastfn;
++
++ /* Skip a bast if its blocking mode is compatible with the
++ granted mode of the preceding cast. */
++
++ if (do_bast) {
++ if (first == AST_COMP)
++ last_castmode = castmode;
++ else
++ last_castmode = lkb->lkb_castmode_done;
++ if (dlm_modes_compat(bastmode, last_castmode))
++ do_bast = 0;
++ }
++
++ if (first == AST_COMP) {
++ if (do_cast)
++ castfn(lkb->lkb_astparam);
++ if (do_bast)
++ bastfn(lkb->lkb_astparam, bastmode);
++ } else if (first == AST_BAST) {
++ if (do_bast)
++ bastfn(lkb->lkb_astparam, bastmode);
++ if (do_cast)
++ castfn(lkb->lkb_astparam);
++ } else {
++ log_error(ls, "bad ast_first %d ast_type %d",
++ first, type);
++ }
++
++ if (do_cast)
++ lkb->lkb_castmode_done = castmode;
++ if (do_bast)
++ lkb->lkb_bastmode_done = bastmode;
+
+ /* this removes the reference added by dlm_add_ast
+ and may result in the lkb being freed */
+diff --git a/fs/dlm/ast.h b/fs/dlm/ast.h
+index 1b5fc5f..bcb1aab 100644
+--- a/fs/dlm/ast.h
++++ b/fs/dlm/ast.h
+@@ -1,7 +1,7 @@
+ /******************************************************************************
+ *******************************************************************************
+ **
+-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
++** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
+ **
+ ** This copyrighted material is made available to anyone wishing to use,
+ ** modify, copy, or redistribute it subject to the terms and conditions
+@@ -13,7 +13,7 @@
+ #ifndef __ASTD_DOT_H__
+ #define __ASTD_DOT_H__
+
+-void dlm_add_ast(struct dlm_lkb *lkb, int type, int bastmode);
++void dlm_add_ast(struct dlm_lkb *lkb, int type, int mode);
+ void dlm_del_ast(struct dlm_lkb *lkb);
+
+ void dlm_astd_wake(void);
+diff --git a/fs/dlm/config.c b/fs/dlm/config.c
+index fd9859f..0df2438 100644
+--- a/fs/dlm/config.c
++++ b/fs/dlm/config.c
+@@ -410,10 +410,10 @@ static struct config_group *make_cluster(struct config_group *g,
+ struct dlm_comms *cms = NULL;
+ void *gps = NULL;
+
+- cl = kzalloc(sizeof(struct dlm_cluster), GFP_KERNEL);
+- gps = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
+- sps = kzalloc(sizeof(struct dlm_spaces), GFP_KERNEL);
+- cms = kzalloc(sizeof(struct dlm_comms), GFP_KERNEL);
++ cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS);
++ gps = kcalloc(3, sizeof(struct config_group *), GFP_NOFS);
++ sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS);
++ cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS);
+
+ if (!cl || !gps || !sps || !cms)
+ goto fail;
+@@ -482,9 +482,9 @@ static struct config_group *make_space(struct config_group *g, const char *name)
+ struct dlm_nodes *nds = NULL;
+ void *gps = NULL;
+
+- sp = kzalloc(sizeof(struct dlm_space), GFP_KERNEL);
+- gps = kcalloc(2, sizeof(struct config_group *), GFP_KERNEL);
+- nds = kzalloc(sizeof(struct dlm_nodes), GFP_KERNEL);
++ sp = kzalloc(sizeof(struct dlm_space), GFP_NOFS);
++ gps = kcalloc(2, sizeof(struct config_group *), GFP_NOFS);
++ nds = kzalloc(sizeof(struct dlm_nodes), GFP_NOFS);
+
+ if (!sp || !gps || !nds)
+ goto fail;
+@@ -536,7 +536,7 @@ static struct config_item *make_comm(struct config_group *g, const char *name)
+ {
+ struct dlm_comm *cm;
+
+- cm = kzalloc(sizeof(struct dlm_comm), GFP_KERNEL);
++ cm = kzalloc(sizeof(struct dlm_comm), GFP_NOFS);
+ if (!cm)
+ return ERR_PTR(-ENOMEM);
+
+@@ -569,7 +569,7 @@ static struct config_item *make_node(struct config_group *g, const char *name)
+ struct dlm_space *sp = config_item_to_space(g->cg_item.ci_parent);
+ struct dlm_node *nd;
+
+- nd = kzalloc(sizeof(struct dlm_node), GFP_KERNEL);
++ nd = kzalloc(sizeof(struct dlm_node), GFP_NOFS);
+ if (!nd)
+ return ERR_PTR(-ENOMEM);
+
+@@ -705,7 +705,7 @@ static ssize_t comm_addr_write(struct dlm_comm *cm, const char *buf, size_t len)
+ if (cm->addr_count >= DLM_MAX_ADDR_COUNT)
+ return -ENOSPC;
+
+- addr = kzalloc(sizeof(*addr), GFP_KERNEL);
++ addr = kzalloc(sizeof(*addr), GFP_NOFS);
+ if (!addr)
+ return -ENOMEM;
+
+@@ -868,7 +868,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
+
+ ids_count = sp->members_count;
+
+- ids = kcalloc(ids_count, sizeof(int), GFP_KERNEL);
++ ids = kcalloc(ids_count, sizeof(int), GFP_NOFS);
+ if (!ids) {
+ rv = -ENOMEM;
+ goto out;
+@@ -886,7 +886,7 @@ int dlm_nodeid_list(char *lsname, int **ids_out, int *ids_count_out,
+ if (!new_count)
+ goto out_ids;
+
+- new = kcalloc(new_count, sizeof(int), GFP_KERNEL);
++ new = kcalloc(new_count, sizeof(int), GFP_NOFS);
+ if (!new) {
+ kfree(ids);
+ rv = -ENOMEM;
+diff --git a/fs/dlm/debug_fs.c b/fs/dlm/debug_fs.c
+index 1c8bb8c..375a235 100644
+--- a/fs/dlm/debug_fs.c
++++ b/fs/dlm/debug_fs.c
+@@ -404,7 +404,7 @@ static void *table_seq_start(struct seq_file *seq, loff_t *pos)
+ if (bucket >= ls->ls_rsbtbl_size)
+ return NULL;
+
+- ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_KERNEL);
++ ri = kzalloc(sizeof(struct rsbtbl_iter), GFP_NOFS);
+ if (!ri)
+ return NULL;
+ if (n == 0)
+diff --git a/fs/dlm/dir.c b/fs/dlm/dir.c
+index c4dfa1d..7b84c1d 100644
+--- a/fs/dlm/dir.c
++++ b/fs/dlm/dir.c
+@@ -49,8 +49,7 @@ static struct dlm_direntry *get_free_de(struct dlm_ls *ls, int len)
+ spin_unlock(&ls->ls_recover_list_lock);
+
+ if (!found)
+- de = kzalloc(sizeof(struct dlm_direntry) + len,
+- ls->ls_allocation);
++ de = kzalloc(sizeof(struct dlm_direntry) + len, GFP_NOFS);
+ return de;
+ }
+
+@@ -212,7 +211,7 @@ int dlm_recover_directory(struct dlm_ls *ls)
+
+ dlm_dir_clear(ls);
+
+- last_name = kmalloc(DLM_RESNAME_MAXLEN, ls->ls_allocation);
++ last_name = kmalloc(DLM_RESNAME_MAXLEN, GFP_NOFS);
+ if (!last_name)
+ goto out;
+
+@@ -323,7 +322,7 @@ static int get_entry(struct dlm_ls *ls, int nodeid, char *name,
+ if (namelen > DLM_RESNAME_MAXLEN)
+ return -EINVAL;
+
+- de = kzalloc(sizeof(struct dlm_direntry) + namelen, ls->ls_allocation);
++ de = kzalloc(sizeof(struct dlm_direntry) + namelen, GFP_NOFS);
+ if (!de)
+ return -ENOMEM;
+
+diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h
+index d01ca0a..f632b58 100644
+--- a/fs/dlm/dlm_internal.h
++++ b/fs/dlm/dlm_internal.h
+@@ -2,7 +2,7 @@
+ *******************************************************************************
+ **
+ ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
+-** Copyright (C) 2004-2008 Red Hat, Inc. All rights reserved.
++** Copyright (C) 2004-2010 Red Hat, Inc. All rights reserved.
+ **
+ ** This copyrighted material is made available to anyone wishing to use,
+ ** modify, copy, or redistribute it subject to the terms and conditions
+@@ -232,11 +232,17 @@ struct dlm_lkb {
+ int8_t lkb_status; /* granted, waiting, convert */
+ int8_t lkb_rqmode; /* requested lock mode */
+ int8_t lkb_grmode; /* granted lock mode */
+- int8_t lkb_bastmode; /* requested mode */
+ int8_t lkb_highbast; /* highest mode bast sent for */
++
+ int8_t lkb_wait_type; /* type of reply waiting for */
+ int8_t lkb_wait_count;
+ int8_t lkb_ast_type; /* type of ast queued for */
++ int8_t lkb_ast_first; /* type of first ast queued */
++
++ int8_t lkb_bastmode; /* req mode of queued bast */
++ int8_t lkb_castmode; /* gr mode of queued cast */
++ int8_t lkb_bastmode_done; /* last delivered bastmode */
++ int8_t lkb_castmode_done; /* last delivered castmode */
+
+ struct list_head lkb_idtbl_list; /* lockspace lkbtbl */
+ struct list_head lkb_statequeue; /* rsb g/c/w list */
+@@ -473,7 +479,6 @@ struct dlm_ls {
+ int ls_low_nodeid;
+ int ls_total_weight;
+ int *ls_node_array;
+- gfp_t ls_allocation;
+
+ struct dlm_rsb ls_stub_rsb; /* for returning errors */
+ struct dlm_lkb ls_stub_lkb; /* for returning errors */
+diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c
+index eb507c4..d0e43a3 100644
+--- a/fs/dlm/lock.c
++++ b/fs/dlm/lock.c
+@@ -1,7 +1,7 @@
+ /******************************************************************************
+ *******************************************************************************
+ **
+-** Copyright (C) 2005-2008 Red Hat, Inc. All rights reserved.
++** Copyright (C) 2005-2010 Red Hat, Inc. All rights reserved.
+ **
+ ** This copyrighted material is made available to anyone wishing to use,
+ ** modify, copy, or redistribute it subject to the terms and conditions
+@@ -307,7 +307,7 @@ static void queue_cast(struct dlm_rsb *r, struct dlm_lkb *lkb, int rv)
+ lkb->lkb_lksb->sb_status = rv;
+ lkb->lkb_lksb->sb_flags = lkb->lkb_sbflags;
+
+- dlm_add_ast(lkb, AST_COMP, 0);
++ dlm_add_ast(lkb, AST_COMP, lkb->lkb_grmode);
+ }
+
+ static inline void queue_cast_overlap(struct dlm_rsb *r, struct dlm_lkb *lkb)
+@@ -2280,20 +2280,30 @@ static int do_request(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ if (can_be_queued(lkb)) {
+ error = -EINPROGRESS;
+ add_lkb(r, lkb, DLM_LKSTS_WAITING);
+- send_blocking_asts(r, lkb);
+ add_timeout(lkb);
+ goto out;
+ }
+
+ error = -EAGAIN;
+- if (force_blocking_asts(lkb))
+- send_blocking_asts_all(r, lkb);
+ queue_cast(r, lkb, -EAGAIN);
+-
+ out:
+ return error;
+ }
+
++static void do_request_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
++ int error)
++{
++ switch (error) {
++ case -EAGAIN:
++ if (force_blocking_asts(lkb))
++ send_blocking_asts_all(r, lkb);
++ break;
++ case -EINPROGRESS:
++ send_blocking_asts(r, lkb);
++ break;
++ }
++}
++
+ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ {
+ int error = 0;
+@@ -2304,7 +2314,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ if (can_be_granted(r, lkb, 1, &deadlk)) {
+ grant_lock(r, lkb);
+ queue_cast(r, lkb, 0);
+- grant_pending_locks(r);
+ goto out;
+ }
+
+@@ -2334,7 +2343,6 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ if (_can_be_granted(r, lkb, 1)) {
+ grant_lock(r, lkb);
+ queue_cast(r, lkb, 0);
+- grant_pending_locks(r);
+ goto out;
+ }
+ /* else fall through and move to convert queue */
+@@ -2344,28 +2352,47 @@ static int do_convert(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ error = -EINPROGRESS;
+ del_lkb(r, lkb);
+ add_lkb(r, lkb, DLM_LKSTS_CONVERT);
+- send_blocking_asts(r, lkb);
+ add_timeout(lkb);
+ goto out;
+ }
+
+ error = -EAGAIN;
+- if (force_blocking_asts(lkb))
+- send_blocking_asts_all(r, lkb);
+ queue_cast(r, lkb, -EAGAIN);
+-
+ out:
+ return error;
+ }
+
++static void do_convert_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
++ int error)
++{
++ switch (error) {
++ case 0:
++ grant_pending_locks(r);
++ /* grant_pending_locks also sends basts */
++ break;
++ case -EAGAIN:
++ if (force_blocking_asts(lkb))
++ send_blocking_asts_all(r, lkb);
++ break;
++ case -EINPROGRESS:
++ send_blocking_asts(r, lkb);
++ break;
++ }
++}
++
+ static int do_unlock(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ {
+ remove_lock(r, lkb);
+ queue_cast(r, lkb, -DLM_EUNLOCK);
+- grant_pending_locks(r);
+ return -DLM_EUNLOCK;
+ }
+
++static void do_unlock_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
++ int error)
++{
++ grant_pending_locks(r);
++}
++
+ /* returns: 0 did nothing, -DLM_ECANCEL canceled lock */
+
+ static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
+@@ -2375,12 +2402,18 @@ static int do_cancel(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ error = revert_lock(r, lkb);
+ if (error) {
+ queue_cast(r, lkb, -DLM_ECANCEL);
+- grant_pending_locks(r);
+ return -DLM_ECANCEL;
+ }
+ return 0;
+ }
+
++static void do_cancel_effects(struct dlm_rsb *r, struct dlm_lkb *lkb,
++ int error)
++{
++ if (error)
++ grant_pending_locks(r);
++}
++
+ /*
+ * Four stage 3 varieties:
+ * _request_lock(), _convert_lock(), _unlock_lock(), _cancel_lock()
+@@ -2402,11 +2435,15 @@ static int _request_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ goto out;
+ }
+
+- if (is_remote(r))
++ if (is_remote(r)) {
+ /* receive_request() calls do_request() on remote node */
+ error = send_request(r, lkb);
+- else
++ } else {
+ error = do_request(r, lkb);
++ /* for remote locks the request_reply is sent
++ between do_request and do_request_effects */
++ do_request_effects(r, lkb, error);
++ }
+ out:
+ return error;
+ }
+@@ -2417,11 +2454,15 @@ static int _convert_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ {
+ int error;
+
+- if (is_remote(r))
++ if (is_remote(r)) {
+ /* receive_convert() calls do_convert() on remote node */
+ error = send_convert(r, lkb);
+- else
++ } else {
+ error = do_convert(r, lkb);
++ /* for remote locks the convert_reply is sent
++ between do_convert and do_convert_effects */
++ do_convert_effects(r, lkb, error);
++ }
+
+ return error;
+ }
+@@ -2432,11 +2473,15 @@ static int _unlock_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ {
+ int error;
+
+- if (is_remote(r))
++ if (is_remote(r)) {
+ /* receive_unlock() calls do_unlock() on remote node */
+ error = send_unlock(r, lkb);
+- else
++ } else {
+ error = do_unlock(r, lkb);
++ /* for remote locks the unlock_reply is sent
++ between do_unlock and do_unlock_effects */
++ do_unlock_effects(r, lkb, error);
++ }
+
+ return error;
+ }
+@@ -2447,11 +2492,15 @@ static int _cancel_lock(struct dlm_rsb *r, struct dlm_lkb *lkb)
+ {
+ int error;
+
+- if (is_remote(r))
++ if (is_remote(r)) {
+ /* receive_cancel() calls do_cancel() on remote node */
+ error = send_cancel(r, lkb);
+- else
++ } else {
+ error = do_cancel(r, lkb);
++ /* for remote locks the cancel_reply is sent
++ between do_cancel and do_cancel_effects */
++ do_cancel_effects(r, lkb, error);
++ }
+
+ return error;
+ }
+@@ -2689,7 +2738,7 @@ static int _create_message(struct dlm_ls *ls, int mb_len,
+ pass into lowcomms_commit and a message buffer (mb) that we
+ write our data into */
+
+- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
++ mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+ if (!mh)
+ return -ENOBUFS;
+
+@@ -3191,6 +3240,7 @@ static void receive_request(struct dlm_ls *ls, struct dlm_message *ms)
+ attach_lkb(r, lkb);
+ error = do_request(r, lkb);
+ send_request_reply(r, lkb, error);
++ do_request_effects(r, lkb, error);
+
+ unlock_rsb(r);
+ put_rsb(r);
+@@ -3226,15 +3276,19 @@ static void receive_convert(struct dlm_ls *ls, struct dlm_message *ms)
+ goto out;
+
+ receive_flags(lkb, ms);
++
+ error = receive_convert_args(ls, lkb, ms);
+- if (error)
+- goto out_reply;
++ if (error) {
++ send_convert_reply(r, lkb, error);
++ goto out;
++ }
++
+ reply = !down_conversion(lkb);
+
+ error = do_convert(r, lkb);
+- out_reply:
+ if (reply)
+ send_convert_reply(r, lkb, error);
++ do_convert_effects(r, lkb, error);
+ out:
+ unlock_rsb(r);
+ put_rsb(r);
+@@ -3266,13 +3320,16 @@ static void receive_unlock(struct dlm_ls *ls, struct dlm_message *ms)
+ goto out;
+
+ receive_flags(lkb, ms);
++
+ error = receive_unlock_args(ls, lkb, ms);
+- if (error)
+- goto out_reply;
++ if (error) {
++ send_unlock_reply(r, lkb, error);
++ goto out;
++ }
+
+ error = do_unlock(r, lkb);
+- out_reply:
+ send_unlock_reply(r, lkb, error);
++ do_unlock_effects(r, lkb, error);
+ out:
+ unlock_rsb(r);
+ put_rsb(r);
+@@ -3307,6 +3364,7 @@ static void receive_cancel(struct dlm_ls *ls, struct dlm_message *ms)
+
+ error = do_cancel(r, lkb);
+ send_cancel_reply(r, lkb, error);
++ do_cancel_effects(r, lkb, error);
+ out:
+ unlock_rsb(r);
+ put_rsb(r);
+@@ -4512,7 +4570,7 @@ int dlm_user_request(struct dlm_ls *ls, struct dlm_user_args *ua,
+ }
+
+ if (flags & DLM_LKF_VALBLK) {
+- ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
++ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
+ if (!ua->lksb.sb_lvbptr) {
+ kfree(ua);
+ __put_lkb(ls, lkb);
+@@ -4582,7 +4640,7 @@ int dlm_user_convert(struct dlm_ls *ls, struct dlm_user_args *ua_tmp,
+ ua = lkb->lkb_ua;
+
+ if (flags & DLM_LKF_VALBLK && !ua->lksb.sb_lvbptr) {
+- ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_KERNEL);
++ ua->lksb.sb_lvbptr = kzalloc(DLM_USER_LVB_LEN, GFP_NOFS);
+ if (!ua->lksb.sb_lvbptr) {
+ error = -ENOMEM;
+ goto out_put;
+diff --git a/fs/dlm/lockspace.c b/fs/dlm/lockspace.c
+index d489fcc..c010ecf 100644
+--- a/fs/dlm/lockspace.c
++++ b/fs/dlm/lockspace.c
+@@ -430,7 +430,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
+
+ error = -ENOMEM;
+
+- ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_KERNEL);
++ ls = kzalloc(sizeof(struct dlm_ls) + namelen, GFP_NOFS);
+ if (!ls)
+ goto out;
+ memcpy(ls->ls_name, name, namelen);
+@@ -443,11 +443,6 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
+ if (flags & DLM_LSFL_TIMEWARN)
+ set_bit(LSFL_TIMEWARN, &ls->ls_flags);
+
+- if (flags & DLM_LSFL_FS)
+- ls->ls_allocation = GFP_NOFS;
+- else
+- ls->ls_allocation = GFP_KERNEL;
+-
+ /* ls_exflags are forced to match among nodes, and we don't
+ need to require all nodes to have some flags set */
+ ls->ls_exflags = (flags & ~(DLM_LSFL_TIMEWARN | DLM_LSFL_FS |
+@@ -456,7 +451,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
+ size = dlm_config.ci_rsbtbl_size;
+ ls->ls_rsbtbl_size = size;
+
+- ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_KERNEL);
++ ls->ls_rsbtbl = kmalloc(sizeof(struct dlm_rsbtable) * size, GFP_NOFS);
+ if (!ls->ls_rsbtbl)
+ goto out_lsfree;
+ for (i = 0; i < size; i++) {
+@@ -468,7 +463,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
+ size = dlm_config.ci_lkbtbl_size;
+ ls->ls_lkbtbl_size = size;
+
+- ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_KERNEL);
++ ls->ls_lkbtbl = kmalloc(sizeof(struct dlm_lkbtable) * size, GFP_NOFS);
+ if (!ls->ls_lkbtbl)
+ goto out_rsbfree;
+ for (i = 0; i < size; i++) {
+@@ -480,7 +475,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
+ size = dlm_config.ci_dirtbl_size;
+ ls->ls_dirtbl_size = size;
+
+- ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_KERNEL);
++ ls->ls_dirtbl = kmalloc(sizeof(struct dlm_dirtable) * size, GFP_NOFS);
+ if (!ls->ls_dirtbl)
+ goto out_lkbfree;
+ for (i = 0; i < size; i++) {
+@@ -527,7 +522,7 @@ static int new_lockspace(const char *name, int namelen, void **lockspace,
+ mutex_init(&ls->ls_requestqueue_mutex);
+ mutex_init(&ls->ls_clear_proc_locks);
+
+- ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_KERNEL);
++ ls->ls_recover_buf = kmalloc(dlm_config.ci_buffer_size, GFP_NOFS);
+ if (!ls->ls_recover_buf)
+ goto out_dirfree;
+
+diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
+index 70736eb..52cab16 100644
+--- a/fs/dlm/lowcomms.c
++++ b/fs/dlm/lowcomms.c
+@@ -1060,7 +1060,7 @@ static void init_local(void)
+ if (dlm_our_addr(&sas, i))
+ break;
+
+- addr = kmalloc(sizeof(*addr), GFP_KERNEL);
++ addr = kmalloc(sizeof(*addr), GFP_NOFS);
+ if (!addr)
+ break;
+ memcpy(addr, &sas, sizeof(*addr));
+@@ -1099,7 +1099,7 @@ static int sctp_listen_for_all(void)
+ struct sockaddr_storage localaddr;
+ struct sctp_event_subscribe subscribe;
+ int result = -EINVAL, num = 1, i, addr_len;
+- struct connection *con = nodeid2con(0, GFP_KERNEL);
++ struct connection *con = nodeid2con(0, GFP_NOFS);
+ int bufsize = NEEDED_RMEM;
+
+ if (!con)
+@@ -1171,7 +1171,7 @@ out:
+ static int tcp_listen_for_all(void)
+ {
+ struct socket *sock = NULL;
+- struct connection *con = nodeid2con(0, GFP_KERNEL);
++ struct connection *con = nodeid2con(0, GFP_NOFS);
+ int result = -EINVAL;
+
+ if (!con)
+diff --git a/fs/dlm/member.c b/fs/dlm/member.c
+index b128775..84f70bf 100644
+--- a/fs/dlm/member.c
++++ b/fs/dlm/member.c
+@@ -48,7 +48,7 @@ static int dlm_add_member(struct dlm_ls *ls, int nodeid)
+ struct dlm_member *memb;
+ int w, error;
+
+- memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
++ memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
+ if (!memb)
+ return -ENOMEM;
+
+@@ -143,7 +143,7 @@ static void make_member_array(struct dlm_ls *ls)
+
+ ls->ls_total_weight = total;
+
+- array = kmalloc(sizeof(int) * total, ls->ls_allocation);
++ array = kmalloc(sizeof(int) * total, GFP_NOFS);
+ if (!array)
+ return;
+
+@@ -226,7 +226,7 @@ int dlm_recover_members(struct dlm_ls *ls, struct dlm_recover *rv, int *neg_out)
+ continue;
+ log_debug(ls, "new nodeid %d is a re-added member", rv->new[i]);
+
+- memb = kzalloc(sizeof(struct dlm_member), ls->ls_allocation);
++ memb = kzalloc(sizeof(struct dlm_member), GFP_NOFS);
+ if (!memb)
+ return -ENOMEM;
+ memb->nodeid = rv->new[i];
+@@ -341,7 +341,7 @@ int dlm_ls_start(struct dlm_ls *ls)
+ int *ids = NULL, *new = NULL;
+ int error, ids_count = 0, new_count = 0;
+
+- rv = kzalloc(sizeof(struct dlm_recover), ls->ls_allocation);
++ rv = kzalloc(sizeof(struct dlm_recover), GFP_NOFS);
+ if (!rv)
+ return -ENOMEM;
+
+diff --git a/fs/dlm/memory.c b/fs/dlm/memory.c
+index c1775b8..8e0d00d 100644
+--- a/fs/dlm/memory.c
++++ b/fs/dlm/memory.c
+@@ -39,7 +39,7 @@ char *dlm_allocate_lvb(struct dlm_ls *ls)
+ {
+ char *p;
+
+- p = kzalloc(ls->ls_lvblen, ls->ls_allocation);
++ p = kzalloc(ls->ls_lvblen, GFP_NOFS);
+ return p;
+ }
+
+@@ -57,7 +57,7 @@ struct dlm_rsb *dlm_allocate_rsb(struct dlm_ls *ls, int namelen)
+
+ DLM_ASSERT(namelen <= DLM_RESNAME_MAXLEN,);
+
+- r = kzalloc(sizeof(*r) + namelen, ls->ls_allocation);
++ r = kzalloc(sizeof(*r) + namelen, GFP_NOFS);
+ return r;
+ }
+
+@@ -72,7 +72,7 @@ struct dlm_lkb *dlm_allocate_lkb(struct dlm_ls *ls)
+ {
+ struct dlm_lkb *lkb;
+
+- lkb = kmem_cache_zalloc(lkb_cache, ls->ls_allocation);
++ lkb = kmem_cache_zalloc(lkb_cache, GFP_NOFS);
+ return lkb;
+ }
+
+diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c
+index 55ea369..052095c 100644
+--- a/fs/dlm/netlink.c
++++ b/fs/dlm/netlink.c
+@@ -26,7 +26,7 @@ static int prepare_data(u8 cmd, struct sk_buff **skbp, size_t size)
+ struct sk_buff *skb;
+ void *data;
+
+- skb = genlmsg_new(size, GFP_KERNEL);
++ skb = genlmsg_new(size, GFP_NOFS);
+ if (!skb)
+ return -ENOMEM;
+
+diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c
+index 16f682e..2863deb 100644
+--- a/fs/dlm/plock.c
++++ b/fs/dlm/plock.c
+@@ -82,7 +82,7 @@ int dlm_posix_lock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ if (!ls)
+ return -EINVAL;
+
+- xop = kzalloc(sizeof(*xop), GFP_KERNEL);
++ xop = kzalloc(sizeof(*xop), GFP_NOFS);
+ if (!xop) {
+ rv = -ENOMEM;
+ goto out;
+@@ -211,7 +211,7 @@ int dlm_posix_unlock(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ if (!ls)
+ return -EINVAL;
+
+- op = kzalloc(sizeof(*op), GFP_KERNEL);
++ op = kzalloc(sizeof(*op), GFP_NOFS);
+ if (!op) {
+ rv = -ENOMEM;
+ goto out;
+@@ -266,7 +266,7 @@ int dlm_posix_get(dlm_lockspace_t *lockspace, u64 number, struct file *file,
+ if (!ls)
+ return -EINVAL;
+
+- op = kzalloc(sizeof(*op), GFP_KERNEL);
++ op = kzalloc(sizeof(*op), GFP_NOFS);
+ if (!op) {
+ rv = -ENOMEM;
+ goto out;
+diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
+index 67522c2..3c83a49 100644
+--- a/fs/dlm/rcom.c
++++ b/fs/dlm/rcom.c
+@@ -38,7 +38,7 @@ static int create_rcom(struct dlm_ls *ls, int to_nodeid, int type, int len,
+ char *mb;
+ int mb_len = sizeof(struct dlm_rcom) + len;
+
+- mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, ls->ls_allocation, &mb);
++ mh = dlm_lowcomms_get_buffer(to_nodeid, mb_len, GFP_NOFS, &mb);
+ if (!mh) {
+ log_print("create_rcom to %d type %d len %d ENOBUFS",
+ to_nodeid, type, len);
+diff --git a/fs/dlm/requestqueue.c b/fs/dlm/requestqueue.c
+index 7a2307c..a44fa22 100644
+--- a/fs/dlm/requestqueue.c
++++ b/fs/dlm/requestqueue.c
+@@ -35,7 +35,7 @@ void dlm_add_requestqueue(struct dlm_ls *ls, int nodeid, struct dlm_message *ms)
+ struct rq_entry *e;
+ int length = ms->m_header.h_length - sizeof(struct dlm_message);
+
+- e = kmalloc(sizeof(struct rq_entry) + length, ls->ls_allocation);
++ e = kmalloc(sizeof(struct rq_entry) + length, GFP_NOFS);
+ if (!e) {
+ log_print("dlm_add_requestqueue: out of memory len %d", length);
+ return;
+diff --git a/fs/dlm/user.c b/fs/dlm/user.c
+index ebce994..a4bfd31 100644
+--- a/fs/dlm/user.c
++++ b/fs/dlm/user.c
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2006-2009 Red Hat, Inc. All rights reserved.
++ * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+@@ -173,7 +173,7 @@ static int lkb_is_endoflife(struct dlm_lkb *lkb, int sb_status, int type)
+ /* we could possibly check if the cancel of an orphan has resulted in the lkb
+ being removed and then remove that lkb from the orphans list and free it */
+
+-void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
++void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode)
+ {
+ struct dlm_ls *ls;
+ struct dlm_user_args *ua;
+@@ -206,8 +206,10 @@ void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode)
+
+ ast_type = lkb->lkb_ast_type;
+ lkb->lkb_ast_type |= type;
+- if (bastmode)
+- lkb->lkb_bastmode = bastmode;
++ if (type == AST_BAST)
++ lkb->lkb_bastmode = mode;
++ else
++ lkb->lkb_castmode = mode;
+
+ if (!ast_type) {
+ kref_get(&lkb->lkb_ref);
+@@ -267,7 +269,7 @@ static int device_user_lock(struct dlm_user_proc *proc,
+ goto out;
+ }
+
+- ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
++ ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
+ if (!ua)
+ goto out;
+ ua->proc = proc;
+@@ -307,7 +309,7 @@ static int device_user_unlock(struct dlm_user_proc *proc,
+ if (!ls)
+ return -ENOENT;
+
+- ua = kzalloc(sizeof(struct dlm_user_args), GFP_KERNEL);
++ ua = kzalloc(sizeof(struct dlm_user_args), GFP_NOFS);
+ if (!ua)
+ goto out;
+ ua->proc = proc;
+@@ -352,7 +354,7 @@ static int dlm_device_register(struct dlm_ls *ls, char *name)
+
+ error = -ENOMEM;
+ len = strlen(name) + strlen(name_prefix) + 2;
+- ls->ls_device.name = kzalloc(len, GFP_KERNEL);
++ ls->ls_device.name = kzalloc(len, GFP_NOFS);
+ if (!ls->ls_device.name)
+ goto fail;
+
+@@ -520,7 +522,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
+ #endif
+ return -EINVAL;
+
+- kbuf = kzalloc(count + 1, GFP_KERNEL);
++ kbuf = kzalloc(count + 1, GFP_NOFS);
+ if (!kbuf)
+ return -ENOMEM;
+
+@@ -546,7 +548,7 @@ static ssize_t device_write(struct file *file, const char __user *buf,
+
+ /* add 1 after namelen so that the name string is terminated */
+ kbuf = kzalloc(sizeof(struct dlm_write_request) + namelen + 1,
+- GFP_KERNEL);
++ GFP_NOFS);
+ if (!kbuf) {
+ kfree(k32buf);
+ return -ENOMEM;
+@@ -648,7 +650,7 @@ static int device_open(struct inode *inode, struct file *file)
+ if (!ls)
+ return -ENOENT;
+
+- proc = kzalloc(sizeof(struct dlm_user_proc), GFP_KERNEL);
++ proc = kzalloc(sizeof(struct dlm_user_proc), GFP_NOFS);
+ if (!proc) {
+ dlm_put_lockspace(ls);
+ return -ENOMEM;
+diff --git a/fs/dlm/user.h b/fs/dlm/user.h
+index 1c96864..f196091 100644
+--- a/fs/dlm/user.h
++++ b/fs/dlm/user.h
+@@ -1,5 +1,5 @@
+ /*
+- * Copyright (C) 2006-2008 Red Hat, Inc. All rights reserved.
++ * Copyright (C) 2006-2010 Red Hat, Inc. All rights reserved.
+ *
+ * This copyrighted material is made available to anyone wishing to use,
+ * modify, copy, or redistribute it subject to the terms and conditions
+@@ -9,7 +9,7 @@
+ #ifndef __USER_DOT_H__
+ #define __USER_DOT_H__
+
+-void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int bastmode);
++void dlm_user_add_ast(struct dlm_lkb *lkb, int type, int mode);
+ int dlm_user_init(void);
+ void dlm_user_exit(void);
+ int dlm_device_deregister(struct dlm_ls *ls);
+diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
+index 1744f17..4e25328 100644
+--- a/fs/ecryptfs/file.c
++++ b/fs/ecryptfs/file.c
+@@ -198,7 +198,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
+ "the persistent file for the dentry with name "
+ "[%s]; rc = [%d]\n", __func__,
+ ecryptfs_dentry->d_name.name, rc);
+- goto out;
++ goto out_free;
+ }
+ }
+ if ((ecryptfs_inode_to_private(inode)->lower_file->f_flags & O_RDONLY)
+@@ -206,7 +206,7 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
+ rc = -EPERM;
+ printk(KERN_WARNING "%s: Lower persistent file is RO; eCryptfs "
+ "file must hence be opened RO\n", __func__);
+- goto out;
++ goto out_free;
+ }
+ ecryptfs_set_file_lower(
+ file, ecryptfs_inode_to_private(inode)->lower_file);
+@@ -293,12 +293,40 @@ static int ecryptfs_fasync(int fd, struct file *file, int flag)
+ return rc;
+ }
+
+-static int ecryptfs_ioctl(struct inode *inode, struct file *file,
+- unsigned int cmd, unsigned long arg);
++static long
++ecryptfs_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ struct file *lower_file = NULL;
++ long rc = -ENOTTY;
++
++ if (ecryptfs_file_to_private(file))
++ lower_file = ecryptfs_file_to_lower(file);
++ if (lower_file && lower_file->f_op && lower_file->f_op->unlocked_ioctl)
++ rc = lower_file->f_op->unlocked_ioctl(lower_file, cmd, arg);
++ return rc;
++}
++
++#ifdef CONFIG_COMPAT
++static long
++ecryptfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
++{
++ struct file *lower_file = NULL;
++ long rc = -ENOIOCTLCMD;
++
++ if (ecryptfs_file_to_private(file))
++ lower_file = ecryptfs_file_to_lower(file);
++ if (lower_file && lower_file->f_op && lower_file->f_op->compat_ioctl)
++ rc = lower_file->f_op->compat_ioctl(lower_file, cmd, arg);
++ return rc;
++}
++#endif
+
+ const struct file_operations ecryptfs_dir_fops = {
+ .readdir = ecryptfs_readdir,
+- .ioctl = ecryptfs_ioctl,
++ .unlocked_ioctl = ecryptfs_unlocked_ioctl,
++#ifdef CONFIG_COMPAT
++ .compat_ioctl = ecryptfs_compat_ioctl,
++#endif
+ .mmap = generic_file_mmap,
+ .open = ecryptfs_open,
+ .flush = ecryptfs_flush,
+@@ -315,7 +343,10 @@ const struct file_operations ecryptfs_main_fops = {
+ .write = do_sync_write,
+ .aio_write = generic_file_aio_write,
+ .readdir = ecryptfs_readdir,
+- .ioctl = ecryptfs_ioctl,
++ .unlocked_ioctl = ecryptfs_unlocked_ioctl,
++#ifdef CONFIG_COMPAT
++ .compat_ioctl = ecryptfs_compat_ioctl,
++#endif
+ .mmap = generic_file_mmap,
+ .open = ecryptfs_open,
+ .flush = ecryptfs_flush,
+@@ -324,20 +355,3 @@ const struct file_operations ecryptfs_main_fops = {
+ .fasync = ecryptfs_fasync,
+ .splice_read = generic_file_splice_read,
+ };
+-
+-static int
+-ecryptfs_ioctl(struct inode *inode, struct file *file, unsigned int cmd,
+- unsigned long arg)
+-{
+- int rc = 0;
+- struct file *lower_file = NULL;
+-
+- if (ecryptfs_file_to_private(file))
+- lower_file = ecryptfs_file_to_lower(file);
+- if (lower_file && lower_file->f_op && lower_file->f_op->ioctl)
+- rc = lower_file->f_op->ioctl(ecryptfs_inode_to_lower(inode),
+- lower_file, cmd, arg);
+- else
+- rc = -ENOTTY;
+- return rc;
+-}
+diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
+index 268b7d1..585987c 100644
+--- a/fs/ecryptfs/inode.c
++++ b/fs/ecryptfs/inode.c
+@@ -272,7 +272,7 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
+ printk(KERN_ERR "%s: Out of memory whilst attempting "
+ "to allocate ecryptfs_dentry_info struct\n",
+ __func__);
+- goto out_dput;
++ goto out_put;
+ }
+ ecryptfs_set_dentry_lower(ecryptfs_dentry, lower_dentry);
+ ecryptfs_set_dentry_lower_mnt(ecryptfs_dentry, lower_mnt);
+@@ -345,8 +345,9 @@ int ecryptfs_lookup_and_interpose_lower(struct dentry *ecryptfs_dentry,
+ out_free_kmem:
+ kmem_cache_free(ecryptfs_header_cache_2, page_virt);
+ goto out;
+-out_dput:
++out_put:
+ dput(lower_dentry);
++ mntput(lower_mnt);
+ d_drop(ecryptfs_dentry);
+ out:
+ return rc;
+diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
+index 5b14d11..a73ed78 100644
+--- a/fs/ext4/move_extent.c
++++ b/fs/ext4/move_extent.c
+@@ -958,6 +958,9 @@ mext_check_arguments(struct inode *orig_inode,
+ return -EINVAL;
+ }
+
++ if (IS_IMMUTABLE(donor_inode) || IS_APPEND(donor_inode))
++ return -EPERM;
++
+ /* Ext4 move extent does not support swapfile */
+ if (IS_SWAPFILE(orig_inode) || IS_SWAPFILE(donor_inode)) {
+ ext4_debug("ext4 move extent: The argument files should "
+diff --git a/fs/ext4/super.c b/fs/ext4/super.c
+index 54a05cc..f27e045 100644
+--- a/fs/ext4/super.c
++++ b/fs/ext4/super.c
+@@ -227,7 +227,7 @@ handle_t *ext4_journal_start_sb(struct super_block *sb, int nblocks)
+ if (sb->s_flags & MS_RDONLY)
+ return ERR_PTR(-EROFS);
+
+- vfs_check_frozen(sb, SB_FREEZE_WRITE);
++ vfs_check_frozen(sb, SB_FREEZE_TRANS);
+ /* Special case here: if the journal has aborted behind our
+ * backs (eg. EIO in the commit thread), then we still need to
+ * take the FS itself readonly cleanly. */
+@@ -1218,6 +1218,11 @@ static int parse_options(char *options, struct super_block *sb,
+ if (!*p)
+ continue;
+
++ /*
++ * Initialize args struct so we know whether arg was
++ * found; some options take optional arguments.
++ */
++ args[0].to = args[0].from = 0;
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_bsd_df:
+@@ -1503,10 +1508,11 @@ set_qf_format:
+ clear_opt(sbi->s_mount_opt, BARRIER);
+ break;
+ case Opt_barrier:
+- if (match_int(&args[0], &option)) {
+- set_opt(sbi->s_mount_opt, BARRIER);
+- break;
+- }
++ if (args[0].from) {
++ if (match_int(&args[0], &option))
++ return 0;
++ } else
++ option = 1; /* No argument, default to 1 */
+ if (option)
+ set_opt(sbi->s_mount_opt, BARRIER);
+ else
+@@ -1579,10 +1585,11 @@ set_qf_format:
+ set_opt(sbi->s_mount_opt,NO_AUTO_DA_ALLOC);
+ break;
+ case Opt_auto_da_alloc:
+- if (match_int(&args[0], &option)) {
+- clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+- break;
+- }
++ if (args[0].from) {
++ if (match_int(&args[0], &option))
++ return 0;
++ } else
++ option = 1; /* No argument, default to 1 */
+ if (option)
+ clear_opt(sbi->s_mount_opt, NO_AUTO_DA_ALLOC);
+ else
+@@ -3390,7 +3397,7 @@ int ext4_force_commit(struct super_block *sb)
+
+ journal = EXT4_SB(sb)->s_journal;
+ if (journal) {
+- vfs_check_frozen(sb, SB_FREEZE_WRITE);
++ vfs_check_frozen(sb, SB_FREEZE_TRANS);
+ ret = ext4_journal_force_commit(journal);
+ }
+
+diff --git a/fs/jfs/xattr.c b/fs/jfs/xattr.c
+index fad3645..8b0da9b 100644
+--- a/fs/jfs/xattr.c
++++ b/fs/jfs/xattr.c
+@@ -85,46 +85,25 @@ struct ea_buffer {
+ #define EA_MALLOC 0x0008
+
+
++static int is_known_namespace(const char *name)
++{
++ if (strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) &&
++ strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
++ strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
++ strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
++ return false;
++
++ return true;
++}
++
+ /*
+ * These three routines are used to recognize on-disk extended attributes
+ * that are in a recognized namespace. If the attribute is not recognized,
+ * "os2." is prepended to the name
+ */
+-static inline int is_os2_xattr(struct jfs_ea *ea)
++static int is_os2_xattr(struct jfs_ea *ea)
+ {
+- /*
+- * Check for "system."
+- */
+- if ((ea->namelen >= XATTR_SYSTEM_PREFIX_LEN) &&
+- !strncmp(ea->name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+- return false;
+- /*
+- * Check for "user."
+- */
+- if ((ea->namelen >= XATTR_USER_PREFIX_LEN) &&
+- !strncmp(ea->name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+- return false;
+- /*
+- * Check for "security."
+- */
+- if ((ea->namelen >= XATTR_SECURITY_PREFIX_LEN) &&
+- !strncmp(ea->name, XATTR_SECURITY_PREFIX,
+- XATTR_SECURITY_PREFIX_LEN))
+- return false;
+- /*
+- * Check for "trusted."
+- */
+- if ((ea->namelen >= XATTR_TRUSTED_PREFIX_LEN) &&
+- !strncmp(ea->name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN))
+- return false;
+- /*
+- * Add any other valid namespace prefixes here
+- */
+-
+- /*
+- * We assume it's OS/2's flat namespace
+- */
+- return true;
++ return !is_known_namespace(ea->name);
+ }
+
+ static inline int name_size(struct jfs_ea *ea)
+@@ -762,13 +741,23 @@ static int can_set_xattr(struct inode *inode, const char *name,
+ if (!strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN))
+ return can_set_system_xattr(inode, name, value, value_len);
+
++ if (!strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN)) {
++ /*
++ * This makes sure that we aren't trying to set an
++ * attribute in a different namespace by prefixing it
++ * with "os2."
++ */
++ if (is_known_namespace(name + XATTR_OS2_PREFIX_LEN))
++ return -EOPNOTSUPP;
++ return 0;
++ }
++
+ /*
+ * Don't allow setting an attribute in an unknown namespace.
+ */
+ if (strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) &&
+ strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN) &&
+- strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN) &&
+- strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN))
++ strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN))
+ return -EOPNOTSUPP;
+
+ return 0;
+@@ -950,19 +939,8 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
+ int xattr_size;
+ ssize_t size;
+ int namelen = strlen(name);
+- char *os2name = NULL;
+ char *value;
+
+- if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
+- os2name = kmalloc(namelen - XATTR_OS2_PREFIX_LEN + 1,
+- GFP_KERNEL);
+- if (!os2name)
+- return -ENOMEM;
+- strcpy(os2name, name + XATTR_OS2_PREFIX_LEN);
+- name = os2name;
+- namelen -= XATTR_OS2_PREFIX_LEN;
+- }
+-
+ down_read(&JFS_IP(inode)->xattr_sem);
+
+ xattr_size = ea_get(inode, &ea_buf, 0);
+@@ -1000,8 +978,6 @@ ssize_t __jfs_getxattr(struct inode *inode, const char *name, void *data,
+ out:
+ up_read(&JFS_IP(inode)->xattr_sem);
+
+- kfree(os2name);
+-
+ return size;
+ }
+
+@@ -1010,6 +986,19 @@ ssize_t jfs_getxattr(struct dentry *dentry, const char *name, void *data,
+ {
+ int err;
+
++ if (strncmp(name, XATTR_OS2_PREFIX, XATTR_OS2_PREFIX_LEN) == 0) {
++ /*
++ * skip past "os2." prefix
++ */
++ name += XATTR_OS2_PREFIX_LEN;
++ /*
++ * Don't allow retrieving properly prefixed attributes
++ * by prepending them with "os2."
++ */
++ if (is_known_namespace(name))
++ return -EOPNOTSUPP;
++ }
++
+ err = __jfs_getxattr(dentry->d_inode, name, data, buf_size);
+
+ return err;
+diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
+index 38a42f5..5661db1 100644
+--- a/fs/ocfs2/alloc.c
++++ b/fs/ocfs2/alloc.c
+@@ -1765,9 +1765,9 @@ set_and_inc:
+ *
+ * The array index of the subtree root is passed back.
+ */
+-static int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
+- struct ocfs2_path *left,
+- struct ocfs2_path *right)
++int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
++ struct ocfs2_path *left,
++ struct ocfs2_path *right)
+ {
+ int i = 0;
+
+@@ -2872,8 +2872,8 @@ out:
+ * This looks similar, but is subtly different to
+ * ocfs2_find_cpos_for_left_leaf().
+ */
+-static int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
+- struct ocfs2_path *path, u32 *cpos)
++int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
++ struct ocfs2_path *path, u32 *cpos)
+ {
+ int i, j, ret = 0;
+ u64 blkno;
+diff --git a/fs/ocfs2/alloc.h b/fs/ocfs2/alloc.h
+index 9c122d5..1db4359 100644
+--- a/fs/ocfs2/alloc.h
++++ b/fs/ocfs2/alloc.h
+@@ -317,4 +317,9 @@ int ocfs2_path_bh_journal_access(handle_t *handle,
+ int ocfs2_journal_access_path(struct ocfs2_caching_info *ci,
+ handle_t *handle,
+ struct ocfs2_path *path);
++int ocfs2_find_cpos_for_right_leaf(struct super_block *sb,
++ struct ocfs2_path *path, u32 *cpos);
++int ocfs2_find_subtree_root(struct ocfs2_extent_tree *et,
++ struct ocfs2_path *left,
++ struct ocfs2_path *right);
+ #endif /* OCFS2_ALLOC_H */
+diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c
+index 544ac62..b5cb3ed 100644
+--- a/fs/ocfs2/locks.c
++++ b/fs/ocfs2/locks.c
+@@ -133,7 +133,7 @@ int ocfs2_lock(struct file *file, int cmd, struct file_lock *fl)
+
+ if (!(fl->fl_flags & FL_POSIX))
+ return -ENOLCK;
+- if (__mandatory_lock(inode))
++ if (__mandatory_lock(inode) && fl->fl_type != F_UNLCK)
+ return -ENOLCK;
+
+ return ocfs2_plock(osb->cconn, OCFS2_I(inode)->ip_blkno, file, cmd, fl);
+diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
+index 03a1ab8..bb0465f 100644
+--- a/fs/ocfs2/refcounttree.c
++++ b/fs/ocfs2/refcounttree.c
+@@ -969,6 +969,103 @@ out:
+ }
+
+ /*
++ * Find the end range for a leaf refcount block indicated by
++ * el->l_recs[index].e_blkno.
++ */
++static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
++ struct buffer_head *ref_root_bh,
++ struct ocfs2_extent_block *eb,
++ struct ocfs2_extent_list *el,
++ int index, u32 *cpos_end)
++{
++ int ret, i, subtree_root;
++ u32 cpos;
++ u64 blkno;
++ struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
++ struct ocfs2_path *left_path = NULL, *right_path = NULL;
++ struct ocfs2_extent_tree et;
++ struct ocfs2_extent_list *tmp_el;
++
++ if (index < le16_to_cpu(el->l_next_free_rec) - 1) {
++ /*
++ * We have a extent rec after index, so just use the e_cpos
++ * of the next extent rec.
++ */
++ *cpos_end = le32_to_cpu(el->l_recs[index+1].e_cpos);
++ return 0;
++ }
++
++ if (!eb || (eb && !eb->h_next_leaf_blk)) {
++ /*
++ * We are the last extent rec, so any high cpos should
++ * be stored in this leaf refcount block.
++ */
++ *cpos_end = UINT_MAX;
++ return 0;
++ }
++
++ /*
++ * If the extent block isn't the last one, we have to find
++ * the subtree root between this extent block and the next
++ * leaf extent block and get the corresponding e_cpos from
++ * the subroot. Otherwise we may corrupt the b-tree.
++ */
++ ocfs2_init_refcount_extent_tree(&et, ci, ref_root_bh);
++
++ left_path = ocfs2_new_path_from_et(&et);
++ if (!left_path) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ cpos = le32_to_cpu(eb->h_list.l_recs[index].e_cpos);
++ ret = ocfs2_find_path(ci, left_path, cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ right_path = ocfs2_new_path_from_path(left_path);
++ if (!right_path) {
++ ret = -ENOMEM;
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = ocfs2_find_cpos_for_right_leaf(sb, left_path, &cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ ret = ocfs2_find_path(ci, right_path, cpos);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
++
++ subtree_root = ocfs2_find_subtree_root(&et, left_path,
++ right_path);
++
++ tmp_el = left_path->p_node[subtree_root].el;
++ blkno = left_path->p_node[subtree_root+1].bh->b_blocknr;
++ for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) {
++ if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) {
++ *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos);
++ break;
++ }
++ }
++
++ BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec));
++
++out:
++ ocfs2_free_path(left_path);
++ ocfs2_free_path(right_path);
++ return ret;
++}
++
++/*
+ * Given a cpos and len, try to find the refcount record which contains cpos.
+ * 1. If cpos can be found in one refcount record, return the record.
+ * 2. If cpos can't be found, return a fake record which start from cpos
+@@ -983,10 +1080,10 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
+ struct buffer_head **ret_bh)
+ {
+ int ret = 0, i, found;
+- u32 low_cpos;
++ u32 low_cpos, uninitialized_var(cpos_end);
+ struct ocfs2_extent_list *el;
+- struct ocfs2_extent_rec *tmp, *rec = NULL;
+- struct ocfs2_extent_block *eb;
++ struct ocfs2_extent_rec *rec = NULL;
++ struct ocfs2_extent_block *eb = NULL;
+ struct buffer_head *eb_bh = NULL, *ref_leaf_bh = NULL;
+ struct super_block *sb = ocfs2_metadata_cache_get_super(ci);
+ struct ocfs2_refcount_block *rb =
+@@ -1034,12 +1131,16 @@ static int ocfs2_get_refcount_rec(struct ocfs2_caching_info *ci,
+ }
+ }
+
+- /* adjust len when we have ocfs2_extent_rec after it. */
+- if (found && i < le16_to_cpu(el->l_next_free_rec) - 1) {
+- tmp = &el->l_recs[i+1];
++ if (found) {
++ ret = ocfs2_get_refcount_cpos_end(ci, ref_root_bh,
++ eb, el, i, &cpos_end);
++ if (ret) {
++ mlog_errno(ret);
++ goto out;
++ }
+
+- if (le32_to_cpu(tmp->e_cpos) < cpos + len)
+- len = le32_to_cpu(tmp->e_cpos) - cpos;
++ if (cpos_end < low_cpos + len)
++ len = cpos_end - low_cpos;
+ }
+
+ ret = ocfs2_read_refcount_block(ci, le64_to_cpu(rec->e_blkno),
+diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
+index 14f47d2..9f55be4 100644
+--- a/fs/ocfs2/super.c
++++ b/fs/ocfs2/super.c
+@@ -701,6 +701,10 @@ unlock_osb:
+
+ if (!ocfs2_is_hard_readonly(osb))
+ ocfs2_set_journal_params(osb);
++
++ sb->s_flags = (sb->s_flags & ~MS_POSIXACL) |
++ ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) ?
++ MS_POSIXACL : 0);
+ }
+ out:
+ unlock_kernel();
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index 725a650..42fdc76 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -405,7 +405,6 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+
+ /* add up live thread stats at the group level */
+ if (whole) {
+- struct task_cputime cputime;
+ struct task_struct *t = task;
+ do {
+ min_flt += t->min_flt;
+@@ -416,9 +415,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+
+ min_flt += sig->min_flt;
+ maj_flt += sig->maj_flt;
+- thread_group_cputime(task, &cputime);
+- utime = cputime.utime;
+- stime = cputime.stime;
++ thread_group_times(task, &utime, &stime);
+ gtime = cputime_add(gtime, sig->gtime);
+ }
+
+diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
+index 9062220..b5fe0aa 100644
+--- a/fs/reiserfs/journal.c
++++ b/fs/reiserfs/journal.c
+@@ -2184,6 +2184,15 @@ static int journal_read_transaction(struct super_block *sb,
+ brelse(d_bh);
+ return 1;
+ }
++
++ if (bdev_read_only(sb->s_bdev)) {
++ reiserfs_warning(sb, "clm-2076",
++ "device is readonly, unable to replay log");
++ brelse(c_bh);
++ brelse(d_bh);
++ return -EROFS;
++ }
++
+ trans_id = get_desc_trans_id(desc);
+ /* now we know we've got a good transaction, and it was inside the valid time ranges */
+ log_blocks = kmalloc(get_desc_trans_len(desc) *
+@@ -2422,12 +2431,6 @@ static int journal_read(struct super_block *sb)
+ goto start_log_replay;
+ }
+
+- if (continue_replay && bdev_read_only(sb->s_bdev)) {
+- reiserfs_warning(sb, "clm-2076",
+- "device is readonly, unable to replay log");
+- return -1;
+- }
+-
+ /* ok, there are transactions that need to be replayed. start with the first log block, find
+ ** all the valid transactions, and pick out the oldest.
+ */
+diff --git a/fs/reiserfs/xattr_security.c b/fs/reiserfs/xattr_security.c
+index a92c879..b37b13b 100644
+--- a/fs/reiserfs/xattr_security.c
++++ b/fs/reiserfs/xattr_security.c
+@@ -75,7 +75,7 @@ int reiserfs_security_init(struct inode *dir, struct inode *inode,
+ return error;
+ }
+
+- if (sec->length) {
++ if (sec->length && reiserfs_xattrs_initialized(inode->i_sb)) {
+ blocks = reiserfs_xattr_jcreate_nblocks(inode) +
+ reiserfs_xattr_nblocks(inode, sec->length);
+ /* We don't want to count the directories twice if we have
+diff --git a/fs/signalfd.c b/fs/signalfd.c
+index b07565c..d98bea8 100644
+--- a/fs/signalfd.c
++++ b/fs/signalfd.c
+@@ -87,6 +87,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
+ err |= __put_user(kinfo->si_tid, &uinfo->ssi_tid);
+ err |= __put_user(kinfo->si_overrun, &uinfo->ssi_overrun);
+ err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
++ err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+ break;
+ case __SI_POLL:
+ err |= __put_user(kinfo->si_band, &uinfo->ssi_band);
+@@ -110,6 +111,7 @@ static int signalfd_copyinfo(struct signalfd_siginfo __user *uinfo,
+ err |= __put_user(kinfo->si_pid, &uinfo->ssi_pid);
+ err |= __put_user(kinfo->si_uid, &uinfo->ssi_uid);
+ err |= __put_user((long) kinfo->si_ptr, &uinfo->ssi_ptr);
++ err |= __put_user(kinfo->si_int, &uinfo->ssi_int);
+ break;
+ default:
+ /*
+diff --git a/fs/splice.c b/fs/splice.c
+index 7394e9e..e5efbb9 100644
+--- a/fs/splice.c
++++ b/fs/splice.c
+@@ -365,17 +365,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
+ * If the page isn't uptodate, we may need to start io on it
+ */
+ if (!PageUptodate(page)) {
+- /*
+- * If in nonblock mode then dont block on waiting
+- * for an in-flight io page
+- */
+- if (flags & SPLICE_F_NONBLOCK) {
+- if (!trylock_page(page)) {
+- error = -EAGAIN;
+- break;
+- }
+- } else
+- lock_page(page);
++ lock_page(page);
+
+ /*
+ * Page was truncated, or invalidated by the
+diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h
+index 64b1a4c..f73bc1b 100644
+--- a/include/linux/clocksource.h
++++ b/include/linux/clocksource.h
+@@ -282,10 +282,12 @@ extern struct clocksource * __init __weak clocksource_default_clock(void);
+ extern void clocksource_mark_unstable(struct clocksource *cs);
+
+ #ifdef CONFIG_GENERIC_TIME_VSYSCALL
+-extern void update_vsyscall(struct timespec *ts, struct clocksource *c);
++extern void
++update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult);
+ extern void update_vsyscall_tz(void);
+ #else
+-static inline void update_vsyscall(struct timespec *ts, struct clocksource *c)
++static inline void
++update_vsyscall(struct timespec *ts, struct clocksource *c, u32 mult)
+ {
+ }
+
+diff --git a/include/linux/fs.h b/include/linux/fs.h
+index b6a5622..1ff0962 100644
+--- a/include/linux/fs.h
++++ b/include/linux/fs.h
+@@ -145,11 +145,11 @@ struct inodes_stat_t {
+ *
+ */
+ #define RW_MASK 1
+-#define RWA_MASK 2
++#define RWA_MASK 16
+ #define READ 0
+ #define WRITE 1
+-#define READA 2 /* read-ahead - don't block if no resources */
+-#define SWRITE 3 /* for ll_rw_block() - wait for buffer lock */
++#define READA 16 /* readahead - don't block if no resources */
++#define SWRITE 17 /* for ll_rw_block(), wait for buffer lock */
+ #define READ_SYNC (READ | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_UNPLUG))
+ #define READ_META (READ | (1 << BIO_RW_META))
+ #define WRITE_SYNC_PLUG (WRITE | (1 << BIO_RW_SYNCIO) | (1 << BIO_RW_NOIDLE))
+diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
+index 7ca72b7..c49d6f5 100644
+--- a/include/linux/interrupt.h
++++ b/include/linux/interrupt.h
+@@ -52,16 +52,21 @@
+ * IRQF_ONESHOT - Interrupt is not reenabled after the hardirq handler finished.
+ * Used by threaded interrupts which need to keep the
+ * irq line disabled until the threaded handler has been run.
++ * IRQF_NO_SUSPEND - Do not disable this IRQ during suspend
++ *
+ */
+ #define IRQF_DISABLED 0x00000020
+ #define IRQF_SAMPLE_RANDOM 0x00000040
+ #define IRQF_SHARED 0x00000080
+ #define IRQF_PROBE_SHARED 0x00000100
+-#define IRQF_TIMER 0x00000200
++#define __IRQF_TIMER 0x00000200
+ #define IRQF_PERCPU 0x00000400
+ #define IRQF_NOBALANCING 0x00000800
+ #define IRQF_IRQPOLL 0x00001000
+ #define IRQF_ONESHOT 0x00002000
++#define IRQF_NO_SUSPEND 0x00004000
++
++#define IRQF_TIMER (__IRQF_TIMER | IRQF_NO_SUSPEND)
+
+ /*
+ * Bits used by threaded handlers:
+diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
+index 812a5f3..ec12f8c 100644
+--- a/include/linux/netdevice.h
++++ b/include/linux/netdevice.h
+@@ -1560,6 +1560,8 @@ extern void netif_carrier_on(struct net_device *dev);
+
+ extern void netif_carrier_off(struct net_device *dev);
+
++extern void netif_notify_peers(struct net_device *dev);
++
+ /**
+ * netif_dormant_on - mark device as dormant.
+ * @dev: network device
+diff --git a/include/linux/notifier.h b/include/linux/notifier.h
+index 44428d2..5ecdb50 100644
+--- a/include/linux/notifier.h
++++ b/include/linux/notifier.h
+@@ -201,6 +201,7 @@ static inline int notifier_to_errno(int ret)
+ #define NETDEV_PRE_UP 0x000D
+ #define NETDEV_BONDING_OLDTYPE 0x000E
+ #define NETDEV_BONDING_NEWTYPE 0x000F
++#define NETDEV_NOTIFY_PEERS 0x0013
+
+ #define SYS_DOWN 0x0001 /* Notify of system down */
+ #define SYS_RESTART SYS_DOWN
+diff --git a/include/linux/reiserfs_xattr.h b/include/linux/reiserfs_xattr.h
+index 99928dc..7fa02b4 100644
+--- a/include/linux/reiserfs_xattr.h
++++ b/include/linux/reiserfs_xattr.h
+@@ -70,6 +70,11 @@ int reiserfs_security_write(struct reiserfs_transaction_handle *th,
+ void reiserfs_security_free(struct reiserfs_security_handle *sec);
+ #endif
+
++static inline int reiserfs_xattrs_initialized(struct super_block *sb)
++{
++ return REISERFS_SB(sb)->priv_root != NULL;
++}
++
+ #define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
+ static inline loff_t reiserfs_xattr_nblocks(struct inode *inode, loff_t size)
+ {
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index b253434..cc24beb 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -628,6 +628,9 @@ struct signal_struct {
+ cputime_t utime, stime, cutime, cstime;
+ cputime_t gtime;
+ cputime_t cgtime;
++#ifndef CONFIG_VIRT_CPU_ACCOUNTING
++ cputime_t prev_utime, prev_stime;
++#endif
+ unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
+ unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+ unsigned long inblock, oublock, cinblock, coublock;
+@@ -1725,6 +1728,7 @@ static inline void put_task_struct(struct task_struct *t)
+ extern cputime_t task_utime(struct task_struct *p);
+ extern cputime_t task_stime(struct task_struct *p);
+ extern cputime_t task_gtime(struct task_struct *p);
++extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+
+ /*
+ * Per process flags
+diff --git a/include/linux/tick.h b/include/linux/tick.h
+index 0482229..8dc0821 100644
+--- a/include/linux/tick.h
++++ b/include/linux/tick.h
+@@ -98,6 +98,9 @@ extern int tick_check_oneshot_change(int allow_nohz);
+ extern struct tick_sched *tick_get_tick_sched(int cpu);
+ extern void tick_check_idle(int cpu);
+ extern int tick_oneshot_mode_active(void);
++# ifndef arch_needs_cpu
++# define arch_needs_cpu(cpu) (0)
++# endif
+ # else
+ static inline void tick_clock_notify(void) { }
+ static inline int tick_check_oneshot_change(int allow_nohz) { return 0; }
+diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h
+index 2526f3b..dd0bde1 100644
+--- a/include/linux/usb/quirks.h
++++ b/include/linux/usb/quirks.h
+@@ -19,4 +19,8 @@
+ /* device can't handle its Configuration or Interface strings */
+ #define USB_QUIRK_CONFIG_INTF_STRINGS 0x00000008
+
++/* device needs a pause during initialization, after we read the device
++ descriptor */
++#define USB_QUIRK_DELAY_INIT 0x00000040
++
+ #endif /* __LINUX_USB_QUIRKS_H */
+diff --git a/kernel/exit.c b/kernel/exit.c
+index f7864ac..4a0e062 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -110,8 +110,8 @@ static void __exit_signal(struct task_struct *tsk)
+ * We won't ever get here for the group leader, since it
+ * will have been the last reference on the signal_struct.
+ */
+- sig->utime = cputime_add(sig->utime, task_utime(tsk));
+- sig->stime = cputime_add(sig->stime, task_stime(tsk));
++ sig->utime = cputime_add(sig->utime, tsk->utime);
++ sig->stime = cputime_add(sig->stime, tsk->stime);
+ sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+ sig->min_flt += tsk->min_flt;
+ sig->maj_flt += tsk->maj_flt;
+@@ -1205,6 +1205,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ struct signal_struct *psig;
+ struct signal_struct *sig;
+ unsigned long maxrss;
++ cputime_t tgutime, tgstime;
+
+ /*
+ * The resource counters for the group leader are in its
+@@ -1220,20 +1221,23 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ * need to protect the access to parent->signal fields,
+ * as other threads in the parent group can be right
+ * here reaping other children at the same time.
++ *
++ * We use thread_group_times() to get times for the thread
++ * group, which consolidates times for all threads in the
++ * group including the group leader.
+ */
++ thread_group_times(p, &tgutime, &tgstime);
+ spin_lock_irq(&p->real_parent->sighand->siglock);
+ psig = p->real_parent->signal;
+ sig = p->signal;
+ psig->cutime =
+ cputime_add(psig->cutime,
+- cputime_add(p->utime,
+- cputime_add(sig->utime,
+- sig->cutime)));
++ cputime_add(tgutime,
++ sig->cutime));
+ psig->cstime =
+ cputime_add(psig->cstime,
+- cputime_add(p->stime,
+- cputime_add(sig->stime,
+- sig->cstime)));
++ cputime_add(tgstime,
++ sig->cstime));
+ psig->cgtime =
+ cputime_add(psig->cgtime,
+ cputime_add(p->gtime,
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 28b4874..ce2f585 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -884,6 +884,9 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+ sig->gtime = cputime_zero;
+ sig->cgtime = cputime_zero;
++#ifndef CONFIG_VIRT_CPU_ACCOUNTING
++ sig->prev_utime = sig->prev_stime = cputime_zero;
++#endif
+ sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+ sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+ sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
+index fa4bdd4..f34e231 100644
+--- a/kernel/irq/manage.c
++++ b/kernel/irq/manage.c
+@@ -200,7 +200,7 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
+ void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
+ {
+ if (suspend) {
+- if (!desc->action || (desc->action->flags & IRQF_TIMER))
++ if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND))
+ return;
+ desc->status |= IRQ_SUSPENDED;
+ }
+diff --git a/kernel/sched.c b/kernel/sched.c
+index d0958da..9990074 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -5215,45 +5215,90 @@ cputime_t task_stime(struct task_struct *p)
+ {
+ return p->stime;
+ }
++
++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
++{
++ struct task_cputime cputime;
++
++ thread_group_cputime(p, &cputime);
++
++ *ut = cputime.utime;
++ *st = cputime.stime;
++}
+ #else
++
++#ifndef nsecs_to_cputime
++# define nsecs_to_cputime(__nsecs) \
++ msecs_to_cputime(div_u64((__nsecs), NSEC_PER_MSEC))
++#endif
++
+ cputime_t task_utime(struct task_struct *p)
+ {
+- clock_t utime = cputime_to_clock_t(p->utime),
+- total = utime + cputime_to_clock_t(p->stime);
++ cputime_t utime = p->utime, total = utime + p->stime;
+ u64 temp;
+
+ /*
+ * Use CFS's precise accounting:
+ */
+- temp = (u64)nsec_to_clock_t(p->se.sum_exec_runtime);
++ temp = (u64)nsecs_to_cputime(p->se.sum_exec_runtime);
+
+ if (total) {
+ temp *= utime;
+ do_div(temp, total);
+ }
+- utime = (clock_t)temp;
++ utime = (cputime_t)temp;
+
+- p->prev_utime = max(p->prev_utime, clock_t_to_cputime(utime));
++ p->prev_utime = max(p->prev_utime, utime);
+ return p->prev_utime;
+ }
+
+ cputime_t task_stime(struct task_struct *p)
+ {
+- clock_t stime;
++ cputime_t stime;
+
+ /*
+ * Use CFS's precise accounting. (we subtract utime from
+ * the total, to make sure the total observed by userspace
+ * grows monotonically - apps rely on that):
+ */
+- stime = nsec_to_clock_t(p->se.sum_exec_runtime) -
+- cputime_to_clock_t(task_utime(p));
++ stime = nsecs_to_cputime(p->se.sum_exec_runtime) - task_utime(p);
+
+ if (stime >= 0)
+- p->prev_stime = max(p->prev_stime, clock_t_to_cputime(stime));
++ p->prev_stime = max(p->prev_stime, stime);
+
+ return p->prev_stime;
+ }
++
++/*
++ * Must be called with siglock held.
++ */
++void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
++{
++ struct signal_struct *sig = p->signal;
++ struct task_cputime cputime;
++ cputime_t rtime, utime, total;
++
++ thread_group_cputime(p, &cputime);
++
++ total = cputime_add(cputime.utime, cputime.stime);
++ rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
++
++ if (total) {
++ u64 temp;
++
++ temp = (u64)(rtime * cputime.utime);
++ do_div(temp, total);
++ utime = (cputime_t)temp;
++ } else
++ utime = rtime;
++
++ sig->prev_utime = max(sig->prev_utime, utime);
++ sig->prev_stime = max(sig->prev_stime,
++ cputime_sub(rtime, sig->prev_utime));
++
++ *ut = sig->prev_utime;
++ *st = sig->prev_stime;
++}
+ #endif
+
+ inline cputime_t task_gtime(struct task_struct *p)
+@@ -5545,7 +5590,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+ * the mutex owner just released it and exited.
+ */
+ if (probe_kernel_address(&owner->cpu, cpu))
+- goto out;
++ return 0;
+ #else
+ cpu = owner->cpu;
+ #endif
+@@ -5555,14 +5600,14 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+ * the cpu field may no longer be valid.
+ */
+ if (cpu >= nr_cpumask_bits)
+- goto out;
++ return 0;
+
+ /*
+ * We need to validate that we can do a
+ * get_cpu() and that we have the percpu area.
+ */
+ if (!cpu_online(cpu))
+- goto out;
++ return 0;
+
+ rq = cpu_rq(cpu);
+
+@@ -5581,7 +5626,7 @@ int mutex_spin_on_owner(struct mutex *lock, struct thread_info *owner)
+
+ cpu_relax();
+ }
+-out:
++
+ return 1;
+ }
+ #endif
+diff --git a/kernel/sys.c b/kernel/sys.c
+index ce17760..26e4b8a 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -911,16 +911,15 @@ change_okay:
+
+ void do_sys_times(struct tms *tms)
+ {
+- struct task_cputime cputime;
+- cputime_t cutime, cstime;
++ cputime_t tgutime, tgstime, cutime, cstime;
+
+- thread_group_cputime(current, &cputime);
+ spin_lock_irq(&current->sighand->siglock);
++ thread_group_times(current, &tgutime, &tgstime);
+ cutime = current->signal->cutime;
+ cstime = current->signal->cstime;
+ spin_unlock_irq(&current->sighand->siglock);
+- tms->tms_utime = cputime_to_clock_t(cputime.utime);
+- tms->tms_stime = cputime_to_clock_t(cputime.stime);
++ tms->tms_utime = cputime_to_clock_t(tgutime);
++ tms->tms_stime = cputime_to_clock_t(tgstime);
+ tms->tms_cutime = cputime_to_clock_t(cutime);
+ tms->tms_cstime = cputime_to_clock_t(cstime);
+ }
+@@ -1338,8 +1337,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
+ {
+ struct task_struct *t;
+ unsigned long flags;
+- cputime_t utime, stime;
+- struct task_cputime cputime;
++ cputime_t tgutime, tgstime, utime, stime;
+ unsigned long maxrss = 0;
+
+ memset((char *) r, 0, sizeof *r);
+@@ -1373,9 +1371,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
+ break;
+
+ case RUSAGE_SELF:
+- thread_group_cputime(p, &cputime);
+- utime = cputime_add(utime, cputime.utime);
+- stime = cputime_add(stime, cputime.stime);
++ thread_group_times(p, &tgutime, &tgstime);
++ utime = cputime_add(utime, tgutime);
++ stime = cputime_add(stime, tgstime);
+ r->ru_nvcsw += p->signal->nvcsw;
+ r->ru_nivcsw += p->signal->nivcsw;
+ r->ru_minflt += p->signal->min_flt;
+diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
+index 44320b1..b63cfeb 100644
+--- a/kernel/time/tick-sched.c
++++ b/kernel/time/tick-sched.c
+@@ -134,18 +134,13 @@ __setup("nohz=", setup_tick_nohz);
+ * value. We do this unconditionally on any cpu, as we don't know whether the
+ * cpu, which has the update task assigned is in a long sleep.
+ */
+-static void tick_nohz_update_jiffies(void)
++static void tick_nohz_update_jiffies(ktime_t now)
+ {
+ int cpu = smp_processor_id();
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+ unsigned long flags;
+- ktime_t now;
+-
+- if (!ts->tick_stopped)
+- return;
+
+ cpumask_clear_cpu(cpu, nohz_cpu_mask);
+- now = ktime_get();
+ ts->idle_waketime = now;
+
+ local_irq_save(flags);
+@@ -155,20 +150,17 @@ static void tick_nohz_update_jiffies(void)
+ touch_softlockup_watchdog();
+ }
+
+-static void tick_nohz_stop_idle(int cpu)
++static void tick_nohz_stop_idle(int cpu, ktime_t now)
+ {
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
++ ktime_t delta;
+
+- if (ts->idle_active) {
+- ktime_t now, delta;
+- now = ktime_get();
+- delta = ktime_sub(now, ts->idle_entrytime);
+- ts->idle_lastupdate = now;
+- ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
+- ts->idle_active = 0;
++ delta = ktime_sub(now, ts->idle_entrytime);
++ ts->idle_lastupdate = now;
++ ts->idle_sleeptime = ktime_add(ts->idle_sleeptime, delta);
++ ts->idle_active = 0;
+
+- sched_clock_idle_wakeup_event(0);
+- }
++ sched_clock_idle_wakeup_event(0);
+ }
+
+ static ktime_t tick_nohz_start_idle(struct tick_sched *ts)
+@@ -289,12 +281,15 @@ void tick_nohz_stop_sched_tick(int inidle)
+ time_delta = KTIME_MAX;
+ } while (read_seqretry(&xtime_lock, seq));
+
+- /* Get the next timer wheel timer */
+- next_jiffies = get_next_timer_interrupt(last_jiffies);
+- delta_jiffies = next_jiffies - last_jiffies;
+-
+- if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu))
++ if (rcu_needs_cpu(cpu) || printk_needs_cpu(cpu) ||
++ arch_needs_cpu(cpu)) {
++ next_jiffies = last_jiffies + 1;
+ delta_jiffies = 1;
++ } else {
++ /* Get the next timer wheel timer */
++ next_jiffies = get_next_timer_interrupt(last_jiffies);
++ delta_jiffies = next_jiffies - last_jiffies;
++ }
+ /*
+ * Do not stop the tick, if we are only one off
+ * or if the cpu is required for rcu
+@@ -460,7 +455,11 @@ void tick_nohz_restart_sched_tick(void)
+ ktime_t now;
+
+ local_irq_disable();
+- tick_nohz_stop_idle(cpu);
++ if (ts->idle_active || (ts->inidle && ts->tick_stopped))
++ now = ktime_get();
++
++ if (ts->idle_active)
++ tick_nohz_stop_idle(cpu, now);
+
+ if (!ts->inidle || !ts->tick_stopped) {
+ ts->inidle = 0;
+@@ -474,7 +473,6 @@ void tick_nohz_restart_sched_tick(void)
+
+ /* Update jiffies first */
+ select_nohz_load_balancer(0);
+- now = ktime_get();
+ tick_do_update_jiffies64(now);
+ cpumask_clear_cpu(cpu, nohz_cpu_mask);
+
+@@ -608,22 +606,18 @@ static void tick_nohz_switch_to_nohz(void)
+ * timer and do not touch the other magic bits which need to be done
+ * when idle is left.
+ */
+-static void tick_nohz_kick_tick(int cpu)
++static void tick_nohz_kick_tick(int cpu, ktime_t now)
+ {
+ #if 0
+ /* Switch back to 2.6.27 behaviour */
+
+ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
+- ktime_t delta, now;
+-
+- if (!ts->tick_stopped)
+- return;
++ ktime_t delta;
+
+ /*
+ * Do not touch the tick device, when the next expiry is either
+ * already reached or less/equal than the tick period.
+ */
+- now = ktime_get();
+ delta = ktime_sub(hrtimer_get_expires(&ts->sched_timer), now);
+ if (delta.tv64 <= tick_period.tv64)
+ return;
+@@ -632,9 +626,26 @@ static void tick_nohz_kick_tick(int cpu)
+ #endif
+ }
+
++static inline void tick_check_nohz(int cpu)
++{
++ struct tick_sched *ts = &per_cpu(tick_cpu_sched, cpu);
++ ktime_t now;
++
++ if (!ts->idle_active && !ts->tick_stopped)
++ return;
++ now = ktime_get();
++ if (ts->idle_active)
++ tick_nohz_stop_idle(cpu, now);
++ if (ts->tick_stopped) {
++ tick_nohz_update_jiffies(now);
++ tick_nohz_kick_tick(cpu, now);
++ }
++}
++
+ #else
+
+ static inline void tick_nohz_switch_to_nohz(void) { }
++static inline void tick_check_nohz(int cpu) { }
+
+ #endif /* NO_HZ */
+
+@@ -644,11 +655,7 @@ static inline void tick_nohz_switch_to_nohz(void) { }
+ void tick_check_idle(int cpu)
+ {
+ tick_check_oneshot_broadcast(cpu);
+-#ifdef CONFIG_NO_HZ
+- tick_nohz_stop_idle(cpu);
+- tick_nohz_update_jiffies();
+- tick_nohz_kick_tick(cpu);
+-#endif
++ tick_check_nohz(cpu);
+ }
+
+ /*
+diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
+index 8b709de..26e2f37 100644
+--- a/kernel/time/timekeeping.c
++++ b/kernel/time/timekeeping.c
+@@ -177,7 +177,7 @@ void timekeeping_leap_insert(int leapsecond)
+ {
+ xtime.tv_sec += leapsecond;
+ wall_to_monotonic.tv_sec -= leapsecond;
+- update_vsyscall(&xtime, timekeeper.clock);
++ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+ }
+
+ #ifdef CONFIG_GENERIC_TIME
+@@ -337,7 +337,7 @@ int do_settimeofday(struct timespec *tv)
+ timekeeper.ntp_error = 0;
+ ntp_clear();
+
+- update_vsyscall(&xtime, timekeeper.clock);
++ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+
+ write_sequnlock_irqrestore(&xtime_lock, flags);
+
+@@ -822,7 +822,7 @@ void update_wall_time(void)
+ update_xtime_cache(nsecs);
+
+ /* check to see if there is a new clocksource to use */
+- update_vsyscall(&xtime, timekeeper.clock);
++ update_vsyscall(&xtime, timekeeper.clock, timekeeper.mult);
+ }
+
+ /**
+diff --git a/mm/memory-failure.c b/mm/memory-failure.c
+index dacc641..8aeba53 100644
+--- a/mm/memory-failure.c
++++ b/mm/memory-failure.c
+@@ -589,7 +589,6 @@ static struct page_state {
+
+ { lru|dirty, lru|dirty, "LRU", me_pagecache_dirty },
+ { lru|dirty, lru, "clean LRU", me_pagecache_clean },
+- { swapbacked, swapbacked, "anonymous", me_pagecache_clean },
+
+ /*
+ * Catchall entry: must be at end.
+@@ -638,7 +637,7 @@ static int page_action(struct page_state *ps, struct page *p,
+ * Do all that is necessary to remove user space mappings. Unmap
+ * the pages and send SIGBUS to the processes if the data was dirty.
+ */
+-static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
++static int hwpoison_user_mappings(struct page *p, unsigned long pfn,
+ int trapno)
+ {
+ enum ttu_flags ttu = TTU_UNMAP | TTU_IGNORE_MLOCK | TTU_IGNORE_ACCESS;
+@@ -648,15 +647,18 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
+ int i;
+ int kill = 1;
+
+- if (PageReserved(p) || PageCompound(p) || PageSlab(p) || PageKsm(p))
+- return;
++ if (PageReserved(p) || PageSlab(p))
++ return SWAP_SUCCESS;
+
+ /*
+ * This check implies we don't kill processes if their pages
+ * are in the swap cache early. Those are always late kills.
+ */
+ if (!page_mapped(p))
+- return;
++ return SWAP_SUCCESS;
++
++ if (PageCompound(p) || PageKsm(p))
++ return SWAP_FAIL;
+
+ if (PageSwapCache(p)) {
+ printk(KERN_ERR
+@@ -718,6 +720,8 @@ static void hwpoison_user_mappings(struct page *p, unsigned long pfn,
+ */
+ kill_procs_ao(&tokill, !!PageDirty(p), trapno,
+ ret != SWAP_SUCCESS, pfn);
++
++ return ret;
+ }
+
+ int __memory_failure(unsigned long pfn, int trapno, int ref)
+@@ -787,8 +791,13 @@ int __memory_failure(unsigned long pfn, int trapno, int ref)
+
+ /*
+ * Now take care of user space mappings.
++ * Abort on fail: __remove_from_page_cache() assumes unmapped page.
+ */
+- hwpoison_user_mappings(p, pfn, trapno);
++ if (hwpoison_user_mappings(p, pfn, trapno) != SWAP_SUCCESS) {
++ printk(KERN_ERR "MCE %#lx: cannot unmap page, give up\n", pfn);
++ res = -EBUSY;
++ goto out;
++ }
+
+ /*
+ * Torn down by someone else?
+diff --git a/mm/swapfile.c b/mm/swapfile.c
+index 9c590ee..270e136 100644
+--- a/mm/swapfile.c
++++ b/mm/swapfile.c
+@@ -330,8 +330,10 @@ checks:
+ if (offset > si->highest_bit)
+ scan_base = offset = si->lowest_bit;
+
+- /* reuse swap entry of cache-only swap if not busy. */
+- if (vm_swap_full() && si->swap_map[offset] == SWAP_HAS_CACHE) {
++ /* reuse swap entry of cache-only swap if not hibernation. */
++ if (vm_swap_full()
++ && cache == SWAP_CACHE
++ && si->swap_map[offset] == SWAP_HAS_CACHE) {
+ int swap_was_freed;
+ spin_unlock(&swap_lock);
+ swap_was_freed = __try_to_reclaim_swap(si, offset);
+diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+index cc35645..f84f6dd 100644
+--- a/net/ipv4/devinet.c
++++ b/net/ipv4/devinet.c
+@@ -1076,6 +1076,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event,
+ }
+ ip_mc_up(in_dev);
+ /* fall through */
++ case NETDEV_NOTIFY_PEERS:
+ case NETDEV_CHANGEADDR:
+ /* Send gratuitous ARP to notify of link change */
+ if (IN_DEV_ARP_NOTIFY(in_dev)) {
+diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
+index 4ae6aa5..3028001 100644
+--- a/net/sched/sch_generic.c
++++ b/net/sched/sch_generic.c
+@@ -330,6 +330,24 @@ void netif_carrier_off(struct net_device *dev)
+ }
+ EXPORT_SYMBOL(netif_carrier_off);
+
++/**
++ * netif_notify_peers - notify network peers about existence of @dev
++ * @dev: network device
++ *
++ * Generate traffic such that interested network peers are aware of
++ * @dev, such as by generating a gratuitous ARP. This may be used when
++ * a device wants to inform the rest of the network about some sort of
++ * reconfiguration such as a failover event or virtual machine
++ * migration.
++ */
++void netif_notify_peers(struct net_device *dev)
++{
++ rtnl_lock();
++ call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev);
++ rtnl_unlock();
++}
++EXPORT_SYMBOL(netif_notify_peers);
++
+ /* "NOOP" scheduler: the best scheduler, recommended for all interfaces
+ under all circumstances. It is difficult to invent anything faster or
+ cheaper.
Added: dists/sid/linux-2.6/debian/patches/debian/fs-buffer.c-Avoid-ABI-change-in-2.6.32.19.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/fs-buffer.c-Avoid-ABI-change-in-2.6.32.19.patch Fri Aug 13 03:29:13 2010 (r16129)
@@ -0,0 +1,42 @@
+From 34a78e878a5641a4c6a1f5eda3735ef28dd7954b Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Fri, 13 Aug 2010 03:43:22 +0100
+Subject: [PATCH 1/2] fs/buffer.c: Avoid ABI change in 2.6.32.19
+
+The operation type flags used by ll_rw_block() and submit_bh() are
+supposed to be interchangeable with those used by submit_bio(), but
+RWA_MASK was not equal to 1 << BIO_RW_AHEAD. Changing its value is
+an ABI change; we need to accept the old value too.
+---
+ fs/buffer.c | 8 ++++++++
+ 1 files changed, 8 insertions(+), 0 deletions(-)
+
+diff --git a/fs/buffer.c b/fs/buffer.c
+index 6fa5302..f24989c 100644
+--- a/fs/buffer.c
++++ b/fs/buffer.c
+@@ -2954,6 +2954,10 @@ int submit_bh(int rw, struct buffer_head * bh)
+ BUG_ON(buffer_delay(bh));
+ BUG_ON(buffer_unwritten(bh));
+
++ /* Fix broken value of RWA_FLAG */
++ if (rw & 2)
++ rw ^= 2 | RWA_FLAG;
++
+ /*
+ * Mask in barrier bit for a write (could be either a WRITE or a
+ * WRITE_SYNC
+@@ -3027,6 +3031,10 @@ void ll_rw_block(int rw, int nr, struct buffer_head *bhs[])
+ {
+ int i;
+
++ /* Fix broken value of RWA_FLAG */
++ if (rw & 2)
++ rw ^= 2 | RWA_FLAG;
++
+ for (i = 0; i < nr; i++) {
+ struct buffer_head *bh = bhs[i];
+
+--
+1.7.1
+
Added: dists/sid/linux-2.6/debian/patches/debian/revert-sched-cputime-Introduce-thread_group_times.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/debian/revert-sched-cputime-Introduce-thread_group_times.patch Fri Aug 13 03:29:13 2010 (r16129)
@@ -0,0 +1,238 @@
+From da275351d6167de9c929e7138a8edf1571e83fb0 Mon Sep 17 00:00:00 2001
+From: Ben Hutchings <ben at decadent.org.uk>
+Date: Fri, 13 Aug 2010 03:59:33 +0100
+Subject: [PATCH 2/2] Revert "sched, cputime: Introduce thread_group_times()"
+
+This reverts commit 0cf55e1ec08bb5a22e068309e2d8ba1180ab4239
+which causes an ABI change.
+---
+ fs/proc/array.c | 5 ++++-
+ include/linux/sched.h | 4 ----
+ kernel/exit.c | 20 ++++++++------------
+ kernel/fork.c | 3 ---
+ kernel/sched.c | 41 -----------------------------------------
+ kernel/sys.c | 18 ++++++++++--------
+ 6 files changed, 22 insertions(+), 69 deletions(-)
+
+diff --git a/fs/proc/array.c b/fs/proc/array.c
+index 42fdc76..725a650 100644
+--- a/fs/proc/array.c
++++ b/fs/proc/array.c
+@@ -405,6 +405,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+
+ /* add up live thread stats at the group level */
+ if (whole) {
++ struct task_cputime cputime;
+ struct task_struct *t = task;
+ do {
+ min_flt += t->min_flt;
+@@ -415,7 +416,9 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns,
+
+ min_flt += sig->min_flt;
+ maj_flt += sig->maj_flt;
+- thread_group_times(task, &utime, &stime);
++ thread_group_cputime(task, &cputime);
++ utime = cputime.utime;
++ stime = cputime.stime;
+ gtime = cputime_add(gtime, sig->gtime);
+ }
+
+diff --git a/include/linux/sched.h b/include/linux/sched.h
+index cc24beb..b253434 100644
+--- a/include/linux/sched.h
++++ b/include/linux/sched.h
+@@ -628,9 +628,6 @@ struct signal_struct {
+ cputime_t utime, stime, cutime, cstime;
+ cputime_t gtime;
+ cputime_t cgtime;
+-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+- cputime_t prev_utime, prev_stime;
+-#endif
+ unsigned long nvcsw, nivcsw, cnvcsw, cnivcsw;
+ unsigned long min_flt, maj_flt, cmin_flt, cmaj_flt;
+ unsigned long inblock, oublock, cinblock, coublock;
+@@ -1728,7 +1725,6 @@ static inline void put_task_struct(struct task_struct *t)
+ extern cputime_t task_utime(struct task_struct *p);
+ extern cputime_t task_stime(struct task_struct *p);
+ extern cputime_t task_gtime(struct task_struct *p);
+-extern void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st);
+
+ /*
+ * Per process flags
+diff --git a/kernel/exit.c b/kernel/exit.c
+index 4a0e062..f7864ac 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -110,8 +110,8 @@ static void __exit_signal(struct task_struct *tsk)
+ * We won't ever get here for the group leader, since it
+ * will have been the last reference on the signal_struct.
+ */
+- sig->utime = cputime_add(sig->utime, tsk->utime);
+- sig->stime = cputime_add(sig->stime, tsk->stime);
++ sig->utime = cputime_add(sig->utime, task_utime(tsk));
++ sig->stime = cputime_add(sig->stime, task_stime(tsk));
+ sig->gtime = cputime_add(sig->gtime, task_gtime(tsk));
+ sig->min_flt += tsk->min_flt;
+ sig->maj_flt += tsk->maj_flt;
+@@ -1205,7 +1205,6 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ struct signal_struct *psig;
+ struct signal_struct *sig;
+ unsigned long maxrss;
+- cputime_t tgutime, tgstime;
+
+ /*
+ * The resource counters for the group leader are in its
+@@ -1221,23 +1220,20 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p)
+ * need to protect the access to parent->signal fields,
+ * as other threads in the parent group can be right
+ * here reaping other children at the same time.
+- *
+- * We use thread_group_times() to get times for the thread
+- * group, which consolidates times for all threads in the
+- * group including the group leader.
+ */
+- thread_group_times(p, &tgutime, &tgstime);
+ spin_lock_irq(&p->real_parent->sighand->siglock);
+ psig = p->real_parent->signal;
+ sig = p->signal;
+ psig->cutime =
+ cputime_add(psig->cutime,
+- cputime_add(tgutime,
+- sig->cutime));
++ cputime_add(p->utime,
++ cputime_add(sig->utime,
++ sig->cutime)));
+ psig->cstime =
+ cputime_add(psig->cstime,
+- cputime_add(tgstime,
+- sig->cstime));
++ cputime_add(p->stime,
++ cputime_add(sig->stime,
++ sig->cstime)));
+ psig->cgtime =
+ cputime_add(psig->cgtime,
+ cputime_add(p->gtime,
+diff --git a/kernel/fork.c b/kernel/fork.c
+index ce2f585..28b4874 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -884,9 +884,6 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
+ sig->utime = sig->stime = sig->cutime = sig->cstime = cputime_zero;
+ sig->gtime = cputime_zero;
+ sig->cgtime = cputime_zero;
+-#ifndef CONFIG_VIRT_CPU_ACCOUNTING
+- sig->prev_utime = sig->prev_stime = cputime_zero;
+-#endif
+ sig->nvcsw = sig->nivcsw = sig->cnvcsw = sig->cnivcsw = 0;
+ sig->min_flt = sig->maj_flt = sig->cmin_flt = sig->cmaj_flt = 0;
+ sig->inblock = sig->oublock = sig->cinblock = sig->coublock = 0;
+diff --git a/kernel/sched.c b/kernel/sched.c
+index 9990074..389ac4d 100644
+--- a/kernel/sched.c
++++ b/kernel/sched.c
+@@ -5215,16 +5215,6 @@ cputime_t task_stime(struct task_struct *p)
+ {
+ return p->stime;
+ }
+-
+-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+-{
+- struct task_cputime cputime;
+-
+- thread_group_cputime(p, &cputime);
+-
+- *ut = cputime.utime;
+- *st = cputime.stime;
+-}
+ #else
+
+ #ifndef nsecs_to_cputime
+@@ -5268,37 +5258,6 @@ cputime_t task_stime(struct task_struct *p)
+
+ return p->prev_stime;
+ }
+-
+-/*
+- * Must be called with siglock held.
+- */
+-void thread_group_times(struct task_struct *p, cputime_t *ut, cputime_t *st)
+-{
+- struct signal_struct *sig = p->signal;
+- struct task_cputime cputime;
+- cputime_t rtime, utime, total;
+-
+- thread_group_cputime(p, &cputime);
+-
+- total = cputime_add(cputime.utime, cputime.stime);
+- rtime = nsecs_to_cputime(cputime.sum_exec_runtime);
+-
+- if (total) {
+- u64 temp;
+-
+- temp = (u64)(rtime * cputime.utime);
+- do_div(temp, total);
+- utime = (cputime_t)temp;
+- } else
+- utime = rtime;
+-
+- sig->prev_utime = max(sig->prev_utime, utime);
+- sig->prev_stime = max(sig->prev_stime,
+- cputime_sub(rtime, sig->prev_utime));
+-
+- *ut = sig->prev_utime;
+- *st = sig->prev_stime;
+-}
+ #endif
+
+ inline cputime_t task_gtime(struct task_struct *p)
+diff --git a/kernel/sys.c b/kernel/sys.c
+index 26e4b8a..ce17760 100644
+--- a/kernel/sys.c
++++ b/kernel/sys.c
+@@ -911,15 +911,16 @@ change_okay:
+
+ void do_sys_times(struct tms *tms)
+ {
+- cputime_t tgutime, tgstime, cutime, cstime;
++ struct task_cputime cputime;
++ cputime_t cutime, cstime;
+
++ thread_group_cputime(current, &cputime);
+ spin_lock_irq(&current->sighand->siglock);
+- thread_group_times(current, &tgutime, &tgstime);
+ cutime = current->signal->cutime;
+ cstime = current->signal->cstime;
+ spin_unlock_irq(&current->sighand->siglock);
+- tms->tms_utime = cputime_to_clock_t(tgutime);
+- tms->tms_stime = cputime_to_clock_t(tgstime);
++ tms->tms_utime = cputime_to_clock_t(cputime.utime);
++ tms->tms_stime = cputime_to_clock_t(cputime.stime);
+ tms->tms_cutime = cputime_to_clock_t(cutime);
+ tms->tms_cstime = cputime_to_clock_t(cstime);
+ }
+@@ -1337,7 +1338,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
+ {
+ struct task_struct *t;
+ unsigned long flags;
+- cputime_t tgutime, tgstime, utime, stime;
++ cputime_t utime, stime;
++ struct task_cputime cputime;
+ unsigned long maxrss = 0;
+
+ memset((char *) r, 0, sizeof *r);
+@@ -1371,9 +1373,9 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
+ break;
+
+ case RUSAGE_SELF:
+- thread_group_times(p, &tgutime, &tgstime);
+- utime = cputime_add(utime, tgutime);
+- stime = cputime_add(stime, tgstime);
++ thread_group_cputime(p, &cputime);
++ utime = cputime_add(utime, cputime.utime);
++ stime = cputime_add(stime, cputime.stime);
+ r->ru_nvcsw += p->signal->nvcsw;
+ r->ru_nivcsw += p->signal->nivcsw;
+ r->ru_minflt += p->signal->min_flt;
+--
+1.7.1
+
Copied and modified: dists/sid/linux-2.6/debian/patches/features/all/USB-option-Use-generic-USB-wwan-code-3.patch (from r16127, dists/sid/linux-2.6/debian/patches/features/all/USB-option-Use-generic-USB-wwan-code-2.patch)
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/USB-option-Use-generic-USB-wwan-code-2.patch Thu Aug 12 17:33:01 2010 (r16127, copy source)
+++ dists/sid/linux-2.6/debian/patches/features/all/USB-option-Use-generic-USB-wwan-code-3.patch Fri Aug 13 03:29:13 2010 (r16129)
@@ -10,10 +10,11 @@
Signed-off-by: Matthew Garrett <mjg at redhat.com>
Signed-off-by: Greg Kroah-Hartman <gregkh at suse.de>
[mhy: Backport to 2.6.32]
+[bwh: Adjust context for 2.6.32.19]
---
--- a/drivers/usb/serial/Kconfig
+++ b/drivers/usb/serial/Kconfig
-@@ -581,6 +581,7 @@ config USB_SERIAL_WWAN
+@@ -571,6 +571,7 @@ config USB_SERIAL_WWAN
config USB_SERIAL_OPTION
tristate "USB driver for GSM and CDMA modems"
@@ -61,7 +62,7 @@
/* Vendor and product IDs */
#define OPTION_VENDOR_ID 0x0AF0
#define OPTION_PRODUCT_COLT 0x5000
-@@ -680,22 +659,22 @@
+@@ -909,22 +888,22 @@
.id_table = option_ids,
.num_ports = 1,
.probe = option_probe,
@@ -98,7 +99,7 @@
#endif
};
-@@ -708,12 +687,6 @@
+@@ -937,12 +916,6 @@
#define IN_BUFLEN 4096
#define OUT_BUFLEN 4096
@@ -111,7 +112,7 @@
struct option_port_private {
/* Input endpoints and buffer for this port */
struct urb *in_urbs[N_IN_URB];
-@@ -770,216 +743,28 @@
+@@ -999,7 +972,7 @@
static int option_probe(struct usb_serial *serial,
const struct usb_device_id *id)
{
@@ -120,14 +121,8 @@
/* D-Link DWM 652 still exposes CD-Rom emulation interface in modem mode */
if (serial->dev->descriptor.idVendor == DLINK_VENDOR_ID &&
- serial->dev->descriptor.idProduct == DLINK_PRODUCT_DWM_652 &&
- serial->interface->cur_altsetting->desc.bInterfaceClass == 0x8)
- return -ENODEV;
-
- /* Bandrich modem and AT command interface is 0xff */
- if ((serial->dev->descriptor.idVendor == BANDRICH_VENDOR_ID ||
- serial->dev->descriptor.idVendor == PIRELLI_VENDOR_ID) &&
- serial->interface->cur_altsetting->desc.bInterfaceClass != 0xff)
+@@ -1020,202 +993,14 @@
+ serial->interface->cur_altsetting->desc.bInterfaceNumber == 1)
return -ENODEV;
- data = serial->private = kzalloc(sizeof(struct option_intf_private), GFP_KERNEL);
@@ -331,7 +326,7 @@
static void option_instat_callback(struct urb *urb)
{
int err;
-@@ -1029,183 +814,6 @@
+@@ -1272,183 +1057,6 @@
}
}
@@ -515,7 +510,7 @@
/** send RTS/DTR state to the port.
*
* This is exactly the same as SET_CONTROL_LINE_STATE from the PSTN
-@@ -1231,224 +839,6 @@
+@@ -1474,224 +1082,6 @@
0x22, 0x21, val, ifNum, NULL, 0, USB_CTRL_SET_TIMEOUT);
}
Modified: dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch Thu Aug 12 21:50:04 2010 (r16128)
+++ dists/sid/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.27.patch Fri Aug 13 03:29:13 2010 (r16129)
@@ -1,4 +1,4 @@
-bwh: Adjust context in fs/ext4/ext4.h
+bwh: Adjust context in fs/ext4/ext4.h, fs/btrfs/super.h
diff -NurpP --minimal linux-2.6.32.1/arch/alpha/Kconfig linux-2.6.32.1-vs2.3.0.36.27/arch/alpha/Kconfig
--- linux-2.6.32.1/arch/alpha/Kconfig 2009-12-03 20:01:49.000000000 +0100
@@ -2446,9 +2446,9 @@
+ btrfs_set_opt(info->mount_opt, TAGGED);
+ break;
+#endif
- default:
- break;
- }
+ case Opt_err:
+ printk(KERN_INFO "btrfs: unrecognized mount option "
+ "'%s'\n", p);
@@ -575,6 +594,12 @@ static int btrfs_remount(struct super_bl
if (ret)
return -EINVAL;
Modified: dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Thu Aug 12 21:50:04 2010 (r16128)
+++ dists/sid/linux-2.6/debian/patches/features/all/xen/pvops.patch Fri Aug 13 03:29:13 2010 (r16129)
@@ -207,332 +207,6 @@
static inline void detect_calgary(void) { return; }
#endif
-diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h
-index ee1931b..5af5051 100644
---- a/arch/x86/include/asm/cmpxchg_32.h
-+++ b/arch/x86/include/asm/cmpxchg_32.h
-@@ -34,12 +34,12 @@ static inline void __set_64bit(unsigned long long *ptr,
- unsigned int low, unsigned int high)
- {
- asm volatile("\n1:\t"
-- "movl (%0), %%eax\n\t"
-- "movl 4(%0), %%edx\n\t"
-- LOCK_PREFIX "cmpxchg8b (%0)\n\t"
-+ "movl (%1), %%eax\n\t"
-+ "movl 4(%1), %%edx\n\t"
-+ LOCK_PREFIX "cmpxchg8b %0\n\t"
- "jnz 1b"
-- : /* no outputs */
-- : "D"(ptr),
-+ : "=m"(*ptr)
-+ : "D" (ptr),
- "b"(low),
- "c"(high)
- : "ax", "dx", "memory");
-@@ -82,20 +82,20 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
- switch (size) {
- case 1:
- asm volatile("xchgb %b0,%1"
-- : "=q" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=q" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- case 2:
- asm volatile("xchgw %w0,%1"
-- : "=r" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=r" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- case 4:
- asm volatile("xchgl %0,%1"
-- : "=r" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=r" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- }
-@@ -139,21 +139,21 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long prev;
- switch (size) {
- case 1:
-- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
-- : "=a"(prev)
-- : "q"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "q"(new), "0"(old)
- : "memory");
- return prev;
- case 2:
-- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 4:
-- asm volatile(LOCK_PREFIX "cmpxchgl %1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgl %2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- }
-@@ -172,21 +172,21 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
- unsigned long prev;
- switch (size) {
- case 1:
-- asm volatile("lock; cmpxchgb %b1,%2"
-- : "=a"(prev)
-- : "q"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("lock; cmpxchgb %b2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "q"(new), "0"(old)
- : "memory");
- return prev;
- case 2:
-- asm volatile("lock; cmpxchgw %w1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("lock; cmpxchgw %w2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 4:
-- asm volatile("lock; cmpxchgl %1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("lock; cmpxchgl %2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- }
-@@ -200,21 +200,21 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
- unsigned long prev;
- switch (size) {
- case 1:
-- asm volatile("cmpxchgb %b1,%2"
-- : "=a"(prev)
-- : "q"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgb %b2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "q"(new), "0"(old)
- : "memory");
- return prev;
- case 2:
-- asm volatile("cmpxchgw %w1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgw %w2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 4:
-- asm volatile("cmpxchgl %1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgl %2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- }
-@@ -226,11 +226,10 @@ static inline unsigned long long __cmpxchg64(volatile void *ptr,
- unsigned long long new)
- {
- unsigned long long prev;
-- asm volatile(LOCK_PREFIX "cmpxchg8b %3"
-- : "=A"(prev)
-+ asm volatile(LOCK_PREFIX "cmpxchg8b %1"
-+ : "=A"(prev), "+m" (*__xg(ptr))
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
-- "m"(*__xg(ptr)),
- "0"(old)
- : "memory");
- return prev;
-@@ -241,11 +240,10 @@ static inline unsigned long long __cmpxchg64_local(volatile void *ptr,
- unsigned long long new)
- {
- unsigned long long prev;
-- asm volatile("cmpxchg8b %3"
-- : "=A"(prev)
-+ asm volatile("cmpxchg8b %1"
-+ : "=A"(prev), "+m"(*__xg(ptr))
- : "b"((unsigned long)new),
- "c"((unsigned long)(new >> 32)),
-- "m"(*__xg(ptr)),
- "0"(old)
- : "memory");
- return prev;
-diff --git a/arch/x86/include/asm/cmpxchg_64.h b/arch/x86/include/asm/cmpxchg_64.h
-index 52de72e..1871cb0 100644
---- a/arch/x86/include/asm/cmpxchg_64.h
-+++ b/arch/x86/include/asm/cmpxchg_64.h
-@@ -26,26 +26,26 @@ static inline unsigned long __xchg(unsigned long x, volatile void *ptr,
- switch (size) {
- case 1:
- asm volatile("xchgb %b0,%1"
-- : "=q" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=q" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- case 2:
- asm volatile("xchgw %w0,%1"
-- : "=r" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=r" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- case 4:
- asm volatile("xchgl %k0,%1"
-- : "=r" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=r" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- case 8:
- asm volatile("xchgq %0,%1"
-- : "=r" (x)
-- : "m" (*__xg(ptr)), "0" (x)
-+ : "=r" (x), "+m" (*__xg(ptr))
-+ : "0" (x)
- : "memory");
- break;
- }
-@@ -66,27 +66,27 @@ static inline unsigned long __cmpxchg(volatile void *ptr, unsigned long old,
- unsigned long prev;
- switch (size) {
- case 1:
-- asm volatile(LOCK_PREFIX "cmpxchgb %b1,%2"
-- : "=a"(prev)
-- : "q"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgb %b2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "q"(new), "0"(old)
- : "memory");
- return prev;
- case 2:
-- asm volatile(LOCK_PREFIX "cmpxchgw %w1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgw %w2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 4:
-- asm volatile(LOCK_PREFIX "cmpxchgl %k1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgl %k2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 8:
-- asm volatile(LOCK_PREFIX "cmpxchgq %1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile(LOCK_PREFIX "cmpxchgq %2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- }
-@@ -105,21 +105,27 @@ static inline unsigned long __sync_cmpxchg(volatile void *ptr,
- unsigned long prev;
- switch (size) {
- case 1:
-- asm volatile("lock; cmpxchgb %b1,%2"
-- : "=a"(prev)
-- : "q"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("lock; cmpxchgb %b2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "q"(new), "0"(old)
- : "memory");
- return prev;
- case 2:
-- asm volatile("lock; cmpxchgw %w1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("lock; cmpxchgw %w2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 4:
-- asm volatile("lock; cmpxchgl %1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("lock; cmpxchgl %k2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
-+ : "memory");
-+ return prev;
-+ case 8:
-+ asm volatile("lock; cmpxchgq %2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- }
-@@ -133,27 +139,27 @@ static inline unsigned long __cmpxchg_local(volatile void *ptr,
- unsigned long prev;
- switch (size) {
- case 1:
-- asm volatile("cmpxchgb %b1,%2"
-- : "=a"(prev)
-- : "q"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgb %b2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "q"(new), "0"(old)
- : "memory");
- return prev;
- case 2:
-- asm volatile("cmpxchgw %w1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgw %w2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 4:
-- asm volatile("cmpxchgl %k1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgl %k2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- case 8:
-- asm volatile("cmpxchgq %1,%2"
-- : "=a"(prev)
-- : "r"(new), "m"(*__xg(ptr)), "0"(old)
-+ asm volatile("cmpxchgq %2,%1"
-+ : "=a"(prev), "+m"(*__xg(ptr))
-+ : "r"(new), "0"(old)
- : "memory");
- return prev;
- }
diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h
index 6a25d5d..ac91eed 100644
--- a/arch/x86/include/asm/dma-mapping.h
@@ -16217,14 +15891,6 @@
int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu,
irq_handler_t handler,
unsigned long irqflags, const char *devname, void *dev_id)
-@@ -535,6 +949,7 @@ int bind_ipi_to_irqhandler(enum ipi_vector ipi,
- if (irq < 0)
- return irq;
-
-+ irqflags |= IRQF_NO_SUSPEND;
- retval = request_irq(irq, handler, irqflags, devname, dev_id);
- if (retval != 0) {
- unbind_from_irq(irq);
@@ -616,17 +1031,13 @@ static DEFINE_PER_CPU(unsigned, xed_nesting_count);
* a bitset of words which contain pending event bits. The second
* level is a bitset of pending events themselves.
@@ -29713,18 +29379,6 @@
+ __u32 tx_rate;
+};
#endif /* _LINUX_IF_LINK_H */
-diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h
-index 7ca72b7..1c30adf 100644
---- a/include/linux/interrupt.h
-+++ b/include/linux/interrupt.h
-@@ -62,6 +62,7 @@
- #define IRQF_NOBALANCING 0x00000800
- #define IRQF_IRQPOLL 0x00001000
- #define IRQF_ONESHOT 0x00002000
-+#define IRQF_NO_SUSPEND 0x00004000
-
- /*
- * Bits used by threaded handlers:
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 24c3956..e8cf80f 100644
--- a/include/linux/mm.h
@@ -32386,20 +32040,6 @@
struct device_driver driver;
int (*read_otherend_details)(struct xenbus_device *dev);
int (*is_ready)(struct xenbus_device *dev);
-diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c
-index 986519e..cae345b 100644
---- a/kernel/irq/manage.c
-+++ b/kernel/irq/manage.c
-@@ -200,7 +200,8 @@ static inline int setup_affinity(unsigned int irq, struct irq_desc *desc)
- void __disable_irq(struct irq_desc *desc, unsigned int irq, bool suspend)
- {
- if (suspend) {
-- if (!desc->action || (desc->action->flags & IRQF_TIMER))
-+ if (!desc->action ||
-+ (desc->action->flags & (IRQF_TIMER | IRQF_NO_SUSPEND)))
- return;
- desc->status |= IRQ_SUSPENDED;
- }
diff --git a/lib/Makefile b/lib/Makefile
index 452f188..001e918 100644
--- a/lib/Makefile
Added: dists/sid/linux-2.6/debian/patches/series/21
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/sid/linux-2.6/debian/patches/series/21 Fri Aug 13 03:29:13 2010 (r16129)
@@ -0,0 +1,6 @@
+- bugfix/all/ext4-fix-freeze-deadlock-under-IO.patch
+- features/all/USB-option-Use-generic-USB-wwan-code-2.patch
++ bugfix/all/stable/2.6.32.19-rc1
++ debian/fs-buffer.c-Avoid-ABI-change-in-2.6.32.19.patch
++ debian/revert-sched-cputime-Introduce-thread_group_times.patch
++ features/all/USB-option-Use-generic-USB-wwan-code-3.patch
More information about the Kernel-svn-changes mailing list