[kernel] r18979 - in dists/squeeze/linux-2.6: . debian debian/patches/bugfix/all debian/patches/bugfix/x86 debian/patches/features/all/vserver debian/patches/series
Dann Frazier
dannf at alioth.debian.org
Fri May 4 06:33:30 UTC 2012
Author: dannf
Date: Fri May 4 06:33:24 2012
New Revision: 18979
Log:
merge squeeze-security
Added:
dists/squeeze/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/all/security-fix-compile-error-in-commoncap.c.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/security-fix-compile-error-in-commoncap.c.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch
dists/squeeze/linux-2.6/debian/patches/bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch
- copied unchanged from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch
Modified:
dists/squeeze/linux-2.6/ (props changed)
dists/squeeze/linux-2.6/debian/changelog
dists/squeeze/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.29.8.patch
dists/squeeze/linux-2.6/debian/patches/series/44
Modified: dists/squeeze/linux-2.6/debian/changelog
==============================================================================
--- dists/squeeze/linux-2.6/debian/changelog Fri May 4 06:25:21 2012 (r18978)
+++ dists/squeeze/linux-2.6/debian/changelog Fri May 4 06:33:24 2012 (r18979)
@@ -7,6 +7,15 @@
* Revert "Work around unhappy compat problem on x86-64",
included in stable update 2.6.32.58, due to userspace breakage.
Reopens #633423.
+ * CVE-2012-0879:
+ - block: Fix io_context leak after clone with CLONE_IO
+ - block: Fix io_context leak after failure of clone with CLONE_IO
+ * KVM: Ensure all vcpus are consistent with in-kernel irqchip settings
+ (CVE-2012-1601)
+ * hugepages: fix use after free bug in "quota" handling (CVE-2012-2133)
+ * fcaps: clear the same personality flags as suid when fcaps are used
+ (CVE-2012-2123)
+ * jbd2: clear BH_Delay & BH_Unwritten in journal_unmap_buffer (CVE-2011-4086)
-- Ben Hutchings <ben at decadent.org.uk> Tue, 24 Apr 2012 04:24:55 +0100
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch)
@@ -0,0 +1,29 @@
+commit 61cc74fbb87af6aa551a06a370590c9bc07e29d9
+Author: Louis Rilling <louis.rilling at kerlabs.com>
+Date: Fri Dec 4 14:52:41 2009 +0100
+
+ block: Fix io_context leak after clone with CLONE_IO
+
+ With CLONE_IO, copy_io() increments both ioc->refcount and ioc->nr_tasks.
+ However exit_io_context() only decrements ioc->refcount if ioc->nr_tasks
+ reaches 0.
+
+ Always call put_io_context() in exit_io_context().
+
+ Signed-off-by: Louis Rilling <louis.rilling at kerlabs.com>
+ Signed-off-by: Jens Axboe <jens.axboe at oracle.com>
+
+diff --git a/block/blk-ioc.c b/block/blk-ioc.c
+index d4ed600..dcd0412 100644
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -80,8 +80,8 @@ void exit_io_context(void)
+ ioc->aic->exit(ioc->aic);
+ cfq_exit(ioc);
+
+- put_io_context(ioc);
+ }
++ put_io_context(ioc);
+ }
+
+ struct io_context *alloc_io_context(gfp_t gfp_flags, int node)
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch)
@@ -0,0 +1,89 @@
+commit b69f2292063d2caf37ca9aec7d63ded203701bf3
+Author: Louis Rilling <louis.rilling at kerlabs.com>
+Date: Fri Dec 4 14:52:42 2009 +0100
+
+ block: Fix io_context leak after failure of clone with CLONE_IO
+
+ With CLONE_IO, parent's io_context->nr_tasks is incremented, but never
+ decremented whenever copy_process() fails afterwards, which prevents
+ exit_io_context() from calling IO schedulers exit functions.
+
+ Give a task_struct to exit_io_context(), and call exit_io_context() instead of
+ put_io_context() in copy_process() cleanup path.
+
+ Signed-off-by: Louis Rilling <louis.rilling at kerlabs.com>
+ Signed-off-by: Jens Axboe <jens.axboe at oracle.com>
+
+diff --git a/block/blk-ioc.c b/block/blk-ioc.c
+index dcd0412..cbdabb0 100644
+--- a/block/blk-ioc.c
++++ b/block/blk-ioc.c
+@@ -66,14 +66,14 @@ static void cfq_exit(struct io_context *ioc)
+ }
+
+ /* Called by the exitting task */
+-void exit_io_context(void)
++void exit_io_context(struct task_struct *task)
+ {
+ struct io_context *ioc;
+
+- task_lock(current);
+- ioc = current->io_context;
+- current->io_context = NULL;
+- task_unlock(current);
++ task_lock(task);
++ ioc = task->io_context;
++ task->io_context = NULL;
++ task_unlock(task);
+
+ if (atomic_dec_and_test(&ioc->nr_tasks)) {
+ if (ioc->aic && ioc->aic->exit)
+diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h
+index d61b0b8..a632359 100644
+--- a/include/linux/iocontext.h
++++ b/include/linux/iocontext.h
+@@ -98,14 +98,15 @@ static inline struct io_context *ioc_task_link(struct io_context *ioc)
+ return NULL;
+ }
+
++struct task_struct;
+ #ifdef CONFIG_BLOCK
+ int put_io_context(struct io_context *ioc);
+-void exit_io_context(void);
++void exit_io_context(struct task_struct *task);
+ struct io_context *get_io_context(gfp_t gfp_flags, int node);
+ struct io_context *alloc_io_context(gfp_t gfp_flags, int node);
+ void copy_io_context(struct io_context **pdst, struct io_context **psrc);
+ #else
+-static inline void exit_io_context(void)
++static inline void exit_io_context(struct task_struct *task)
+ {
+ }
+
+diff --git a/kernel/exit.c b/kernel/exit.c
+index f7864ac..2544000 100644
+--- a/kernel/exit.c
++++ b/kernel/exit.c
+@@ -1004,7 +1004,7 @@ NORET_TYPE void do_exit(long code)
+ tsk->flags |= PF_EXITPIDONE;
+
+ if (tsk->io_context)
+- exit_io_context();
++ exit_io_context(tsk);
+
+ if (tsk->splice_pipe)
+ __free_pipe_info(tsk->splice_pipe);
+diff --git a/kernel/fork.c b/kernel/fork.c
+index 166b8c4..6073534 100644
+--- a/kernel/fork.c
++++ b/kernel/fork.c
+@@ -1310,7 +1310,8 @@ bad_fork_free_pid:
+ if (pid != &init_struct_pid)
+ free_pid(pid);
+ bad_fork_cleanup_io:
+- put_io_context(p->io_context);
++ if (p->io_context)
++ exit_io_context(p);
+ bad_fork_cleanup_namespaces:
+ exit_task_namespaces(p);
+ bad_fork_cleanup_mm:
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch)
@@ -0,0 +1,32 @@
+commit d52fc5dde171f030170a6cb78034d166b13c9445
+Author: Eric Paris <eparis at redhat.com>
+Date: Tue Apr 17 16:26:54 2012 -0400
+
+ fcaps: clear the same personality flags as suid when fcaps are used
+
+ If a process increases permissions using fcaps all of the dangerous
+ personality flags which are cleared for suid apps should also be cleared.
+ Thus programs given privilege with fcaps will continue to have address space
+ randomization enabled even if the parent tried to disable it to make it
+ easier to attack.
+
+ Signed-off-by: Eric Paris <eparis at redhat.com>
+ Reviewed-by: Serge Hallyn <serge.hallyn at canonical.com>
+ Signed-off-by: James Morris <james.l.morris at oracle.com>
+
+diff --git a/security/commoncap.c b/security/commoncap.c
+index 0cf4b53..0ecf4ba 100644
+--- a/security/commoncap.c
++++ b/security/commoncap.c
+@@ -505,6 +505,11 @@ int cap_bprm_set_creds(struct linux_binprm *bprm)
+ }
+ skip:
+
++ /* if we have fs caps, clear dangerous personality flags */
++ if (!cap_issubset(new->cap_permitted, old->cap_permitted))
++ bprm->per_clear |= PER_CLEAR_ON_SETID;
++
++
+ /* Don't let someone trace a set[ug]id/setpcap binary with the revised
+ * credentials unless they have the appropriate permit
+ */
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch)
@@ -0,0 +1,449 @@
+commit 90481622d75715bfcb68501280a917dbfe516029
+Author: David Gibson <david at gibson.dropbear.id.au>
+Date: Wed Mar 21 16:34:12 2012 -0700
+
+ hugepages: fix use after free bug in "quota" handling
+
+ hugetlbfs_{get,put}_quota() are badly named. They don't interact with the
+ general quota handling code, and they don't much resemble its behaviour.
+ Rather than being about maintaining limits on on-disk block usage by
+ particular users, they are instead about maintaining limits on in-memory
+ page usage (including anonymous MAP_PRIVATE copied-on-write pages)
+ associated with a particular hugetlbfs filesystem instance.
+
+ Worse, they work by having callbacks to the hugetlbfs filesystem code from
+ the low-level page handling code, in particular from free_huge_page().
+ This is a layering violation of itself, but more importantly, if the
+ kernel does a get_user_pages() on hugepages (which can happen from KVM
+ amongst others), then the free_huge_page() can be delayed until after the
+ associated inode has already been freed. If an unmount occurs at the
+ wrong time, even the hugetlbfs superblock where the "quota" limits are
+ stored may have been freed.
+
+ Andrew Barry proposed a patch to fix this by having hugepages, instead of
+ storing a pointer to their address_space and reaching the superblock from
+ there, store pointers directly to the superblock,
+ bumping the reference count as appropriate to avoid it being freed.
+ Andrew Morton rejected that version, however, on the grounds that it made
+ the existing layering violation worse.
+
+ This is a reworked version of Andrew's patch, which removes the extra, and
+ some of the existing, layering violation. It works by introducing the
+ concept of a hugepage "subpool" at the lower hugepage mm layer - that is a
+ finite logical pool of hugepages to allocate from. hugetlbfs now creates
+ a subpool for each filesystem instance with a page limit set, and a
+ pointer to the subpool gets added to each allocated hugepage, instead of
+ the address_space pointer used now. The subpool has its own lifetime and
+ is only freed once all pages in it _and_ all other references to it (i.e.
+ superblocks) are gone.
+
+ subpools are optional - a NULL subpool pointer is taken by the code to
+ mean that no subpool limits are in effect.
+
+ Previous discussion of this bug found in: "Fix refcounting in hugetlbfs
+ quota handling.". See: https://lkml.org/lkml/2011/8/11/28 or
+ http://marc.info/?l=linux-mm&m=126928970510627&w=1
+
+ v2: Fixed a bug spotted by Hillf Danton, and removed the extra parameter to
+ alloc_huge_page() - since it already takes the vma, it is not necessary.
+
+ Signed-off-by: Andrew Barry <abarry at cray.com>
+ Signed-off-by: David Gibson <david at gibson.dropbear.id.au>
+ Cc: Hugh Dickins <hughd at google.com>
+ Cc: Mel Gorman <mgorman at suse.de>
+ Cc: Minchan Kim <minchan.kim at gmail.com>
+ Cc: Hillf Danton <dhillf at gmail.com>
+ Cc: Paul Mackerras <paulus at samba.org>
+ Signed-off-by: Andrew Morton <akpm at linux-foundation.org>
+ Signed-off-by: Linus Torvalds <torvalds at linux-foundation.org>
+ [dannf: backported to Debian's 2.6.32]
+
+diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
+index 87a1258..2179de8 100644
+--- a/fs/hugetlbfs/inode.c
++++ b/fs/hugetlbfs/inode.c
+@@ -601,9 +601,15 @@ static int hugetlbfs_statfs(struct dentry *dentry, struct kstatfs *buf)
+ spin_lock(&sbinfo->stat_lock);
+ /* If no limits set, just report 0 for max/free/used
+ * blocks, like simple_statfs() */
+- if (sbinfo->max_blocks >= 0) {
+- buf->f_blocks = sbinfo->max_blocks;
+- buf->f_bavail = buf->f_bfree = sbinfo->free_blocks;
++ if (sbinfo->spool) {
++ long free_pages;
++
++ spin_lock(&sbinfo->spool->lock);
++ buf->f_blocks = sbinfo->spool->max_hpages;
++ free_pages = sbinfo->spool->max_hpages
++ - sbinfo->spool->used_hpages;
++ buf->f_bavail = buf->f_bfree = free_pages;
++ spin_unlock(&sbinfo->spool->lock);
+ buf->f_files = sbinfo->max_inodes;
+ buf->f_ffree = sbinfo->free_inodes;
+ }
+@@ -619,6 +625,10 @@ static void hugetlbfs_put_super(struct super_block *sb)
+
+ if (sbi) {
+ sb->s_fs_info = NULL;
++
++ if (sbi->spool)
++ hugepage_put_subpool(sbi->spool);
++
+ kfree(sbi);
+ }
+ }
+@@ -842,10 +852,14 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
+ sb->s_fs_info = sbinfo;
+ sbinfo->hstate = config.hstate;
+ spin_lock_init(&sbinfo->stat_lock);
+- sbinfo->max_blocks = config.nr_blocks;
+- sbinfo->free_blocks = config.nr_blocks;
+ sbinfo->max_inodes = config.nr_inodes;
+ sbinfo->free_inodes = config.nr_inodes;
++ sbinfo->spool = NULL;
++ if (config.nr_blocks != -1) {
++ sbinfo->spool = hugepage_new_subpool(config.nr_blocks);
++ if (!sbinfo->spool)
++ goto out_free;
++ }
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+ sb->s_blocksize = huge_page_size(config.hstate);
+ sb->s_blocksize_bits = huge_page_shift(config.hstate);
+@@ -865,38 +879,12 @@ hugetlbfs_fill_super(struct super_block *sb, void *data, int silent)
+ sb->s_root = root;
+ return 0;
+ out_free:
++ if (sbinfo->spool)
++ kfree(sbinfo->spool);
+ kfree(sbinfo);
+ return -ENOMEM;
+ }
+
+-int hugetlb_get_quota(struct address_space *mapping, long delta)
+-{
+- int ret = 0;
+- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
+-
+- if (sbinfo->free_blocks > -1) {
+- spin_lock(&sbinfo->stat_lock);
+- if (sbinfo->free_blocks - delta >= 0)
+- sbinfo->free_blocks -= delta;
+- else
+- ret = -ENOMEM;
+- spin_unlock(&sbinfo->stat_lock);
+- }
+-
+- return ret;
+-}
+-
+-void hugetlb_put_quota(struct address_space *mapping, long delta)
+-{
+- struct hugetlbfs_sb_info *sbinfo = HUGETLBFS_SB(mapping->host->i_sb);
+-
+- if (sbinfo->free_blocks > -1) {
+- spin_lock(&sbinfo->stat_lock);
+- sbinfo->free_blocks += delta;
+- spin_unlock(&sbinfo->stat_lock);
+- }
+-}
+-
+ static int hugetlbfs_get_sb(struct file_system_type *fs_type,
+ int flags, const char *dev_name, void *data, struct vfsmount *mnt)
+ {
+diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
+index 41a59af..6b3feef 100644
+--- a/include/linux/hugetlb.h
++++ b/include/linux/hugetlb.h
+@@ -12,6 +12,15 @@ struct user_struct;
+ #include <linux/shm.h>
+ #include <asm/tlbflush.h>
+
++struct hugepage_subpool {
++ spinlock_t lock;
++ long count;
++ long max_hpages, used_hpages;
++};
++
++struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);
++void hugepage_put_subpool(struct hugepage_subpool *spool);
++
+ int PageHuge(struct page *page);
+
+ static inline int is_vm_hugetlb_page(struct vm_area_struct *vma)
+@@ -138,12 +147,11 @@ struct hugetlbfs_config {
+ };
+
+ struct hugetlbfs_sb_info {
+- long max_blocks; /* blocks allowed */
+- long free_blocks; /* blocks free */
+ long max_inodes; /* inodes allowed */
+ long free_inodes; /* inodes free */
+ spinlock_t stat_lock;
+ struct hstate *hstate;
++ struct hugepage_subpool *spool;
+ };
+
+
+@@ -166,8 +174,6 @@ extern const struct file_operations hugetlbfs_file_operations;
+ extern const struct vm_operations_struct hugetlb_vm_ops;
+ struct file *hugetlb_file_setup(const char *name, size_t size, int acct,
+ struct user_struct **user, int creat_flags);
+-int hugetlb_get_quota(struct address_space *mapping, long delta);
+-void hugetlb_put_quota(struct address_space *mapping, long delta);
+
+ static inline int is_file_hugepages(struct file *file)
+ {
+diff --git a/mm/hugetlb.c b/mm/hugetlb.c
+index 5e1e508..20f9240 100644
+--- a/mm/hugetlb.c
++++ b/mm/hugetlb.c
+@@ -49,6 +49,84 @@ static unsigned long __initdata default_hstate_size;
+ */
+ static DEFINE_SPINLOCK(hugetlb_lock);
+
++static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
++{
++ bool free = (spool->count == 0) && (spool->used_hpages == 0);
++
++ spin_unlock(&spool->lock);
++
++ /* If no pages are used, and no other handles to the subpool
+ * remain, free the subpool. */
++ if (free)
++ kfree(spool);
++}
++
++struct hugepage_subpool *hugepage_new_subpool(long nr_blocks)
++{
++ struct hugepage_subpool *spool;
++
++ spool = kmalloc(sizeof(*spool), GFP_KERNEL);
++ if (!spool)
++ return NULL;
++
++ spin_lock_init(&spool->lock);
++ spool->count = 1;
++ spool->max_hpages = nr_blocks;
++ spool->used_hpages = 0;
++
++ return spool;
++}
++
++void hugepage_put_subpool(struct hugepage_subpool *spool)
++{
++ spin_lock(&spool->lock);
++ BUG_ON(!spool->count);
++ spool->count--;
++ unlock_or_release_subpool(spool);
++}
++
++static int hugepage_subpool_get_pages(struct hugepage_subpool *spool,
++ long delta)
++{
++ int ret = 0;
++
++ if (!spool)
++ return 0;
++
++ spin_lock(&spool->lock);
++ if ((spool->used_hpages + delta) <= spool->max_hpages) {
++ spool->used_hpages += delta;
++ } else {
++ ret = -ENOMEM;
++ }
++ spin_unlock(&spool->lock);
++
++ return ret;
++}
++
++static void hugepage_subpool_put_pages(struct hugepage_subpool *spool,
++ long delta)
++{
++ if (!spool)
++ return;
++
++ spin_lock(&spool->lock);
++ spool->used_hpages -= delta;
++ /* If hugetlbfs_put_super couldn't free spool due to
++ * an outstanding quota reference, free it now. */
++ unlock_or_release_subpool(spool);
++}
++
++static inline struct hugepage_subpool *subpool_inode(struct inode *inode)
++{
++ return HUGETLBFS_SB(inode->i_sb)->spool;
++}
++
++static inline struct hugepage_subpool *subpool_vma(struct vm_area_struct *vma)
++{
++ return subpool_inode(vma->vm_file->f_dentry->d_inode);
++}
++
+ /*
+ * Region tracking -- allows tracking of reservations and instantiated pages
+ * across the pages in a mapping.
+@@ -541,9 +619,9 @@ static void free_huge_page(struct page *page)
+ */
+ struct hstate *h = page_hstate(page);
+ int nid = page_to_nid(page);
+- struct address_space *mapping;
++ struct hugepage_subpool *spool =
++ (struct hugepage_subpool *)page_private(page);
+
+- mapping = (struct address_space *) page_private(page);
+ set_page_private(page, 0);
+ page->mapping = NULL;
+ BUG_ON(page_count(page));
+@@ -558,8 +636,7 @@ static void free_huge_page(struct page *page)
+ enqueue_huge_page(h, page);
+ }
+ spin_unlock(&hugetlb_lock);
+- if (mapping)
+- hugetlb_put_quota(mapping, 1);
++ hugepage_subpool_put_pages(spool, 1);
+ }
+
+ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
+@@ -927,11 +1004,12 @@ static void return_unused_surplus_pages(struct hstate *h,
+ /*
+ * Determine if the huge page at addr within the vma has an associated
+ * reservation. Where it does not we will need to logically increase
+- * reservation and actually increase quota before an allocation can occur.
+- * Where any new reservation would be required the reservation change is
+- * prepared, but not committed. Once the page has been quota'd allocated
+- * an instantiated the change should be committed via vma_commit_reservation.
+- * No action is required on failure.
++ * reservation and actually increase subpool usage before an allocation
++ * can occur. Where any new reservation would be required the
++ * reservation change is prepared, but not committed. Once the page
++ * has been allocated from the subpool and instantiated the change should
++ * be committed via vma_commit_reservation. No action is required on
++ * failure.
+ */
+ static long vma_needs_reservation(struct hstate *h,
+ struct vm_area_struct *vma, unsigned long addr)
+@@ -980,24 +1058,24 @@ static void vma_commit_reservation(struct hstate *h,
+ static struct page *alloc_huge_page(struct vm_area_struct *vma,
+ unsigned long addr, int avoid_reserve)
+ {
++ struct hugepage_subpool *spool = subpool_vma(vma);
+ struct hstate *h = hstate_vma(vma);
+ struct page *page;
+- struct address_space *mapping = vma->vm_file->f_mapping;
+- struct inode *inode = mapping->host;
+ long chg;
+
+ /*
+- * Processes that did not create the mapping will have no reserves and
+- * will not have accounted against quota. Check that the quota can be
+- * made before satisfying the allocation
+- * MAP_NORESERVE mappings may also need pages and quota allocated
+- * if no reserve mapping overlaps.
++ * Processes that did not create the mapping will have no
++ * reserves and will not have accounted against subpool
+ * limit. Check that the subpool limit can be made before
+ * satisfying the allocation. MAP_NORESERVE mappings may also
+ * need pages and subpool limit allocated if no reserve
++ * mapping overlaps.
+ */
+ chg = vma_needs_reservation(h, vma, addr);
+ if (chg < 0)
+ return ERR_PTR(-VM_FAULT_OOM);
+ if (chg)
+- if (hugetlb_get_quota(inode->i_mapping, chg))
++ if (hugepage_subpool_get_pages(spool, chg))
+ return ERR_PTR(-VM_FAULT_SIGBUS);
+
+ spin_lock(&hugetlb_lock);
+@@ -1007,13 +1085,13 @@ static struct page *alloc_huge_page(struct vm_area_struct *vma,
+ if (!page) {
+ page = alloc_buddy_huge_page(h, vma, addr);
+ if (!page) {
+- hugetlb_put_quota(inode->i_mapping, chg);
++ hugepage_subpool_put_pages(spool, chg);
+ return ERR_PTR(-VM_FAULT_SIGBUS);
+ }
+ }
+
+ set_page_refcounted(page);
+- set_page_private(page, (unsigned long) mapping);
++ set_page_private(page, (unsigned long)spool);
+
+ vma_commit_reservation(h, vma, addr);
+
+@@ -1698,6 +1776,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
+ {
+ struct hstate *h = hstate_vma(vma);
+ struct resv_map *reservations = vma_resv_map(vma);
++ struct hugepage_subpool *spool = subpool_vma(vma);
+ unsigned long reserve;
+ unsigned long start;
+ unsigned long end;
+@@ -1713,7 +1792,7 @@ static void hugetlb_vm_op_close(struct vm_area_struct *vma)
+
+ if (reserve) {
+ hugetlb_acct_memory(h, -reserve);
+- hugetlb_put_quota(vma->vm_file->f_mapping, reserve);
++ hugepage_subpool_put_pages(spool, reserve);
+ }
+ }
+ }
+@@ -1910,7 +1989,7 @@ static int unmap_ref_private(struct mm_struct *mm, struct vm_area_struct *vma,
+ address = address & huge_page_mask(h);
+ pgoff = ((address - vma->vm_start) >> PAGE_SHIFT)
+ + (vma->vm_pgoff >> PAGE_SHIFT);
+- mapping = (struct address_space *)page_private(page);
++ mapping = vma->vm_file->f_dentry->d_inode->i_mapping;
+
+ vma_prio_tree_foreach(iter_vma, &iter, &mapping->i_mmap, pgoff, pgoff) {
+ /* Do not unmap the current VMA */
+@@ -2364,11 +2443,12 @@ int hugetlb_reserve_pages(struct inode *inode,
+ {
+ long ret, chg;
+ struct hstate *h = hstate_inode(inode);
++ struct hugepage_subpool *spool = subpool_inode(inode);
+
+ /*
+ * Only apply hugepage reservation if asked. At fault time, an
+ * attempt will be made for VM_NORESERVE to allocate a page
+- * and filesystem quota without using reserves
++ * without using reserves
+ */
+ if (acctflag & VM_NORESERVE)
+ return 0;
+@@ -2395,17 +2475,17 @@ int hugetlb_reserve_pages(struct inode *inode,
+ if (chg < 0)
+ return chg;
+
+- /* There must be enough filesystem quota for the mapping */
+- if (hugetlb_get_quota(inode->i_mapping, chg))
++ /* There must be enough pages in the subpool for the mapping */
++ if (hugepage_subpool_get_pages(spool, chg))
+ return -ENOSPC;
+
+ /*
+ * Check enough hugepages are available for the reservation.
+- * Hand back the quota if there are not
++ * Hand the pages back to the subpool if there are not
+ */
+ ret = hugetlb_acct_memory(h, chg);
+ if (ret < 0) {
+- hugetlb_put_quota(inode->i_mapping, chg);
++ hugepage_subpool_put_pages(spool, chg);
+ return ret;
+ }
+
+@@ -2429,11 +2509,12 @@ void hugetlb_unreserve_pages(struct inode *inode, long offset, long freed)
+ {
+ struct hstate *h = hstate_inode(inode);
+ long chg = region_truncate(&inode->i_mapping->private_list, offset);
++ struct hugepage_subpool *spool = subpool_inode(inode);
+
+ spin_lock(&inode->i_lock);
+ inode->i_blocks -= (blocks_per_huge_page(h) * freed);
+ spin_unlock(&inode->i_lock);
+
+- hugetlb_put_quota(inode->i_mapping, (chg - freed));
++ hugepage_subpool_put_pages(spool, (chg - freed));
+ hugetlb_acct_memory(h, -(chg - freed));
+ }
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch)
@@ -0,0 +1,42 @@
+commit 15291164b22a357cb211b618adfef4fa82fc0de3
+Author: Eric Sandeen <sandeen at redhat.com>
+Date: Mon Feb 20 17:53:01 2012 -0500
+
+ jbd2: clear BH_Delay & BH_Unwritten in journal_unmap_buffer
+
+ journal_unmap_buffer()'s zap_buffer: code clears a lot of buffer head
+ state ala discard_buffer(), but does not touch _Delay or _Unwritten as
+ discard_buffer() does.
+
+ This can be problematic in some areas of the ext4 code which assume
+ that if they have found a buffer marked unwritten or delay, then it's
+ a live one. Perhaps those spots should check whether it is mapped
+ as well, but if jbd2 is going to tear down a buffer, let's really
+ tear it down completely.
+
+ Without this I get some fsx failures on sub-page-block filesystems
+ up until v3.2, at which point 4e96b2dbbf1d7e81f22047a50f862555a6cb87cb
+ and 189e868fa8fdca702eb9db9d8afc46b5cb9144c9 make the failures go
+ away, because buried within that large change is some more flag
+ clearing. I still think it's worth doing in jbd2, since
+ ->invalidatepage leads here directly, and it's the right place
+ to clear away these flags.
+
+ Signed-off-by: Eric Sandeen <sandeen at redhat.com>
+ Signed-off-by: "Theodore Ts'o" <tytso at mit.edu>
+ Cc: stable at vger.kernel.org
+ [dannf: backported to Debian's 2.6.32]
+
+diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
+index a051270..5c156ad 100644
+--- a/fs/jbd2/transaction.c
++++ b/fs/jbd2/transaction.c
+@@ -1822,6 +1822,8 @@ zap_buffer_unlocked:
+ clear_buffer_mapped(bh);
+ clear_buffer_req(bh);
+ clear_buffer_new(bh);
++ clear_buffer_delay(bh);
++ clear_buffer_unwritten(bh);
+ bh->b_bdev = NULL;
+ return may_free;
+ }
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/all/security-fix-compile-error-in-commoncap.c.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/security-fix-compile-error-in-commoncap.c.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/all/security-fix-compile-error-in-commoncap.c.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/all/security-fix-compile-error-in-commoncap.c.patch)
@@ -0,0 +1,29 @@
+commit 51b79bee627d526199b2f6a6bef8ee0c0739b6d1
+Author: Jonghwan Choi <jhbird.choi at samsung.com>
+Date: Wed Apr 18 17:23:04 2012 -0400
+
+ security: fix compile error in commoncap.c
+
+ Add missing "personality.h"
+ security/commoncap.c: In function 'cap_bprm_set_creds':
+ security/commoncap.c:510: error: 'PER_CLEAR_ON_SETID' undeclared (first use in this function)
+ security/commoncap.c:510: error: (Each undeclared identifier is reported only once
+ security/commoncap.c:510: error: for each function it appears in.)
+
+ Signed-off-by: Jonghwan Choi <jhbird.choi at samsung.com>
+ Acked-by: Serge Hallyn <serge.hallyn at canonical.com>
+ Signed-off-by: James Morris <james.l.morris at oracle.com>
+ [dannf: adjusted to apply to Debian's 2.6.32]
+
+diff --git a/security/commoncap.c b/security/commoncap.c
+index ddc07e0..625cf12 100644
+--- a/security/commoncap.c
++++ b/security/commoncap.c
+@@ -27,6 +27,7 @@
+ #include <linux/sched.h>
+ #include <linux/prctl.h>
+ #include <linux/securebits.h>
++#include <linux/personality.h>
+
+ /*
+ * If a non-root user executes a setuid-root binary in
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch)
@@ -0,0 +1,97 @@
+commit 3e515705a1f46beb1c942bb8043c16f8ac7b1e9e
+Author: Avi Kivity <avi at redhat.com>
+Date: Mon Mar 5 14:23:29 2012 +0200
+
+ KVM: Ensure all vcpus are consistent with in-kernel irqchip settings
+
+ If some vcpus are created before KVM_CREATE_IRQCHIP, then
+ irqchip_in_kernel() and vcpu->arch.apic will be inconsistent, leading
+ to potential NULL pointer dereferences.
+
+ Fix by:
+ - ensuring that no vcpus are installed when KVM_CREATE_IRQCHIP is called
+ - ensuring that a vcpu has an apic if it is installed after KVM_CREATE_IRQCHIP
+
+ This is somewhat long winded because vcpu->arch.apic is created without
+ kvm->lock held.
+
+ Based on earlier patch by Michael Ellerman.
+
+ Signed-off-by: Michael Ellerman <michael at ellerman.id.au>
+ Signed-off-by: Avi Kivity <avi at redhat.com>
+ [dannf: backported to Debian's 2.6.32]
+
+diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
+index 2eb6365..416122b 100644
+--- a/arch/ia64/kvm/kvm-ia64.c
++++ b/arch/ia64/kvm/kvm-ia64.c
+@@ -1185,6 +1185,11 @@ out:
+
+ #define PALE_RESET_ENTRY 0x80000000ffffffb0UL
+
++bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
++{
++ return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
++}
++
+ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+ {
+ struct kvm_vcpu *v;
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 39a11bd..2ebf763 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2396,6 +2396,9 @@ long kvm_arch_vm_ioctl(struct file *filp,
+ r = -EEXIST;
+ if (kvm->arch.vpic)
+ goto create_irqchip_unlock;
++ r = -EINVAL;
++ if (atomic_read(&kvm->online_vcpus))
++ goto create_irqchip_unlock;
+ r = -ENOMEM;
+ vpic = kvm_create_pic(kvm);
+ if (vpic) {
+@@ -5154,6 +5157,11 @@ void kvm_arch_check_processor_compat(void *rtn)
+ kvm_x86_ops->check_processor_compatibility(rtn);
+ }
+
++bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu)
++{
++ return irqchip_in_kernel(vcpu->kvm) == (vcpu->arch.apic != NULL);
++}
++
+ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
+ {
+ struct page *page;
+diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
+index c728a50..8bfed57 100644
+--- a/include/linux/kvm_host.h
++++ b/include/linux/kvm_host.h
+@@ -556,5 +556,12 @@ static inline bool kvm_vcpu_is_bsp(struct kvm_vcpu *vcpu)
+ {
+ return vcpu->kvm->bsp_vcpu_id == vcpu->vcpu_id;
+ }
++
++bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu);
++
++#else
++
++static inline bool kvm_vcpu_compatible(struct kvm_vcpu *vcpu) { return true; }
++
+ #endif
+ #endif
+diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c
+index 311ec18..82b6fdc 100644
+--- a/virt/kvm/kvm_main.c
++++ b/virt/kvm/kvm_main.c
+@@ -1857,6 +1857,10 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id)
+ return r;
+
+ mutex_lock(&kvm->lock);
++ if (!kvm_vcpu_compatible(vcpu)) {
++ r = -EINVAL;
++ goto vcpu_destroy;
++ }
+ if (atomic_read(&kvm->online_vcpus) == KVM_MAX_VCPUS) {
+ r = -EINVAL;
+ goto vcpu_destroy;
Copied: dists/squeeze/linux-2.6/debian/patches/bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch (from r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch)
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ dists/squeeze/linux-2.6/debian/patches/bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch Fri May 4 06:33:24 2012 (r18979, copy of r18978, dists/squeeze-security/linux-2.6/debian/patches/bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch)
@@ -0,0 +1,81 @@
+commit 3ddea128ad75bd33e88780fe44f44c3717369b98
+Author: Marcelo Tosatti <mtosatti at redhat.com>
+Date: Thu Oct 29 13:44:15 2009 -0200
+
+ KVM: x86: disallow multiple KVM_CREATE_IRQCHIP
+
+ Otherwise kvm will leak memory on multiple KVM_CREATE_IRQCHIP.
+ Also serialize multiple accesses with kvm->lock.
+
+ Signed-off-by: Marcelo Tosatti <mtosatti at redhat.com>
+ Signed-off-by: Avi Kivity <avi at redhat.com>
+
+diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h
+index c025a23..be399e2 100644
+--- a/arch/x86/kvm/irq.h
++++ b/arch/x86/kvm/irq.h
+@@ -86,7 +86,11 @@ static inline struct kvm_pic *pic_irqchip(struct kvm *kvm)
+
+ static inline int irqchip_in_kernel(struct kvm *kvm)
+ {
+- return pic_irqchip(kvm) != NULL;
++ int ret;
++
++ ret = (pic_irqchip(kvm) != NULL);
++ smp_rmb();
++ return ret;
+ }
+
+ void kvm_pic_reset(struct kvm_kpic_state *s);
+diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
+index 719f31e..97f6f95 100644
+--- a/arch/x86/kvm/x86.c
++++ b/arch/x86/kvm/x86.c
+@@ -2362,25 +2362,39 @@ long kvm_arch_vm_ioctl(struct file *filp,
+ if (r)
+ goto out;
+ break;
+- case KVM_CREATE_IRQCHIP:
++ case KVM_CREATE_IRQCHIP: {
++ struct kvm_pic *vpic;
++
++ mutex_lock(&kvm->lock);
++ r = -EEXIST;
++ if (kvm->arch.vpic)
++ goto create_irqchip_unlock;
+ r = -ENOMEM;
+- kvm->arch.vpic = kvm_create_pic(kvm);
+- if (kvm->arch.vpic) {
++ vpic = kvm_create_pic(kvm);
++ if (vpic) {
+ r = kvm_ioapic_init(kvm);
+ if (r) {
+- kfree(kvm->arch.vpic);
+- kvm->arch.vpic = NULL;
+- goto out;
++ kfree(vpic);
++ goto create_irqchip_unlock;
+ }
+ } else
+- goto out;
++ goto create_irqchip_unlock;
++ smp_wmb();
++ kvm->arch.vpic = vpic;
++ smp_wmb();
+ r = kvm_setup_default_irq_routing(kvm);
+ if (r) {
++ mutex_lock(&kvm->irq_lock);
+ kfree(kvm->arch.vpic);
+ kfree(kvm->arch.vioapic);
+- goto out;
++ kvm->arch.vpic = NULL;
++ kvm->arch.vioapic = NULL;
++ mutex_unlock(&kvm->irq_lock);
+ }
++ create_irqchip_unlock:
++ mutex_unlock(&kvm->lock);
+ break;
++ }
+ case KVM_CREATE_PIT:
+ u.pit_config.flags = KVM_PIT_SPEAKER_DUMMY;
+ goto create_pit;
Modified: dists/squeeze/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.29.8.patch
==============================================================================
--- dists/squeeze/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.29.8.patch Fri May 4 06:25:21 2012 (r18978)
+++ dists/squeeze/linux-2.6/debian/patches/features/all/vserver/vs2.3.0.36.29.8.patch Fri May 4 06:33:24 2012 (r18979)
@@ -28257,10 +28257,10 @@
--- a/security/commoncap.c 2009-12-03 20:03:02.000000000 +0100
+++ a/security/commoncap.c 2011-06-10 13:03:02.000000000 +0200
-@@ -27,6 +27,7 @@
- #include <linux/sched.h>
+@@ -28,6 +28,7 @@
#include <linux/prctl.h>
#include <linux/securebits.h>
+ #include <linux/personality.h>
+#include <linux/vs_context.h>
/*
Modified: dists/squeeze/linux-2.6/debian/patches/series/44
==============================================================================
--- dists/squeeze/linux-2.6/debian/patches/series/44 Fri May 4 06:25:21 2012 (r18978)
+++ dists/squeeze/linux-2.6/debian/patches/series/44 Fri May 4 06:33:24 2012 (r18979)
@@ -1,2 +1,10 @@
+ bugfix/x86/x86-mm-Fix-pgd_lock-deadlock.patch
+ debian/revert-autofs-work-around-unhappy-compat-problem-on-x86-64.patch
++ bugfix/all/block-Fix-io_context-leak-after-clone-with-CLONE_IO.patch
++ bugfix/all/block-Fix-io_context-leak-after-failure-of-clone-with-CLONE_IO.patch
++ bugfix/x86/KVM-disallow-multiple-KVM_CREATE_IRQCHIP.patch
++ bugfix/x86/KVM-Ensure-all-vcpus-are-consistent-with-in-kernel-irqchip-settings.patch
++ bugfix/all/hugepages-fix-use-after-free-bug-in-quota-handling.patch
++ bugfix/all/fcaps-clear-the-same-personality-flags-as-suid-when-fcaps-are-used.patch
++ bugfix/all/security-fix-compile-error-in-commoncap.c.patch
++ bugfix/all/jbd2-clear-BH_Delay-BH_Unwritten-in-journal_unmap_buffer.patch
More information about the Kernel-svn-changes
mailing list