[kernel] r15569 - in dists/sid/linux-2.6/debian: . patches/features/all/openvz

Maximilian Attems maks at alioth.debian.org
Tue Apr 27 14:04:44 UTC 2010


Author: maks
Date: Tue Apr 27 14:04:39 2010
New Revision: 15569

Log:
update openvz patch

Modified:
   dists/sid/linux-2.6/debian/changelog
   dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch

Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog	Tue Apr 27 11:26:09 2010	(r15568)
+++ dists/sid/linux-2.6/debian/changelog	Tue Apr 27 14:04:39 2010	(r15569)
@@ -27,7 +27,7 @@
 
   [ maximilian attems]
   * [ia64] Built in fbcon.
-  * Update openvz patch to 6b5607eeec54. (closes: #574598)
+  * Update openvz patch to c05f95fcb04e. (closes: #574598)
   * Reenable nouveau autoloading.
   * reiserfs: Fix permissions on .reiserfs_priv. CVE-2010-1146
   * libata,ata_piix: detect and clear spurious IRQs.

Modified: dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch	Tue Apr 27 11:26:09 2010	(r15568)
+++ dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch	Tue Apr 27 14:04:39 2010	(r15569)
@@ -1,3 +1,1794 @@
+commit c05f95fcb04e896c898218d12a8f37c43d2f9cc6
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date:   Tue Apr 27 15:10:13 2010 +0400
+
+    OpenVZ kernel 2.6.32-avdeyev released
+    
+    Named after Sergei Vasilyevich Avdeyev - a Russian cosmonaut.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b4a419d9abd11e3efd02e9fccd4a14180866cf99
+Merge: 455792e 5bf3475
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date:   Tue Apr 27 14:01:27 2010 +0400
+
+    Merged linux-2.6.32.12
+    
+    Conflicts:
+    
+    	Makefile
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 455792e7712fac15bba7ca187c244f30c9d0e825
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Thu Apr 22 19:08:13 2010 +0400
+
+    ipv6: fix sysctl unregistering order
+    
+    call addrconf_ifdown for loopback at last ipv6 addr delete with how=0
+    to fix sysctl tables unregister ordering: all other interfaces attach their
+    sysctl paths to lo's, so unregister lo sysctl tables only at namespace destroy.
+    
+    https://bugzilla.sw.ru/show_bug.cgi?id=473430
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fa86dba2b6213e770f102d1e688f6527d759aecf
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Mon Apr 5 15:43:18 2010 +0400
+
+    ve: fix ve task state percpu counters
+    
+    Counters overlap detection for ve tasks in running/uninterruptible/iowait state
+    was broken due to type mismatch:
+    nr_{running/unin..e/iowait}_ve() uses _long_ for summing _int_ percpu counters.
+    
+    As result, it broke ve loadavg calculation after first int overlap.
+    
+    This patch expand all this percpu counters to unsigned long.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=1396
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b484e22d951a02bd7ce25aaac396742766142790
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Mon Apr 5 15:41:30 2010 +0400
+
+    check flags on parsed structure
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=1464
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d8a86ef5a6c747ddb2896696269c0feef5d6fe1e
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Mon Apr 5 15:38:29 2010 +0400
+
+    CPT: check signal curr_target at restore
+    
+    set signal curr_target to current if right task was not found.
+    fix oops after broken restore.
+    
+    "curr_target" controls round robin signal target balance over process
+    threads, there no reasons to care about migration accuracy.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=1467
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 61845b781db7d86180977270c73f6ea3885485f3
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date:   Mon Apr 5 15:35:58 2010 +0400
+
+    cpt: Don't mind the tsk->splice_pipe cache at cpt time
+    
+    This field is just a cache for sendfile systemcall. It can be dropped
+    safely during migration - the first sendfile after restore will create
+    it back.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=881
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fcd86ff706b309999e526dc4a37e9de88ec051fb
+Author: Peter Volkov <pva at gentoo.org>
+Date:   Sun Mar 28 18:04:44 2010 +0400
+
+    Fix /proc/kmsg permissions with capabilities active
+    
+    Whenever application sets cap_sys_admin=ep it is unable to read
+    /proc/kmsg with EPERM. This patch makes /proc/kmsg readable on HN.
+    http://bugzilla.openvz.org/show_bug.cgi?id=1360
+    
+    Signed-off-by: Peter Volkov <pva at gentoo.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 8c6af363b89ebf94d3982d786dd21c64fb41528f
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Fri Mar 12 15:58:35 2010 +0300
+
+    quota: fix compilation 32-bit compat quota, remove size checks.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 26aeb82fc7ef70e83a4e0640fcb77c7b6f31d81b
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Fri Mar 12 15:58:34 2010 +0300
+
+    x86: fix compilation for 32-bit kernel
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 92875e3c49a15885ffbf40cbb0f2bd82cf423e43
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Mon Mar 1 13:03:59 2010 +0300
+
+    CPT: update image version to CPT_VERSION_27_3
+    
+    sync cpt minor version with rhel5 branch
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f7dd75ba9debbd60b12eec93128a5742d6876d28
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Mon Mar 1 12:56:27 2010 +0300
+
+    CPT: ignore deleted linked chr blk fifo nodes
+    
+    Ignore unlinked but referenced pipes, character and block device nodes.
+    Restore process will create it itself.
+    
+    Bug #455855
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d7c68b191825cbbf6c7a40a75d38d09330b3abca
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Mon Mar 1 12:55:36 2010 +0300
+
+    CPT: Dump fake hardlinks on inotify watch's inodes
+    
+    When a watch is attached to unlinked and closed file it
+    will not be restored, since the inode will not be in image.
+    
+    To fix this the proposal is to create a fake link on the
+    inode in a temp dir and dump it.
+    
+    Bug #454944
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7cf74bdd35d9559c671362cf8ce7016bb51aedaa
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Mon Mar 1 12:52:42 2010 +0300
+
+    CPT: Open hardlinked files only if is set 'hardlinked_on'
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 52c2eb6da3f09f44d652eb7156a793b5f50e8e08
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Mon Mar 1 12:52:09 2010 +0300
+
+    CPT: Add ioctl CPT_HARDLNK_ON for rst
+    
+    vzctl have to call ioctl CPT_HARDLNK_ON to enable open hardlinked
+    files by kernel during restore.
+    
+    This protection is needed to prevent mix new kernel + old vzctl (which
+    doesn't do cleaning). In other words, prevent creating/open files
+    which will not be removed, and therefore this issue can lead to
+    security problem.
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 72dfa44429c57c924ec4ac4d25d9ef6a343ddade
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Mon Mar 1 12:51:39 2010 +0300
+
+    CPT: Add CPT_DENTRY_HARDLINKED flag to cpt_file_image
+    
+    This flag tells that file was hardlinked.
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 80d2ce353aa41820eca28c15abd6c1421d537736
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Mon Mar 1 12:49:48 2010 +0300
+
+    CPT: Create hard links to "deleted but referenced" during checkpoint
+    
+    For "deleted but referenced" files, kernel creates hard link in
+    directory (that was set via CPT_LINKDIR_ADD) in format:
+    
+               .cpt_hardlink.xxxxxxxx
+    
+        x - digit, from 0 to 9
+    
+    Note - this policy is used only when no other ways of dumping unlinked
+    file helped.
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c24ab545f53ae07a2bfb3a6df100b56d49b57281
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Mon Mar 1 12:47:30 2010 +0300
+
+    CPT: Add ioctl CPT_LINKDIR_ADD for cpt
+    
+    vzctl have to call ioctl CPT_LINKDIR_ADD to tell kernel where
+    create hardlinked files during checkpoint. Without this ioctl
+    kernel assumes that creating hardlinked files is off.
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d4ef97ff64464126b459ef8d9a0adbb95fb9dc09
+Author: Konstantin Khorenko <khorenko at openvz.org>
+Date:   Sat Feb 27 16:58:11 2010 +0300
+
+    CPT: stop the migration if shm restoration failed
+    
+    Bug #268163
+    
+    Signed-off-by: Konstantin Khorenko <khorenko at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 089c01a6503ec6fc1ce66841d049bb65aa3c212c
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date:   Sat Feb 27 16:58:11 2010 +0300
+
+    CPT: restart local_kernel_thread in case of -ERESTARTNOINTR
+    
+    This is essential in case of migration to SLM node.
+    
+    We can bump into situation when SLM refuses to fork during the
+    undumping process because it thinks that subgroup's resources
+    are to be redistributed. When this happens fork is delayed with
+    the -ERESTARTNOINTR error and the undumping process fails.
+    
+    As Den (den@) noticed userspace is not intended to see the
+    -ERESTARTNOINTR error so we should handle this situation in the
+    kernel. According to the logic in the do_signal() function the
+    interrupted system call is immediately restarted in case of the
+    -ERESTARTNOINTR error.
+    
+    We borrow this policy and apply it to the local_kernel_thread()
+    cpt helper function.
+    
+    [ xemul: this is quite a rare case, so simple cond_resched()
+             is OK here all the more so the redistribution should
+             happen in a timer ]
+    
+    Bug #116787
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 8551a850a459df659d7b14a66dfc8cf6da5065d6
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:11 2010 +0300
+
+    CPT: save/restore only classic task flags
+    
+    Task flags were restored as they were saved in image. That is not correct as
+    flags differ in 2.6.9, 2.6.16 and 2.6.18 kernels.
+    Actually we just need to save/restore only classic flags (PF_EXITING, PF_DEAD,
+    PF_FORKNOEXEC, PF_SUPERPRIV, PF_DUMPCORE and PF_SIGNALED).
+    
+    The problems can occur because during migration from 2.6.9 to 2.6.18 kernel
+    flag PF_USED_MATH was not restored on tsk->flags correctly.
+    
+    In 2.6.9 kernel there was field tsk->used_math for this purpose, in 2.6.18
+    kernel it is transformed into one of the tsk->flags.
+    
+    And it was a bug, that after restore of fpu state and PF_USED_MATH flag, it
+    was cleared by "tsk->flags = ti->cpt_flags & ~PF_FROZEN", as old cpt_flags do
+    not contain PF_USED_MATH flag.
+    
+    Bugs #115977 #115980 #115982
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 75f2abfa9f92fc7ac512a8ed9a34c2df0edd133d
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:11 2010 +0300
+
+    CPT: udp sockets restore fix
+    
+    Some applications (like ntpd) set on udp sockets sk_reuse to 1. So any other
+    applications can bind to the same port. During restore we must skip this
+    check and restore and bind all sockets. On IPv6 we must also force DAD
+    (Duplicate Address Detection) procedure to be sure that IFA_F_TENTATIVE flag
+    will be cleared on IPv6 address and socket can be binded to it.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=784
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit ba94d3fa2bb8636a7dceaa01fbf6fecdb8edacd5
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:11 2010 +0300
+
+    CPT: screw up udev bindmounts knot
+    
+    Ubuntu's udev on boot does:
+    
+            if ! mountpoint -q /dev; then
+                # initramfs didn't mount /dev, so we'll need to do that
+                mount -n --bind /dev /etc/udev
+                mount -n -t tmpfs -o mode=0755 udev /dev
+                mkdir -m 0700 -p /dev/.static/dev
+                mount -n --move /etc/udev /dev/.static/dev
+            fi
+    
+    So, workaround is dumping "/dev" as bindmount's source.
+    
+    Bug #120852
+    http://bugzilla.openvz.org/show_bug.cgi?id=1198
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit faa9a6dd94c072b38c8f963ce314fc1d6ff69ddf
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:10 2010 +0300
+
+    CPT: restore dead tasks proc files
+    
+    If some process opened /proc/<pid><somefile> and process with <pid> will die
+    after some time then checkpoint fails with error:
+    
+          Can not dump VE: Invalid argument
+          Error: d_path cannot be looked up /proc/125/cmdline
+    
+    The fix is to catch this situation at the dump time, mark the image respectively
+    and restore a fake file on restore.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=1047
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 977418edceabb4705f5012e562d4e5e04a19f138
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:10 2010 +0300
+
+    CPT: adjust vfsmounts restore order
+    
+    Idea is: Dump parent before dump his children
+    
+    This order is needed during checkpoint/restore:
+    
+       mount  /A   /B  -o bind
+       mount  none /C  -t tmpfs
+       mkdir  /C/D
+       mount  /B   /C/D --move
+    
+    After this, checkpoint (w/o this patch) will dump vfsmounts in order:
+    
+          - vfsmount, bind to /A, mounted to /C/D
+          - vfsmount, mounted to /C (tmpfs)
+    and will restore in the same order, that causes error.
+    
+    Bug #132951
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c42b985195cc8e7c2bbeb644e92d98a066aacc18
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:10 2010 +0300
+
+    CPT: dont cpt requiresdev fs
+    
+    Don't allow chkpnt VE with mounted ext2/ext3, etc filesystems.
+    
+    Allow checkpoint only for mounted nodev and "external" filesystem.
+    
+    This check protects from error on restore:
+       CPT ERR: ffff810007113000,102 :-2 mounting /root/some_dir ext3 40000000
+    
+    as do_one_mount() doesn't pass mntdev to mount().
+    
+    [xemul: actually, the reason we don't support filesystems other than
+            virtual and tmpfs is because we simply can't (easily) get the
+            mount options for them to cpt and restore ]
+    
+    Bug #131737
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a1d028ce2f1e87b5d64fb9fb7ed46740c1d73ed2
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:10 2010 +0300
+
+    CPT: Restore information about tcp listening sockets
+    
+    Not all options are important. Only missed ipv6only can cause
+    error if other application want to listen the same port for IPv4 any address.
+    
+    tp->XXX are inherited by children (noticed by Alexey Kuznetsov), so we need also
+    to restore these options.
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    
+    Comment from Alexey:
+    It [everything before] was not OK. The feature which are broken are important,
+    but not actually critical except for ipv6only.
+    
+    F.e. DEFER_ACCEPT is broken -> but nobody will notice, it just will not
+    be deferred.
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 6364b5498e48bcb600472bb2fafb865206f35068
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:10 2010 +0300
+
+    CPT: put 'expect' after insert to the 'conntrack'
+    
+    During restore conntrack, we need to put expect after allocating
+    ip_conntrack_expect and do something with one.  Expect will be
+    freed or immediate (if nobody has this expect) or during cleanup/timer
+    hooks. Otherwise expect never will be freed.
+    
+    Note: Approaches for kernels 2.6.18 and 2.6.9 are different. For example
+    see help() in "net/ipv4/netfilter/ip_conntrack_netbios_ns.c"
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b3d4348ca6322edad5a0a0d56b15d1eb8db718bd
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:09 2010 +0300
+
+    CPT: Fix ip_conntrack_ftp usage counter leak
+    
+    Function ip_conntrack_helper_find_get() gets module counter. So put a
+    conntrack after putting in the hash and handling the conntrack's expect
+    list.
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 74e373eeb5e71b1c8253c04bee92250e5f6640cf
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:58:08 2010 +0300
+
+    CPT: dump and restore global snmp statistics
+    
+    Per device exists for ipv6 only and is probably not used now, but
+    anyway - I'll do it later.
+    
+    This patch adds new section CPT_SECT_SNMP_STATS that is populated
+    with CPT_OBJ_BITS set of objects - one for each type of statistics.
+    Objects have variable length. Stats are stored as a plain array of
+    __u32 numbers and thus the order in which stats types are stored is
+    implicitly hard-coded.
+    
+    In case we do not have an IPV6 turned on all ipv6 stats are dumped
+    as CPT_OBJ_BITS/CPT_CONTENT_VOID and are skipped on restore.
+    
+    When we restore from an image with more stats in any type, the not
+    supported ones are dropped with a warning.
+    
+    Stats add 28K to image file.
+    
+    Bug #113930
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 3b0f4b2e0503c157d596d7426ffcba01e30e930f
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:08 2010 +0300
+
+    CPT: Fix memory corruption if cpt_family is wrong.
+    
+    During restore, if parent socket is AF_INET but cpt_family is
+    wrong (non initialized, see bug ##95113), then consider request as
+    related to AF_INET6 is not right and leads to memory corruption.
+    
+    As there are a lot of buggy images, so we can't check only on values
+    AF_INET and AF_INET6.
+    
+    Decision:
+     - Check request on AF_INET6 first, and consider
+       request as AF_INET by default.
+     - Additionally checkup for AF_INET6 request (protect from
+       random value cpt_family == AF_INET6)
+    
+    Bug #118912
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Acked-by: Denis V. Lunev <den at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 4a7ddd3db9a8030d514d120341bffd904ef57315
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:58:07 2010 +0300
+
+    CPT: fix restoring of /dev/null opened early by init
+    
+    The problem is the following:
+    * init from fc9 starts and opens /dev/null for its stdin, stdout
+      and stderr
+    * udev starts and overmounts /dev with tmpfs
+    
+    After this cpt cannot dump this ve, since one process holds a file,
+    that is inaccessible from ve root.
+    
+    The proposed solution is the following:
+    1. allow for /dev/null to be over-mounted
+    2. restore init's file in two stages:
+       stage1: *before* we restored mounts restore init's 0, 1 and
+       2 file descriptors, since most likely (in fc9 case - definitely)
+       init opened them before any other manipulations with fs;
+       stage2: restore the rest files later, at usual time to make
+       sure that e.g. sockets etc are restored properly.
+    
+    Comment from Alexey:
+    
+    ACK.
+    
+    Though this is really ugly, it really produces 100% correct result
+    for this particular situation.
+    
+    Bug #116261
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 937a5462e54d42a70ca0a66c7d3147d02ff40767
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:58:07 2010 +0300
+
+    CPT: lock sock before restoring its synwait queue
+    
+    This new socket already has all the necessary TCP timers armed,
+    so tcp_keepalive_timer can fire during the rst_restore_synwait_queue
+    and (for the latter being lockless) can spoil the queue.
+    
+    Bug #118912
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c5d30bd0194b026df7684e08f1b6e8e77d06305c
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:58:07 2010 +0300
+
+    CPT: sysctl randomize_va_space
+    
+    implement checkpointing for virtualized sysctl kernel.randomize_va_space.
+    
+    reuse existing unused pad1 field in cpt_veinfo_image.
+    0 -> image without rnd_va_space virtualization (default value is used)
+    1 -> rnd = 0
+    2 -> rnd = 1
+    etc...
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit bbdcbaadf794e4a6c579cdac4c92ecc278d7606c
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:07 2010 +0300
+
+    CPT: add check for presence of module slm_dmprst if SLM is enabled
+    
+    Add a check in "checks" for presence of module slm_dmprst if SLM is enabled.
+    Check will be performed for both source and destination nodes. Changes in
+    vzmigrate are not needed.
+    
+    Bug #114312
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 04c139f6c20e5c80a19db1439f8cd2f7e2715b4e
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:07 2010 +0300
+
+    CPT: add diagnostics in case of iptables-restore fail
+    
+    It is not clear right now what is wrong if iptables-restore fails.
+    Add some diagnostics in case of error.
+    
+    Bug #95952
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f06677625bf53b6aad0a3742b5f01d1376715e1d
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:58:06 2010 +0300
+
+    CPT: Check that VE is not running on restore.
+    
+    Bug #99679
+    
+    Signed-off-by: Denis V. Lunev <den at parallels.com>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit dcda94043007a5d005e92c2df31ba63eeb1b8a70
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:06 2010 +0300
+
+    CPT: fix check in decode_tuple()
+    
+    Tuple structure can be used as a mask and protonum can be 0xffff in 2.6.9
+    kernel. In 2.6.18 kernel all masks for protonum are 0xff and 0xffff will
+    be shrunken to 0xff.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 5a889e32263292bec6e2d4c2710ee41985f35716
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:06 2010 +0300
+
+    CPT: fix restore of conntrack expect timer
+    
+    One more fix of restore conntrack procedure.
+    Following code:
+    
+    if (ct->helper->timeout && !del_timer(&exp->timeout)) {
+    	...
+    }
+    
+    can lead to oops, as exp->timeout is not initialized at this point.
+    
+    Actually this optimization is not needed at all.
+    If expectation is dying, then we will let it die by its own death.
+    
+    Also in ip_conntrack_expect_insert() there is an initialization of
+    exp->timeout. And we can't just do add_timer() after that (as in add_timer()
+    we have BUG_ON(timer_pending(timer))), we must do mod_timer() instead.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 19dce010faff8960e80b1778afa9f4ad07dd365f
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:06 2010 +0300
+
+    CPT: restore mark value on conntracks
+    
+    Restore mark value in conntracks as it is needed for connmark module.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7ec63fdedf332db285f71d857cf395da8cf674d5
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:06 2010 +0300
+
+    CPT: convert conntrack tuple from 2.6.9 kernel image
+    
+    Add conversion for conntrack tuple from 2.6.9 kernel image.
+    Check for correct value is added in decode_tuple().
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c34d6367f6cc5ee7f60fdee828c41de7b633a779
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:06 2010 +0300
+
+    CPT: convert conntrack image from 2.6.9 to 2.6.18
+    
+    CPT structure in image file for conntracks is different in 2.6.9 and 2.6.18
+    kernels (array cpt_help_data was enlarged in the middle of the structure), so
+    conntracks from 2.6.9 kernel are restored incorrectly on 2.6.18 kernel and
+    lead to kernel oops.
+    
+    A simple conversion from 2.6.9 to 2.6.18 is introduced to restore conntracks
+    correctly on 2.6.18 kernel.
+    
+    Bug #113290
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 21644501b4651df2c7f271cae528f1996fc23a8d
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:05 2010 +0300
+
+    CPT: create kernel threads in VE0 context
+    
+    In current implementation master process which performs checkpointing has
+    owner_env set to VE0 and exec_env set to VE. All auxiliary kernel threads
+    are created with exec_env set to VE and owner_env set to VE0, so after the
+    do_fork_pid() we have the following:
+    
+     * new thread has owner_env == ve0, exec env == ve
+     * its pid belongs to ve (pid->veid != 0)
+    
+    That is why if ve_enter() in thread fails, then we hit BUG_ON in
+     release_task -> detach_pid -> free_pid
+    sequence, since task owner env != pid's veid.
+    
+    When enter succeeds the task's owner env becomes ve and this BUG_ON
+    is not triggered.
+    
+    To solve this problem exec_env is switched to VE before kernel thread
+    creation and switched back after. Veid is passed to kernel via args. All
+    kernel threads are created with CLONE_VFORK to be sure that parent
+    process will not exit before doing exec() in thread.
+    
+    Bug #97124
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 686bb3916a1247b46893078f8d87b8df6b1e305a
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:05 2010 +0300
+
+    CPT: restore rlimits correctly during 32bit-64bit migration
+    
+    During 32bit to 64bit migration rlimits were restored incorrectly due to
+    different size of long on 32bit and 64bit archs. Now simple conversion is
+    introduced in case of 32bit-64bit migration. Infinity values are restored as
+    infinity values. Error is returned if value greater than RLIM_INFINITY32 is
+    found in dump during restore on 32bit arch.
+    
+    Bug #111965
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c3e4a29b420b871a6543955728b1f8a5de75e955
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:05 2010 +0300
+
+    CPT: restore packet control block from kernels with and without IPv6
+    
+    More generic mechanism for restoring packet control blocks. Unfortunately we
+    do not save length of control block in dump and we can only try to calculate
+    it during restore. This method is based on knowledge that the flags value in
+    TCP control block is not zero for all packets in queue.
+    Since this image version TCP control block will be saved in IPv6 form
+    regardless to IPv6 config option.
+    Restore of control block is splitted in 4 ways for any IPv6 and non-IPv6
+    kernel combinations.
+    Check is added to be sure that all control block were restored in the same
+    way. If it will be found that some control blocks were restored incorrectly,
+    then undump process will be terminated.
+    
+    Bug #111370.
+    
+    Merged 4 patches sent earlier:
+    1. Increase image version.
+    2. Save TCP control block regardless to IPv6 config option.
+    3. Restore of control block is splitted in 4 ways...
+    4. Add appropriate comment on TCP control block restore procedure.
+    
+    [xemul:
+    	Added do { } while (0) around macro body
+    	Mention Alexey in comment about skb_cb->flags being non-zero
+    ]
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1f218bb8d606af3b95cd089b68b44800f91ac7d1
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:05 2010 +0300
+
+    CPT: add binfmt_misc fs in supported list
+    
+    Just add binfmt_misc in list of supported file systems. With this small
+    quick fix migration will be allowed, but all binfmt_misc entries will
+    be dropped during migration.
+    
+    This fix is only for the first time. Later will be implemented generic
+    mechanism for checkpointing/restore of external modules. And this quick
+    fix will be replaced with full support for binfmt_misc in CPT.
+    
+    Bugs #100709, #101061
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 85da0ddab187bb9e6000ba6c98b7454095055799
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:05 2010 +0300
+
+    CPT: relax check for several bind mounts on the same mount point
+    
+    Relax check for special bind mounts which mounted several times on the same
+    mount point. We need to check only dentry, mount check can be skipped in this
+    case.
+    We can't remove completely mount check as there are exist cases when we need
+    to check mnt too. E.g. /dev is mounted with NODEV over /dev and some file is
+    opened from underlying mount. If mount check is removed, then we will be able
+    to checkpoint such state, but we will not be able to restore it.
+    
+    Correct solution will be to dump/restore whole mount tree with overmounts.
+    But we can't implement this right now for number of reasons.
+    
+    Bug #84310
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit bc4769bb4acc7547f4e537b23a093019e78652d7
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:04 2010 +0300
+
+    CPT: fix reopen dentries procedure
+    
+    Dentries were not reopened correctly during checkpointing and restore.
+    Two bugs fixed:
+    1. In case of huge files (more than 2Gb) dentry_open() returns -EFBIG if
+       O_LARGEFILE flag is not set. This flag should be used for temporary files
+       used during checkpointing and restore process.
+       Bug #99544
+     https://bugzilla.sw.ru/show_bug.cgi?id=99544
+    
+    2. In dump_content_regular() we have following code:
+      file = dentry_open(dget(file->f_dentry),
+    		  mntget(file->f_vfsmnt), O_RDONLY);
+      if (IS_ERR(file)) {
+    	  cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+    	  eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(file));
+    	  return PTR_ERR(file);
+      }
+    
+      Which results in kernel oops if dentry_open() returns error
+      (e.g. -EFBIG because of bug #99544)
+    
+      Bug #99542
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 08b8f8ba476ec8e67b2eac74028fa5f4a3586c2f
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:04 2010 +0300
+
+    CPT: fix save/restore of open requests
+    
+    Open requests were saved and restored sometimes incorrectly:
+    
+    1. Family of open request was not saved (commented out)
+    2. Restore was broken, would crash because rsk_ops was cleared by memset.
+    3. And finally, all the code restoring open requests was skipped.
+    
+    Tested with http_load.
+    
+    Bug #95113
+    http://bugzilla.openvz.org/show_bug.cgi?id=784
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 0a6789976c6ff602e11a4f00123ae70b62738f21
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:04 2010 +0300
+
+    cpt: add lost dcache_lock protection around __d_path()
+    
+    Protect __d_path() call with dcache_lock spinlock.
+    Protect other checks with env->op_sem semaphore.
+    
+    Bug #98833
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 22c792c3605e5d0f916308678319e25eb18cf4a6
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:58:04 2010 +0300
+
+    cpt: fix restore of inotify on symlink
+    
+    Inside VE file /etc/mtab is a symlink to /proc/mounts.
+    FreeNX server with KDE creates inotify on /etc/mtab file.
+    To restore such inotify we need to obtain dentry with path_lookup() and
+    restore inotify on it.
+    
+    Bug #96464
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 66a6c3e51c35096b204b8866ee50afe0b1d13d59
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:58:04 2010 +0300
+
+    quota: compat layer for compat quota
+    
+    This patch implements compatibility quotactls for old quota tools.
+    
+    replace:
+    diff-fs-quotcompat-ia32emul-fix-20050921
+    diff-fs-quotcompat-comp-fix-20080710
+    diff-fs-quotcompat-xencomp-fix-20080806
+    diff-fs-quota-compat-proper-split-20081027
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1b04f79cb59f8cd8fb1ca26e19a6a4e8295a088f
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:58:03 2010 +0300
+
+    ve: Don't check for CAP_SETVEID - use more ... imagination
+    
+    This patch:
+       The proposed check correctly detects the root in ve0.
+       However, we lose the ability to create containers with
+       some fancy tool, that has the CAP_SETVEID capability
+       *only*, but we don't have such.
+    
+       The cap itself is declared to be obsoleted, but there's
+       no need in rewriting vzctl in a rush - things will still
+       work. If we'll want to manipulate audit caps from the
+       vzctl we'll make it via features.
+    
+    Overall history:
+    
+       Don't ban CAP_AUDIT_XXX capabilities in container to make the
+       dbus-daemon work.
+    
+       After two (maybe three) days of brain storm me and Den finally
+       gave birth to this solution. So...
+    
+       First of all AUDIT will be banned in container. Since dbus refused
+       not to set audit caps we don't want it to mess with it in any case.
+    
+       Next step is to note, that CAP_AUDIT_CONTROL coincides with the
+       CAP_VE_ADMIN, which is not that bad (besides, dbus doesn't try to
+       set this one up) and we leave one alone.
+    
+       And finally - the CAP_AUDIT_WRITE, which coincides with the most
+       delicate one - CAP_SETVEID. The latter one is explicitly dropped
+       on container start and there's no way to set one (dbus tries this
+       and fails) back. Simple "don't clear it" solution is too dangerous.
+    
+       TO handle *this* case we
+       1. replace all checks to capable(CAP_SETVEID) to more complicated,
+          but still matching ve0's root only;
+       2. don't ban the CAP_SETVEID (== CAP_AUDIT_WRITE == the_one_dbus_needs);
+       3. remember, that this capability is present on ve startup and thus
+          we automatically have the CAP_AUDIT_WRITE required by dbus;
+       4. carefully handle the case, when we enter container in do_env_create
+          and try to call fairsched system calls.
+    
+       That's it. No fraud, just manual dexterity ;)
+    
+       Bug #117448
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 153eca7d4bf56bd34e7c5957b1ff8ec331713a0b
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:58:03 2010 +0300
+
+    fairsched: Sanitize fairsched manipulations on ve startup
+    
+    First of all we won't be able to call them after we fix
+    capability checks. Second of it is that taking the fairsched
+    mutex 4 times on startup is an overkill.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit e2fb9c79fd348a0603c4b881c4e1f179945b55b5
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:58:03 2010 +0300
+
+    ms: lutime lchmod syscalls
+    
+    Add possibility to change owner/permissions on symbolic links
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 0b7042d24abe59baba84a78e37b95a88624f9308
+Author: Konstantin Khorenko <khorenko at openvz.org>
+Date:   Sat Feb 27 16:58:02 2010 +0300
+
+    ve-net: permit changing of netdev's tx_queue_len from inside a CT
+    
+    In particular it makes OpenVPN happy.
+    
+    Bug #457318
+    
+    Signed-off-by: Konstantin Khorenko <khorenko at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit eb3139203f525babc452556dd5071c73382050dd
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:58:02 2010 +0300
+
+    venet: Core support for external ip filtering
+    
+    Allow VE emit packets with configured source IP address.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 98ec6de33c046e4f053c6b21152d3e07bead7804
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date:   Sat Feb 27 16:58:01 2010 +0300
+
+    vzethdev: stat tx dropped acount
+    
+    Veth get_stats() should return the number of tx_dropped packets
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 57a5848f98e677abefa203f9ad5f1b4bf3d28ace
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:58:01 2010 +0300
+
+    venet: add TSO support in venet and vzethdev
+    
+    venet and veth support checksumming and scatter-gather features, but TSO
+    feature still wasn't added.
+    
+    TSO increases bandwidth up to 50% or appreciably decreases CPU usage.
+    
+    Approach is the same as for checksumming:
+         1. TSO is off by default
+         2. For veth: tso can be enabled/disabled in VE or VE0 for
+            pair {veth in VE, veth in VE0}
+         3. For venet: tso can be enabled/disabled only in VE0 (for
+            all venet devices at once)
+    
+    To use this feature just enable:
+            1. Tx checksumming: ethtool -K DEVNAME tx on
+            2. Scatter-gather:  ethtool -K DEVNAME sg on
+            3. TSO:  ethtool -K DEVNAME tso on
+    
+    Some performance info (tested via netperf):
+    
+     1. Traffic VE->VE0 (via venet), TCP STREAM test, message size 32K, socket size 256K:
+    
+       TSO off       2300 10^6 bits/s
+       TSO on        5600 10^6 bits/s
+    
+    Notes:
+        Admins need to set TSO on {venet,veth} only if physical ethernet device supports TSO.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f0fe2ba7ff9d91a2bfef1ec95fddbeada5be14d3
+Author: Vasily Averin <vvs at openvz.org>
+Date:   Sat Feb 27 16:58:01 2010 +0300
+
+    ve: Kill not-yet-closed TCP sockets on VE stop herder
+    
+    Idea proposed by Alexey Kuznetsov <alexey at openvz.org>
+    tcp_v4_kill_ve_sockets() can hang in a loop because NFS can hold some sockets in
+    host node rpciod/nfsdiod queues.
+    This patch resets such sockets if it's possible or delays its cleanup.
+    
+    changes in 20090429: fixed wrong locking and another xemul@ notices
+    Bug #429296
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 5ad4c74a16b2f9812a1d79287bba724243454ecc
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:58:00 2010 +0300
+
+    bc: compat system calls for bc and fairsched
+    
+    correct UB_MAXVALUE conversion and wire compat syscalls
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 20fd4dd54736b40a815ad07d34c4339d5c627f7e
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:58:00 2010 +0300
+
+    ub-dcache: sleep in dput
+    
+    ub: dentry->dentry_bc.d_ub is unreliable after the sleep
+    
+    d_kill can sleep inside. In this case dentry->dentry_bc.d_ub saved before
+    is unreliable as we can have dcache accounting on event during sleep. In this
+    case we'll have saved ub == NULL and OOPS/leak inside dcache_uncharge.
+    
+    Another problem here is that we should decrement inuse count on the
+    dentry appropriately.
+    
+    Bug #116095
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 76038f85b0523d4d2a48b20b5443a81dee3531e4
+Author: Cyrill Gorcunov <gorcunov at openvz.org>
+Date:   Sat Feb 27 16:58:00 2010 +0300
+
+    ve-fs: implement "ve-xattr-policy" sysctl entry
+    
+    "ve-xattr-policy" sysctl entry allows to control how to react on xattr
+    change from inside of a container.
+    
+    There are three options allowed:
+    
+    0 - accept any xattr modifications (VE0 always and VE by default)
+    1 - ignore
+    2 - reject
+    
+    Note that any other value assigned to "ve-xattr-policy"
+    leads to "accept" policy being applied without any warning.
+    
+    The sysctl is placed at /proc/sys/fs/ve-xattr-policy on HW node.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=1050
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 5cab8bf42b5da73a02d5288951aeeec8fd8b4716
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date:   Sat Feb 27 16:57:59 2010 +0300
+
+    ve-kmsg: printk va copy add
+    
+    Copy args variable in ve_printk() function
+    
+    x64 can corrupt va_list after return from the called function.
+    
+    Bug #440939
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b55fc66f70948758037a4639e8a63663792ec1f5
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:57:59 2010 +0300
+
+    ve-kmsg: printk lockdep fixup
+    
+    printk: fix lockdep warnings if kernel compiled with CONFIG_LOCKDEP
+    
+    vprintk() to VE causes:
+    
+      =====================================
+      [ BUG: lock held at task exit time! ]
+      -------------------------------------
+      iptables/8203 is exiting with locks still held!
+      1 lock held by iptables/8203:
+       #0: (sk_lock-AF_INET){--..}, at: [<ffffffff81213341>] ip_setsockopt+0x61/0xa0
+    
+      stack backtrace:
+    
+      Call Trace:
+       [<ffffffff8100b78a>] show_trace+0xca/0x3b0
+       [<ffffffff8100ba85>] dump_stack+0x15/0x20
+       [<ffffffff8105e469>] debug_check_no_locks_held+0x89/0xa0
+       [<ffffffff8103aa7e>] do_exit+0xe2e/0xe80
+       [<ffffffff8103aba0>] sys_exit_group+0x0/0x20
+       [<0000000000000001>]
+    
+    Note: to reproduce this you can type in VE:
+       iptables -A INPUT -m tcp --dport 22 -j DROP
+    
+    Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 84ac295d2315ecf649e3910735d81e8d217396c3
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:58 2010 +0300
+
+    ve-proc: mangle mounts devname harder
+    
+    mounts: show /dev/xxx devices near ve root mounts, rather than just xxx
+    Required for fixing autofs in rhel5 container:
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 454ad87b41380655cb31a85f682ddb8289e8e1f9
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:58 2010 +0300
+
+    ve-sysctl: randomize_va_space
+    
+    virtualize sysctl kernel.randomize_va_space
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a44c3498bcf70065a85236b7daa77fe0320313f2
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:58 2010 +0300
+
+    ve-sysctl: add proc_dointvec_ve helper
+    
+    add generic method for proc access to per ve int values.
+    
+    extra1 field of ctl_table contains data field offset from ve_struct begin.
+    without CONFIG_VE use address from .data field.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 34e6684b531637ad4fd34502d32f6e3c74e2dac6
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:57 2010 +0300
+
+    ve: drop oom immunity at enter
+    
+    At CT enter switch to default OOM adjustment level if task is OOM-immune.
+    
+    This is a very bad idea to have OOM-unkillable tasks inside container,
+    because all forked tasks inherit this setting.
+    
+    Proc interface for changing OOM adjustment (/proc/<pid>/oom_adj)
+    is already restricted in CT by diff-ve-oom-adjust-20070604.
+    
+    On some systems sshd got OOM protection at start and not drop it after fork.
+    (example: ssh root at HN -> vzctl enter -> restart apache -- apache now OOM immune)
+    (example from xemul@: ssh root at HN vzctl start - VE is now OOM immune)
+    
+    http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=480020
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c7cf5c388378abf4d6e8e2e18c6c815eccab4fd7
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:57:57 2010 +0300
+
+    ms: ext4 use get host
+    
+    Force ext4 page fault handlers use ->get_host callbacks
+    This is required not to use vzfs file in ->page_mkwrite callback.
+    Bug #454968
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a7de88181858ae8f9ec51cee11ae7f955e76430d
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:57:57 2010 +0300
+
+    nfs: disable nfs-v2
+    
+    nfs: disable NFSv2 as it is broken
+    According to Alexey: "who is going to turn v2 on, having
+    a v3, which works better, nearby?"
+    
+    Bug #114720
+    
+    Signed-off-by: Denis V. Lunev <den at parallels.com>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7805f36534f20e530fb84e83a360993ec78f3bb6
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:57:56 2010 +0300
+
+    ve: vfs sillyrename
+    
+    i_nlink count on private inodes after silly rename is 1. So, virtual inodes
+    gain i_nlink == 1 and remains in unused_list instead of to be cleaned.
+    
+    Bug #114672 #112999
+    
+    Signed-off-by: Denis V. Lunev <den at parallels.com>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d252a93b32d6d251fcc73863b75b91edaa801b95
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:57:56 2010 +0300
+
+    mm mmap zero length kludge
+    
+    Return -EINVAL in case of zero length file to all applications except
+    rpm. For (legacy) rpm address will be returned.
+    
+    Such hack is introduced just not to break compatibility with old
+    tools, sorry :(
+    
+    Bug #74964
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 437d113149802cb91254246f29134e3ade55e411
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date:   Sat Feb 27 16:57:56 2010 +0300
+
+    nfs: use file private macro
+    
+    Minor fix to nfs, which allows to use vzfs over nfs mounts.
+    
+    It survives fsstress test. I think normal vzfs tests can be started
+    asap to catch the points of possible misbehaviour.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 3c07eb700d9bbe7fd6b7dcf52103faf58ef4a035
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:55 2010 +0300
+
+    vzdq: cleanup fake qmblk destroy
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 8d622018ad2a3d025576578c0838c18ebfd3fdab
+Author: Konstantin Ozerkov <kozerkov at openvz.org>
+Date:   Sat Feb 27 16:57:55 2010 +0300
+
+    vzdq: qmblk dq_sem to mutex
+    
+    vzquota: replace quota master block semaphore with mutex
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 769b3bbe8d7859d168b42daa35720f12372e10db
+Author: Konstantin Ozerkov <kozerkov at openvz.org>
+Date:   Sat Feb 27 16:57:54 2010 +0300
+
+    vzdq: vz_quota sem to mutex
+    
+    vzquota: replace master lock semaphore with mutex
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 085883fb2366ae47c84fb18aa50f832e93ab56aa
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:54 2010 +0300
+
+    vzdq: vzaquota proc nlink
+    
+    Produce correct nlink count for /proc/vz/vzaquota
+    
+    Use the count of mountpoints accessible from VE as an upper estimate for
+    the count of subdirectories inside /proc/vz/vzaquota.
+    Concept stolen from vzdq_aquotd_readdir.
+    
+    Disable enumeration in VE0 for performance reasons (like in _readdir and _lookup)
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b9a8ce596cba9f5161769ca0408c71f8e6a059c7
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date:   Sat Feb 27 16:57:54 2010 +0300
+
+    vzdq: swap noquota
+    
+    swap_inode did not do anything for inodes not covered by vzquota,
+    which was wrong. F.e. mkdir, which creates inode with i_blocks!=0,
+    triggered message "detached inode not in creation".
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 20d11fba2ae882456b343ae78f466e27cc19d000
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date:   Sat Feb 27 16:57:54 2010 +0300
+
+    vzdq: nfs support
+    
+    It works differently and requires different interface.
+    Block accounting and quota check are separate now, we account
+    without checks and check for space in places, where an operation
+    could allocate more space.
+    
+    Chunk-by-chunk:
+    
+    1. Added new operation - swap_inode. Normally, virtual inode
+       is created/accounted/checked simultaneously. It is impossible for NFS.
+       So, each operation creating a new inode starts from allocating
+       space in quota using a dummy inode. If the operation succeeds and real
+       inode is created, we swap quota accounting information.
+       TODO: optimize out dummy inode. All that we need is qlnk.
+    
+    2. DQUOT_CHECK_SPACE() to check that quota is not full.
+    
+    3. DQUOT_SYNC_BLOCKS() to resync i_blocks obtained from NFS server
+       with our accounting.
+    
+    4. is_nfs_root(). NFS does not have root inode. Instead each mount
+       has pointer to a disconnected inode. vzquota has to understand this.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fd4f6b28860495f939f10abfaec8f255797a4fe8
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date:   Sat Feb 27 16:57:53 2010 +0300
+
+    vzdq: fix oops is inode_drop_call
+    
+    I suppose this happens when vzcache moves to template a file,
+    which was not under vzquota.
+    Bug #97782
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 71208971e69657168517194564e045781b054526
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:57:53 2010 +0300
+
+    simfs: statfs on root
+    
+    Do not use s_root dentry of underlying for statfs
+    The real problem is that s_root on the NFS super block is a crap.
+    Unfortunately, the original dentry (which is asked to be statfs-ed)
+    is not available at this point. The only visible solution for this
+    is to use the dentry to which simfs is point to.
+    
+    Signed-off-by: Denis V. Lunev <den at parallels.com>
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 11d902b2933c3292b8e1305e38e37c6419cb9cf2
+Author: Konstant Khorenko <khorenko at openvz.org>
+Date:   Sat Feb 27 16:57:52 2010 +0300
+
+    virtinfo hook in daemonize
+    
+    #427726
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 95a5273372efb164d0b3a4ab6eefca8b671d13e4
+Author: Andrey Mirkin <major at openvz.org>
+Date:   Sat Feb 27 16:57:52 2010 +0300
+
+    virtinfo add cpttest
+    
+    Add VIRTINFO_SCP_TEST event to virtinfo calls
+    
+    This will be responsible for checking CPT features
+    during checkpoint/restore process.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit e2e5984d43c91b3aa674123af73849e9643bffb3
+Author: Konstantin Khorenko <khorenko at openvz.org>
+Date:   Sat Feb 27 16:57:52 2010 +0300
+
+    ve-proc: fake sysrq trigger
+    
+    Add dummy /proc/sysrq-trigger file inside a Container
+    
+    Oracle 11g Release 1 RAC tries to open one and refuses to start on fail.
+    Writing to the file inside a CT leads to nothing, first 10 writes are logged.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fc17c7e942ccbcf6909ef9fdb7c4f170acaf1d72
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date:   Sat Feb 27 16:57:51 2010 +0300
+
+    ve-proc: add devices
+    
+    Proc: add empty /proc/devices to CT
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 3cfd7ac2a553a88af0053a59ac9870f1ce82760f
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:57:51 2010 +0300
+
+    ve: decrease ve_struct size in case of huge nr_cpus
+    
+    kstat_lat_pcpu_struct contains array of NR_CPUS elements.
+    Replace it with alloc_percpu data which helps to keep ve_struct
+    relatively small and prevents allocation fails of huge order.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 010370ec6b62618648c8b8882d3887e5e4073fc8
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date:   Mon Apr 26 17:22:10 2010 +0400
+
+    percpu: Return ve0/ub0 percpu-s back
+    
+    With the DEFINE_PER_CPU and init-s made in proper place we can
+    use them as alloc_percpu-ed ones.
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 541c4b4da4f9c522593f3fd622e5d20fa6a6b294
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:51 2010 +0300
+
+    ve: fix fs umount at ct stop
+    
+    Don't umount some mount multiple times on ct stop
+    
+    umount_tree kill argument must be empty list,
+    otherwise it can detach each vfsmount multiple times and
+    produce negative d_mounted count on mountpoint dentry.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 543578c2947332cda5aea3b195c4d6a80a3d317b
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:50 2010 +0300
+
+    ve: ptys idr mem leak
+    
+    Plug minor memory leak in idr_layer_cache slab on ve start-stop
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 965adae71aaa774796aeac8087806b77bbb0709f
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:50 2010 +0300
+
+    ve: tmpfs virtualize default size
+    
+    set default size to half of physpages from meminfo
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 79c0a2ab51af39b665f7e8162c26c5573eca1872
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:57:50 2010 +0300
+
+    ve: meminfo dont use subub
+    
+    Get parent UB instead of sub-group one to calculate usage
+    
+    Signed-off-by: Denis V. Lunev <den at openvz.org>
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 223f044cc32146df3a5f6dc61aab2bd053277de8
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:50 2010 +0300
+
+    ve: move veinfo to vzmon
+    
+    Since some people wish to run openvz w/o venet device, but
+    vzlist tool relies on /proc/vz/veinfo file presence, vzmon
+    module is a better place for this file.
+    
+    http://bugzilla.openvz.org/show_bug.cgi?id=394
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f267ef18a62f50bd5293a876e43b89467c8253f4
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:57:49 2010 +0300
+
+    ve: virtualize binfmt-misc
+    
+    Nothing special. SUN jdk complains since can't use binfmt.
+    Not serious and java surely works fine w/o it, but just to
+    make it and its users happy let's virtualize binfmt_misc.
+    
+    Signed-off-by: Pavel Emelianov <xemul at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1ff4faada1dabfdc4592e2824ce53a357373c83e
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:49 2010 +0300
+
+    bc: pb hash cookie
+    
+    add random hash cookie to ub to use in pb_hash instead of non-random ub_uid
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 31f588463c8294df47ff6357829b286abd580782
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date:   Sat Feb 27 16:57:49 2010 +0300
+
+    bc: uncharge files harder
+    
+    There is a chance when we do not start uncharging because
+    ub_barrier_farnr() is not hit for UB_NUMFILE and ub_barrier_farsz()
+    is not hit for UB_KMEMSIZE (SLM for example set ubc barrier to a
+    huge value).
+    
+    This fact can lead us to the situation when two tasks are able
+    to consume all of UB_NUMFILE and UB_KMEMSIZE despite they close
+    opened files.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 76cd7c1686940c2eeef94926e978b8893f9bb9e2
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:48 2010 +0300
+
+    ve: show proc swaps in ct
+    
+    Fill the size/used values with the ones from the meminfo virtinfo notifier.
+    
+    Show one fake swap partition (/dev/null) with the same size/used as in
+    /proc/meminfo. If --meminfo == none show overall swap statistics from HN.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit bf8c54dbd1c7b09abdab952da58e1f2c8f439ea4
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:48 2010 +0300
+
+    ve: mangle swapinfo
+    
+    Fill swap size/usage with data from UB_SWAPPAGES in meminfo notifier.
+    Don't show swap if the limit is unlimited (default state).
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1c2b5b4b1cbaafa707cb56da94dd5099dbdcc73d
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:48 2010 +0300
+
+    cpt: bc resources array
+    
+    restore only bc resources really presented in cpt image.
+    
+    store UB_RESOURCES in cpt_beancounter_image while checkpointing.
+    (leave all new added resources with default limits filled at bc alloc)
+    
+    change cpt_content of cpt_beancounter_image to CPT_CONTENT_ARRAY to detect
+    structure version without bumping cpt image version, because in old images
+    __cpt_pad field (reused for cpt_ub_resources) is uninitialized.
+    
+    add missed error handling inside rst_undump_ubc -- toss errors
+    from restore_one_bc to higher level.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7b8bbb51527e58abadcd0eeb3e7103ba4048a57f
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:47 2010 +0300
+
+    bc-swap: add swappages bc resource
+    
+    The limit value will be used as configured CT swap size to show
+    in /proc/swaps and /proc/meminfo. Default is UB_MAXVALUE
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit e7416bee163fb262076d9b7dfa93c0dbf304891d
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:57:47 2010 +0300
+
+    bc-rss: show how much page beancounters each bc has
+    
+    Essentially, this is the per-UB rss value calculated
+    (unlike physpages and privvmpages) w/o taking sharing
+    into account.
+    
+    With this statistics (shown via /proc/bc/XXX/vmaux:rss)
+    we can evaluate the portion of pages, that are shared
+    across beancounters (i.e. CTs) like this:
+    
+    (\sum (bc.rss + bc.tmpfs_respages) - \sum (bc.physpages)) /
+    	(\sum (bc.rss + bc.tmpfs_respages))
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b03577fcbea66508aca033f9c9c78bc060c02c24
+Author: Denis Lunev <den at openvz.org>
+Date:   Sat Feb 27 16:57:47 2010 +0300
+
+    bc-ioacct: define page_io_mark in right place
+    
+    fix compilation without CONFIG_BC_IO_ACCOUNTING
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 35fe6d0b31e36227f572550dff53154491760fb1
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date:   Sat Feb 27 16:57:47 2010 +0300
+
+    bc-ioprio: sys_ioprio_set lost unlock
+    
+    sys_ioprio_set() may exit without releasing tasklist_lock. Fix it.
+    
+    Acked-by: Pavel Emelyanov <xemul at openvz.org>
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 2cba7730c015206352563731d9f25cd027bd88f5
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:45 2010 +0300
+
+    ve-proc: fix root entry nlink
+    
+    * Add entries from local tree, similar as in proc_getattr;
+    * Use per-ve process count for VE's root, rather than the
+      total number of processes in the system.
+    
+    All of the above is an upper estimation, that is perfectly
+    fine with 'find' utility.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a2a22de6b8939570239c99973d3be7fb2eb4e70a
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:45 2010 +0300
+
+    ve-proc: fix nlink in getattr
+    
+    Fix nlink correction in proc_getattr
+    and change it right in the stat buffer instead of inode nlink
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f665309226859e081bcae5c0c7fd3a3bdd9ecfbc
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:45 2010 +0300
+
+    bc-proc: bc nlink count
+    
+    Override getattr callback on /proc/bc and ubc entries to get correct nlink.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 85051b1c71ad37949ef448ff8ddb342b75d706b0
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:45 2010 +0300
+
+    bc-proc: add bc and sub-bc counters
+    
+    Add counter of ubc, protected with ub_hash_lock.
+    Needed for correct proc n_link calculation.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d5ee7014d3f4995249cdadf3d00d1be778a3b10a
+Author: Pavel Emelianov <xemul at openvz.org>
+Date:   Sat Feb 27 16:57:44 2010 +0300
+
+    bc-proc: fix sub-bc inode number
+    
+    fix subbeancounter inode number calculations in /proc/bc
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b93ef081a586e08e226273599bcf7800907c731b
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:44 2010 +0300
+
+    simfs: compilation without quota
+    
+    fix simfs compilation if CONFIG_QUOTA=n
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 4fa1e482478bcde0552e9a97db1ddca620ebbe05
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date:   Sat Feb 27 16:57:43 2010 +0300
+
+    sysrq: smp nmi show regs v2
+    
+    Rework nmi show regs, make it clean and tolerant of nmi ipi loss.
+    
+    Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit cab0d970b18692b61e62e2095392e63c5097bf29
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date:   Mon Apr 26 15:09:43 2010 +0400
+
+    sysrq: revert nmi ipi callback
+    
+    next patch will implement this in less intrusive manner,
+    and without deadlocks at nmi ipi loss
+    
+    Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
 commit 6b5607eeec54fcef60c25fa7a72bc30f69446933
 Author: Pavel Emelyanov <xemul at openvz.org>
 Date:   Fri Apr 16 12:34:01 2010 +0400
@@ -2799,14 +4590,14 @@
 +library.  If this is what you want to do, use the GNU Library General
 +Public License instead of this License.
 diff --git a/Makefile b/Makefile
-index 78611d9..6c58263 100644
+index 573578f..12ba193 100644
 --- a/Makefile
 +++ b/Makefile
 @@ -2,6 +2,7 @@ VERSION = 2
  PATCHLEVEL = 6
  SUBLEVEL = 32
  EXTRAVERSION =
-+VZVERSION = atkov
++VZVERSION = avdeyev
  NAME = Man-Eating Seals of Antiquity
  
  # *DOCUMENTATION*
@@ -2849,7 +4640,7 @@
 +
 +source "kernel/bc/Kconfig"
 diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
-index 5294d84..cd218a8 100644
+index 5294d84..a920d42 100644
 --- a/arch/x86/ia32/ia32entry.S
 +++ b/arch/x86/ia32/ia32entry.S
 @@ -617,7 +617,7 @@ ia32_sys_call_table:
@@ -2870,6 +4661,32 @@
  	.quad quiet_ni_syscall	/* query_module */
  	.quad sys_poll
  	.quad compat_sys_nfsservctl
+@@ -841,4 +841,25 @@ ia32_sys_call_table:
+ 	.quad compat_sys_pwritev
+ 	.quad compat_sys_rt_tgsigqueueinfo	/* 335 */
+ 	.quad sys_perf_event_open
++	.rept 500-(.-ia32_sys_call_table)/8
++		.quad sys_ni_syscall
++	.endr
++	.quad sys_fairsched_mknod	/* 500 */
++	.quad sys_fairsched_rmnod
++	.quad sys_fairsched_chwt
++	.quad sys_fairsched_mvpr
++	.quad sys_fairsched_rate
++	.quad sys_fairsched_vcpus	/* 505 */
++	.quad sys_ni_syscall
++	.quad sys_ni_syscall
++	.quad sys_ni_syscall
++	.quad sys_ni_syscall
++	.quad sys_getluid		/* 510 */
++	.quad sys_setluid
++	.quad compat_sys_setublimit
++	.quad compat_sys_ubstat
++	.quad sys_ni_syscall
++	.quad sys_ni_syscall		/* 515 */
++	.quad sys_lchmod
++	.quad compat_sys_lutime
+ ia32_syscall_end:
 diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
 index 016218c..f368a9a 100644
 --- a/arch/x86/ia32/sys_ia32.c
@@ -2922,21 +4739,6 @@
  #define compat_arch_setup_additional_pages	syscall32_setup_pages
  
  extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
-index 139d4c1..5fd7d01 100644
---- a/arch/x86/include/asm/nmi.h
-+++ b/arch/x86/include/asm/nmi.h
-@@ -25,6 +25,10 @@ extern void release_perfctr_nmi(unsigned int);
- extern int reserve_evntsel_nmi(unsigned int);
- extern void release_evntsel_nmi(unsigned int);
- 
-+typedef int (*nmi_callback_t)(struct pt_regs *regs, int cpu);
-+void set_nmi_ipi_callback(nmi_callback_t callback);
-+void unset_nmi_ipi_callback(void);
-+
- extern void setup_apic_nmi_watchdog(void *);
- extern void stop_apic_nmi_watchdog(void *);
- extern void disable_timer_nmi_watchdog(void);
 diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
 index 271de94..e255a04 100644
 --- a/arch/x86/include/asm/pgalloc.h
@@ -3017,10 +4819,10 @@
  #endif
  	rdtscll(ret);
 diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
-index 6fb3c20..e7a2442 100644
+index 6fb3c20..c870519 100644
 --- a/arch/x86/include/asm/unistd_32.h
 +++ b/arch/x86/include/asm/unistd_32.h
-@@ -342,10 +342,20 @@
+@@ -342,10 +342,22 @@
  #define __NR_pwritev		334
  #define __NR_rt_tgsigqueueinfo	335
  #define __NR_perf_event_open	336
@@ -3034,6 +4836,8 @@
 +#define __NR_setluid		511
 +#define __NR_setublimit		512
 +#define __NR_ubstat		513
++#define __NR_lchmod		516
++#define __NR_lutime		517
  
  #ifdef __KERNEL__
  
@@ -3043,10 +4847,10 @@
  #define __ARCH_WANT_IPC_PARSE_VERSION
  #define __ARCH_WANT_OLD_READDIR
 diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
-index 8d3ad0a..dc19a9c 100644
+index 8d3ad0a..15bc00e 100644
 --- a/arch/x86/include/asm/unistd_64.h
 +++ b/arch/x86/include/asm/unistd_64.h
-@@ -661,6 +661,26 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
+@@ -661,6 +661,30 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
  __SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
  #define __NR_perf_event_open			298
  __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
@@ -3070,10 +4874,14 @@
 +__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
 +#define __NR_fairsched_rate			508
 +__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
++#define __NR_lchmod				509
++__SYSCALL(__NR_lchmod, sys_lchmod)
++#define __NR_lutime				510
++__SYSCALL(__NR_lutime, sys_lutime)
  
  #ifndef __NO_STUBS
  #define __ARCH_WANT_OLD_READDIR
-@@ -685,6 +705,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+@@ -685,6 +709,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
  #define __ARCH_WANT_SYS_RT_SIGSUSPEND
  #define __ARCH_WANT_SYS_TIME
  #define __ARCH_WANT_COMPAT_SYS_TIME
@@ -3094,32 +4902,10 @@
   * Given a pointer to the vDSO image, find the pointer to VDSO32_name
   * as that symbol is defined in the vDSO sources or linker script.
 diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
-index 7ff61d6..e5c7f78 100644
+index 7ff61d6..ee58297 100644
 --- a/arch/x86/kernel/apic/nmi.c
 +++ b/arch/x86/kernel/apic/nmi.c
-@@ -386,6 +386,21 @@ void touch_nmi_watchdog(void)
- }
- EXPORT_SYMBOL(touch_nmi_watchdog);
- 
-+void smp_show_regs(struct pt_regs *regs, void *info)
-+{
-+	static DEFINE_SPINLOCK(show_regs_lock);
-+
-+	if (regs == NULL)
-+		return;
-+
-+	spin_lock(&show_regs_lock);
-+	bust_spinlocks(1);
-+	printk("----------- IPI show regs -----------");
-+	show_regs(regs);
-+	bust_spinlocks(0);
-+	spin_unlock(&show_regs_lock);
-+}
-+
- notrace __kprobes int
- nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
- {
-@@ -435,10 +450,10 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+@@ -435,10 +435,10 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
  	if (!touched && __get_cpu_var(last_irq_sum) == sum) {
  		/*
  		 * Ayiee, looks like this CPU is stuck ...
@@ -3132,40 +4918,30 @@
  			/*
  			 * die_nmi will return ONLY if NOTIFY_STOP happens..
  			 */
+diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
+index bb62b3e..ce8a3f5 100644
+--- a/arch/x86/kernel/cpu/transmeta.c
++++ b/arch/x86/kernel/cpu/transmeta.c
+@@ -1,6 +1,7 @@
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/init.h>
++#include <linux/sched.h>
+ #include <asm/processor.h>
+ #include <asm/msr.h>
+ #include "cpu.h"
 diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 2d8a371..155d6c6 100644
+index 2d8a371..0d1ce00 100644
 --- a/arch/x86/kernel/dumpstack.c
 +++ b/arch/x86/kernel/dumpstack.c
-@@ -303,6 +303,21 @@ void die(const char *str, struct pt_regs *regs, long err)
- 	oops_end(flags, regs, sig);
- }
- 
-+/*
-+ * Voyager doesn't implement these
-+ */
-+void __attribute__((weak)) smp_show_regs(struct pt_regs *regs, void *info)
-+{
-+}
-+
-+#ifdef CONFIG_SMP
-+int __attribute__((weak))
-+smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
-+{
-+	return 0;
-+}
-+#endif
-+
- void notrace __kprobes
- die_nmi(char *str, struct pt_regs *regs, int do_panic)
- {
-@@ -319,6 +334,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
- 	printk(KERN_EMERG "%s", str);
+@@ -320,6 +320,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
  	printk(" on CPU%d, ip %08lx, registers:\n",
  		smp_processor_id(), regs->ip);
-+	smp_nmi_call_function(smp_show_regs, NULL, 1);
  	show_registers(regs);
++	nmi_show_regs(regs, 1);
  	oops_end(flags, regs, 0);
  	if (do_panic || panic_on_oops)
+ 		panic("Non maskable interrupt");
 diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
 index f7dd2a7..24c02de 100644
 --- a/arch/x86/kernel/dumpstack_32.c
@@ -3506,107 +5282,21 @@
  	if (syscall_get_nr(current, regs) >= 0) {
  		/* Restart the system call - no handlers present */
 diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
-index ec1de97..b74f73d 100644
+index ec1de97..29df6fd 100644
 --- a/arch/x86/kernel/smp.c
 +++ b/arch/x86/kernel/smp.c
-@@ -22,6 +22,7 @@
- #include <linux/interrupt.h>
- #include <linux/cpu.h>
- 
-+#include <linux/nmi.h>
- #include <asm/mtrr.h>
- #include <asm/tlbflush.h>
- #include <asm/mmu_context.h>
-@@ -146,6 +147,89 @@ void native_send_call_func_ipi(const struct cpumask *mask)
- 	free_cpumask_var(allbutself);
+@@ -221,6 +221,11 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
+ 	irq_exit();
  }
  
-+static DEFINE_SPINLOCK(nmi_call_lock);
-+static struct nmi_call_data_struct {
-+	smp_nmi_function func;
-+	void *info;
-+	atomic_t started;
-+	atomic_t finished;
-+	cpumask_t cpus_called;
-+	int wait;
-+} *nmi_call_data;
-+
-+static int smp_nmi_callback(struct pt_regs *regs, int cpu)
-+{
-+	smp_nmi_function func;
-+	void *info;
-+	int wait;
-+
-+	func = nmi_call_data->func;
-+	info = nmi_call_data->info;
-+	wait = nmi_call_data->wait;
-+	ack_APIC_irq();
-+	/* prevent from calling func() multiple times */
-+	if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
-+		return 0;
-+	/*
-+	 * notify initiating CPU that I've grabbed the data and am
-+	 * about to execute the function
-+	 */
-+	mb();
-+	atomic_inc(&nmi_call_data->started);
-+	/* at this point the nmi_call_data structure is out of scope */
-+	irq_enter();
-+	func(regs, info);
-+	irq_exit();
-+	if (wait)
-+		atomic_inc(&nmi_call_data->finished);
-+
-+	return 1;
-+}
-+
-+/*
-+ * This function tries to call func(regs, info) on each cpu.
-+ * Func must be fast and non-blocking.
-+ * May be called with disabled interrupts and from any context.
-+ */
-+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
++void send_nmi_ipi_allbutself(void)
 +{
-+	struct nmi_call_data_struct data;
-+	int cpus;
-+
-+	cpus = num_online_cpus() - 1;
-+	if (!cpus)
-+		return 0;
-+
-+	data.func = func;
-+	data.info = info;
-+	data.wait = wait;
-+	atomic_set(&data.started, 0);
-+	atomic_set(&data.finished, 0);
-+	cpus_clear(data.cpus_called);
-+	/* prevent this cpu from calling func if NMI happens */
-+	cpu_set(smp_processor_id(), data.cpus_called);
-+
-+	if (!spin_trylock(&nmi_call_lock))
-+		return -1;
-+
-+	nmi_call_data = &data;
-+	set_nmi_ipi_callback(smp_nmi_callback);
-+	mb();
-+
-+	/* Send a message to all other CPUs and wait for them to respond */
-+	apic->send_IPI_allbutself(APIC_DM_NMI);
-+	while (atomic_read(&data.started) != cpus)
-+		barrier();
-+
-+	unset_nmi_ipi_callback();
-+	if (wait)
-+		while (atomic_read(&data.finished) != cpus)
-+			barrier();
-+	spin_unlock(&nmi_call_lock);
-+
-+	return 0;
++	apic->send_IPI_allbutself(NMI_VECTOR);
 +}
 +
- /*
-  * this function calls the 'stop' function on all other CPUs in the system.
-  */
+ struct smp_ops smp_ops = {
+ 	.smp_prepare_boot_cpu	= native_smp_prepare_boot_cpu,
+ 	.smp_prepare_cpus	= native_smp_prepare_cpus,
 diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
 index 28e963d..54a0ecf 100644
 --- a/arch/x86/kernel/smpboot.c
@@ -3625,10 +5315,10 @@
  	start_ip = setup_trampoline();
  
 diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
-index 76d70a4..0defa11 100644
+index 76d70a4..477e261 100644
 --- a/arch/x86/kernel/syscall_table_32.S
 +++ b/arch/x86/kernel/syscall_table_32.S
-@@ -336,3 +336,22 @@ ENTRY(sys_call_table)
+@@ -336,3 +336,24 @@ ENTRY(sys_call_table)
  	.long sys_pwritev
  	.long sys_rt_tgsigqueueinfo	/* 335 */
  	.long sys_perf_event_open
@@ -3650,52 +5340,23 @@
 +	.long sys_setublimit
 +	.long sys_ubstat
 +	.long sys_ni_syscall
-+	.long sys_ni_syscall
++	.long sys_ni_syscall		/* 515 */
++	.long sys_lchmod
++	.long sys_lutime
 diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index 7e37dce..e1ceccb 100644
+index 7e37dce..d1fd061 100644
 --- a/arch/x86/kernel/traps.c
 +++ b/arch/x86/kernel/traps.c
-@@ -385,6 +385,13 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
- 	printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
- }
- 
-+static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
-+{
-+	return 0;
-+}
-+
-+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
-+
- static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
- {
- 	unsigned char reason = 0;
-@@ -439,12 +446,24 @@ do_nmi(struct pt_regs *regs, long error_code)
- 
- 	inc_irq_stat(__nmi_count);
- 
--	if (!ignore_nmis)
--		default_do_nmi(regs);
-+	if (!ignore_nmis) {
-+		if (!nmi_ipi_callback(regs, smp_processor_id()))
-+			default_do_nmi(regs);
-+	}
- 
- 	nmi_exit();
- }
- 
-+void set_nmi_ipi_callback(nmi_callback_t callback)
-+{
-+	nmi_ipi_callback = callback;
-+}
-+
-+void unset_nmi_ipi_callback(void)
-+{
-+	nmi_ipi_callback = dummy_nmi_callback;
-+}
-+
- void stop_nmi(void)
- {
- 	acpi_nmi_disable();
+@@ -405,7 +405,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+ 		 * Ok, so this is none of the documented NMI sources,
+ 		 * so it must be the NMI watchdog.
+ 		 */
+-		if (nmi_watchdog_tick(regs, reason))
++		if (nmi_watchdog_tick(regs, reason) +
++				do_nmi_show_regs(regs, cpu))
+ 			return;
+ 		if (!do_nmi_callback(regs, cpu))
+ 			unknown_nmi_error(reason, regs);
 diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
 index f379309..6c44e77 100644
 --- a/arch/x86/kernel/tsc_sync.c
@@ -4762,34 +6423,32 @@
  
  #else
 diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
-index 44203ff..2f26e57 100644
+index 44203ff..4288c77 100644
 --- a/drivers/char/sysrq.c
 +++ b/drivers/char/sysrq.c
-@@ -37,6 +37,8 @@
+@@ -37,7 +37,10 @@
  #include <linux/vt_kern.h>
  #include <linux/workqueue.h>
  #include <linux/hrtimer.h>
 +#include <linux/kallsyms.h>
 +#include <linux/slab.h>
  #include <linux/oom.h>
++#include <linux/nmi.h>
  
  #include <asm/ptrace.h>
-@@ -250,8 +252,14 @@ static struct sysrq_key_op sysrq_showallcpus_op = {
+ #include <asm/irq_regs.h>
+@@ -250,8 +253,8 @@ static struct sysrq_key_op sysrq_showallcpus_op = {
  static void sysrq_handle_showregs(int key, struct tty_struct *tty)
  {
  	struct pt_regs *regs = get_irq_regs();
+-	if (regs)
+-		show_regs(regs);
 +
-+	bust_spinlocks(1);
- 	if (regs)
- 		show_regs(regs);
-+	bust_spinlocks(0);
-+#if defined(__i386__) || defined(__x86_64__)
-+	smp_nmi_call_function(smp_show_regs, NULL, 1);
-+#endif
++	nmi_show_regs(regs, 0);
  	perf_event_print_debug();
  }
  static struct sysrq_key_op sysrq_showregs_op = {
-@@ -303,6 +311,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
+@@ -303,6 +306,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
  static void sysrq_handle_showmem(int key, struct tty_struct *tty)
  {
  	show_mem();
@@ -4797,7 +6456,7 @@
  }
  static struct sysrq_key_op sysrq_showmem_op = {
  	.handler	= sysrq_handle_showmem,
-@@ -318,7 +327,7 @@ static void send_sig_all(int sig)
+@@ -318,7 +322,7 @@ static void send_sig_all(int sig)
  {
  	struct task_struct *p;
  
@@ -4806,7 +6465,7 @@
  		if (p->mm && !is_global_init(p))
  			/* Not swapper, init nor kernel thread */
  			force_sig(sig, p);
-@@ -394,7 +403,267 @@ static struct sysrq_key_op sysrq_unrt_op = {
+@@ -394,7 +398,267 @@ static struct sysrq_key_op sysrq_unrt_op = {
  /* Key Operations table and lock */
  static DEFINE_SPINLOCK(sysrq_key_table_lock);
  
@@ -5075,7 +6734,7 @@
  	&sysrq_loglevel_op,		/* 0 */
  	&sysrq_loglevel_op,		/* 1 */
  	&sysrq_loglevel_op,		/* 2 */
-@@ -417,7 +686,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
+@@ -417,7 +681,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
  	&sysrq_term_op,			/* e */
  	&sysrq_moom_op,			/* f */
  	/* g: May be registered for the kernel debugger */
@@ -5087,7 +6746,7 @@
  	NULL,				/* h - reserved for help */
  	&sysrq_kill_op,			/* i */
  #ifdef CONFIG_BLOCK
-@@ -449,8 +722,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
+@@ -449,8 +717,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
  	/* y: May be registered on sparc64 for global register dump */
  	NULL,				/* y */
  	&sysrq_ftrace_dump_op,		/* z */
@@ -5099,7 +6758,7 @@
  /* key2index calculation, -1 on invalid index */
  static int sysrq_key_table_key2index(int key)
  {
-@@ -460,6 +736,10 @@ static int sysrq_key_table_key2index(int key)
+@@ -460,6 +731,10 @@ static int sysrq_key_table_key2index(int key)
  		retval = key - '0';
  	else if ((key >= 'a') && (key <= 'z'))
  		retval = key + 10 - 'a';
@@ -5110,7 +6769,7 @@
  	else
  		retval = -1;
  	return retval;
-@@ -470,21 +750,21 @@ static int sysrq_key_table_key2index(int key)
+@@ -470,21 +745,21 @@ static int sysrq_key_table_key2index(int key)
   */
  struct sysrq_key_op *__sysrq_get_key_op(int key)
  {
@@ -5139,7 +6798,7 @@
  }
  
  /*
-@@ -507,25 +787,25 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
+@@ -507,25 +782,25 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
  	 */
  	orig_log_level = console_loglevel;
  	console_loglevel = 7;
@@ -5171,7 +6830,7 @@
  			if (sysrq_key_table[i]) {
  				int j;
  
-@@ -555,7 +835,7 @@ void handle_sysrq(int key, struct tty_struct *tty)
+@@ -555,7 +830,7 @@ void handle_sysrq(int key, struct tty_struct *tty)
  EXPORT_SYMBOL(handle_sysrq);
  
  static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
@@ -5180,9 +6839,13 @@
  {
  
  	int retval;
-@@ -592,11 +872,16 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
+@@ -591,12 +866,29 @@ EXPORT_SYMBOL(unregister_sysrq_key);
+ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
  				   size_t count, loff_t *ppos)
  {
++	struct ve_struct *cur = get_exec_env();
++	static int pnum = 10;
++
  	if (count) {
 -		char c;
 +		int i, cnt;
@@ -5195,13 +6858,31 @@
 -		__handle_sysrq(c, NULL, 0);
 +
 +
-+		for (i = 0; i < cnt && c[i] != '\n'; i++)
++		for (i = 0; i < cnt && c[i] != '\n'; i++) {
++			if (!ve_is_super(cur))	{
++				if (!pnum)
++					continue;
++				printk("SysRq: CT#%u sent '%c' magic key.\n",
++						cur->veid, c[i]);
++				pnum--;
++				continue;
++			}
 +			__handle_sysrq(c[i], NULL, 0);
++		}
  	}
  	return count;
  }
+@@ -607,7 +899,7 @@ static const struct file_operations proc_sysrq_trigger_operations = {
+ 
+ static int __init sysrq_init(void)
+ {
+-	proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
++	proc_create("sysrq-trigger", S_IWUSR, &glob_proc_root, &proc_sysrq_trigger_operations);
+ 	return 0;
+ }
+ module_init(sysrq_init);
 diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
-index 05cab2c..f973a9f 100644
+index 53ffcfc..2571f59 100644
 --- a/drivers/char/tty_io.c
 +++ b/drivers/char/tty_io.c
 @@ -96,6 +96,8 @@
@@ -5287,7 +6968,7 @@
  {
  	struct tty_struct *tty;
  	int retval;
-@@ -1705,7 +1729,7 @@ void tty_release_dev(struct file *filp)
+@@ -1707,7 +1731,7 @@ void tty_release_dev(struct file *filp)
  
  static int __tty_open(struct inode *inode, struct file *filp)
  {
@@ -5296,7 +6977,7 @@
  	int noctty, retval;
  	struct tty_driver *driver;
  	int index;
-@@ -1729,6 +1753,7 @@ retry_open:
+@@ -1731,6 +1755,7 @@ retry_open:
  		}
  		driver = tty_driver_kref_get(tty->driver);
  		index = tty->index;
@@ -5304,7 +6985,7 @@
  		filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
  		/* noctty = 1; */
  		/* FIXME: Should we take a driver reference ? */
-@@ -1738,6 +1763,12 @@ retry_open:
+@@ -1740,6 +1765,12 @@ retry_open:
  #ifdef CONFIG_VT
  	if (device == MKDEV(TTY_MAJOR, 0)) {
  		extern struct tty_driver *console_driver;
@@ -5317,7 +6998,7 @@
  		driver = tty_driver_kref_get(console_driver);
  		index = fg_console;
  		noctty = 1;
-@@ -1746,6 +1777,12 @@ retry_open:
+@@ -1748,6 +1779,12 @@ retry_open:
  #endif
  	if (device == MKDEV(TTYAUX_MAJOR, 1)) {
  		struct tty_driver *console_driver = console_device(&index);
@@ -5330,7 +7011,7 @@
  		if (console_driver) {
  			driver = tty_driver_kref_get(console_driver);
  			if (driver) {
-@@ -1780,7 +1817,7 @@ got_driver:
+@@ -1782,7 +1819,7 @@ got_driver:
  		if (retval)
  			tty = ERR_PTR(retval);
  	} else
@@ -5339,7 +7020,7 @@
  
  	mutex_unlock(&tty_mutex);
  	tty_driver_kref_put(driver);
-@@ -2076,6 +2113,8 @@ static int tioccons(struct file *file)
+@@ -2078,6 +2115,8 @@ static int tioccons(struct file *file)
  {
  	if (!capable(CAP_SYS_ADMIN))
  		return -EPERM;
@@ -5348,7 +7029,7 @@
  	if (file->f_op->write == redirected_tty_write) {
  		struct file *f;
  		spin_lock(&redirect_lock);
-@@ -2656,7 +2695,7 @@ void __do_SAK(struct tty_struct *tty)
+@@ -2658,7 +2697,7 @@ void __do_SAK(struct tty_struct *tty)
  	/* Now kill any processes that happen to have the
  	 * tty open.
  	 */
@@ -5357,7 +7038,7 @@
  		if (p->signal->tty == tty) {
  			printk(KERN_NOTICE "SAK: killed process %d"
  			    " (%s): task_session(p)==tty->session\n",
-@@ -2688,7 +2727,7 @@ void __do_SAK(struct tty_struct *tty)
+@@ -2690,7 +2729,7 @@ void __do_SAK(struct tty_struct *tty)
  			spin_unlock(&p->files->file_lock);
  		}
  		task_unlock(p);
@@ -5366,7 +7047,7 @@
  	read_unlock(&tasklist_lock);
  #endif
  }
-@@ -2755,6 +2794,7 @@ void initialize_tty_struct(struct tty_struct *tty,
+@@ -2757,6 +2796,7 @@ void initialize_tty_struct(struct tty_struct *tty,
  	tty->ops = driver->ops;
  	tty->index = idx;
  	tty_line_name(driver, idx, tty->name);
@@ -5374,7 +7055,7 @@
  }
  
  /**
-@@ -2847,6 +2887,7 @@ struct tty_driver *alloc_tty_driver(int lines)
+@@ -2849,6 +2889,7 @@ struct tty_driver *alloc_tty_driver(int lines)
  		driver->magic = TTY_DRIVER_MAGIC;
  		driver->num = lines;
  		/* later we'll move allocation of tables here */
@@ -5382,7 +7063,7 @@
  	}
  	return driver;
  }
-@@ -2881,6 +2922,7 @@ static void destruct_tty_driver(struct kref *kref)
+@@ -2883,6 +2924,7 @@ static void destruct_tty_driver(struct kref *kref)
  		kfree(p);
  		cdev_del(&driver->cdev);
  	}
@@ -5390,7 +7071,7 @@
  	kfree(driver);
  }
  
-@@ -2955,6 +2997,7 @@ int tty_register_driver(struct tty_driver *driver)
+@@ -2957,6 +2999,7 @@ int tty_register_driver(struct tty_driver *driver)
  	}
  
  	mutex_lock(&tty_mutex);
@@ -5398,7 +7079,7 @@
  	list_add(&driver->tty_drivers, &tty_drivers);
  	mutex_unlock(&tty_mutex);
  
-@@ -3128,3 +3171,43 @@ static int __init tty_init(void)
+@@ -3130,3 +3173,43 @@ static int __init tty_init(void)
  	return 0;
  }
  module_init(tty_init);
@@ -6228,10 +7909,10 @@
  }
 diff --git a/drivers/net/venet_core.c b/drivers/net/venet_core.c
 new file mode 100644
-index 0000000..5aeb82b
+index 0000000..317fbb0
 --- /dev/null
 +++ b/drivers/net/venet_core.c
-@@ -0,0 +1,775 @@
+@@ -0,0 +1,864 @@
 +/*
 + *  venet_core.c
 + *
@@ -6321,6 +8002,86 @@
 +	return NULL;
 +}
 +
++struct ext_entry_struct *venet_ext_lookup(struct ve_struct *ve,
++		struct ve_addr_struct *addr)
++{
++	struct ext_entry_struct *entry;
++
++	if (ve->veip == NULL)
++		return NULL;
++
++	list_for_each_entry (entry, &ve->veip->ext_lh, list)
++		if (memcmp(&entry->addr, addr, sizeof(*addr)) == 0)
++			return entry;
++	return NULL;
++}
++
++int venet_ext_add(struct ve_struct *ve, struct ve_addr_struct *addr)
++{
++	struct ext_entry_struct *entry, *found;
++	int err;
++
++	if (ve->veip == NULL)
++		return -ENONET;
++
++	entry = kzalloc(sizeof(struct ext_entry_struct), GFP_KERNEL);
++	if (entry == NULL)
++		return -ENOMEM;
++
++	write_lock_irq(&veip_hash_lock);
++	err = -EADDRINUSE;
++	found = venet_ext_lookup(ve, addr);
++	if (found != NULL)
++		goto out_unlock;
++
++	entry->addr = *addr;
++	list_add(&entry->list, &ve->veip->ext_lh);
++	err = 0;
++	entry = NULL;
++out_unlock:
++	write_unlock_irq(&veip_hash_lock);
++	if (entry != NULL)
++		kfree(entry);
++	return err;
++}
++
++int venet_ext_del(struct ve_struct *ve, struct ve_addr_struct *addr)
++{
++	struct ext_entry_struct *found;
++	int err;
++
++	if (ve->veip == NULL)
++		return -ENONET;
++
++	err = -EADDRNOTAVAIL;
++	write_lock_irq(&veip_hash_lock);
++	found = venet_ext_lookup(ve, addr);
++	if (found == NULL)
++		goto out;
++
++	list_del(&found->list);
++	kfree(found);
++	err = 0;
++out:
++	write_unlock_irq(&veip_hash_lock);
++	return err;
++}
++
++void venet_ext_clean(struct ve_struct *ve)
++{
++	struct ext_entry_struct *entry, *tmp;
++
++	if (ve->veip == NULL)
++		return;
++
++	write_lock_irq(&veip_hash_lock);
++	list_for_each_entry_safe (entry, tmp, &ve->veip->ext_lh, list) {
++		list_del(&entry->list);
++		kfree(entry);
++	}
++	write_unlock_irq(&veip_hash_lock);
++}
++
 +struct veip_struct *veip_find(envid_t veid)
 +{
 +	struct veip_struct *ptr;
@@ -6348,6 +8109,7 @@
 +	INIT_LIST_HEAD(&ptr->ip_lh);
 +	INIT_LIST_HEAD(&ptr->src_lh);
 +	INIT_LIST_HEAD(&ptr->dst_lh);
++	INIT_LIST_HEAD(&ptr->ext_lh);
 +	ptr->veid = veid;
 +	list_add(&ptr->list, &veip_lh);
 +	return ptr;
@@ -6641,6 +8403,20 @@
 +	return venet_set_op(dev, data, ethtool_op_set_tx_csum);
 +}
 +
++static int
++venet_op_set_tso(struct net_device *dev, u32 data)
++{
++	if (!ve_is_super(get_exec_env()))
++		return -EPERM;
++
++	if (data)
++		common_features |= NETIF_F_TSO;
++	else
++		common_features &= ~NETIF_F_TSO;
++
++	return venet_set_op(dev, data, ethtool_op_set_tso);
++}
++
 +#define venet_op_set_rx_csum venet_op_set_tx_csum
 +
 +static struct ethtool_ops venet_ethtool_ops = {
@@ -6651,6 +8427,7 @@
 +	.get_rx_csum = ethtool_op_get_tx_csum,
 +	.set_rx_csum = venet_op_set_rx_csum,
 +	.get_tso = ethtool_op_get_tso,
++	.set_tso = venet_op_set_tso,
 +};
 +
 +static void venet_cpt(struct net_device *dev,
@@ -6685,15 +8462,10 @@
 +}
 +
 +#ifdef CONFIG_PROC_FS
-+static int veinfo_seq_show(struct seq_file *m, void *v)
++static void veaddr_seq_print(struct seq_file *m, struct ve_struct *ve)
 +{
-+	struct ve_struct *ve;
 +	struct ip_entry_struct *entry;
 +
-+	ve = list_entry((struct list_head *)v, struct ve_struct, ve_list);
-+
-+	seq_printf(m, "%10u %5u %5u", ve->veid,
-+                                ve->class_id, atomic_read(&ve->pcounter));
 +	read_lock(&veip_hash_lock);
 +	if (ve->veip == NULL)
 +		goto unlock;
@@ -6711,29 +8483,8 @@
 +	}
 +unlock:
 +	read_unlock(&veip_hash_lock);
-+	seq_putc(m, '\n');
-+	return 0;
-+}
-+
-+static struct seq_operations veinfo_seq_op = {
-+	.start	= ve_seq_start,
-+	.next	=  ve_seq_next,
-+	.stop	=  ve_seq_stop,
-+	.show	=  veinfo_seq_show,
-+};
-+
-+static int veinfo_open(struct inode *inode, struct file *file)
-+{
-+        return seq_open(file, &veinfo_seq_op);
 +}
 +
-+static struct file_operations proc_veinfo_operations = {
-+	.open		= veinfo_open,
-+	.read		= seq_read,
-+	.llseek		= seq_lseek,
-+	.release	= seq_release,
-+};
-+
 +static void *veip_seq_start(struct seq_file *m, loff_t *pos)
 +{
 +	loff_t l;
@@ -6804,7 +8555,7 @@
 +	struct ve_addr_struct addr;
 +
 +	err = -EPERM;
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		goto out;
 +
 +	err = sockaddr_to_veaddr(uaddr, addrlen, &addr);
@@ -6829,6 +8580,28 @@
 +		case VE_IP_DEL:
 +			err = veip_entry_del(veid, &addr);
 +			break;
++		case VE_IP_EXT_ADD:
++			ve = get_ve_by_id(veid);
++			err = -ESRCH;
++			if (!ve)
++				goto out;
++
++			down_read(&ve->op_sem);
++			err = venet_ext_add(ve, &addr);
++			up_read(&ve->op_sem);
++			put_ve(ve);
++			break;
++		case VE_IP_EXT_DEL:
++			ve = get_ve_by_id(veid);
++			err = -ESRCH;
++			if (!ve)
++				goto out;
++
++			down_read(&ve->op_sem);
++			err = venet_ext_del(ve, &addr);
++			up_read(&ve->op_sem);
++			put_ve(ve);
++			break;
 +		default:
 +			err = -EINVAL;
 +	}
@@ -6940,6 +8713,7 @@
 +	struct net_device *dev;
 +
 +	env = (struct ve_struct *)data;
++	venet_ext_clean(env);
 +	veip_stop(env);
 +
 +	dev = env->_venet_dev;
@@ -6976,11 +8750,6 @@
 +		return err;
 +
 +#ifdef CONFIG_PROC_FS
-+	de = proc_create("veinfo", S_IFREG | S_IRUSR, glob_proc_vz_dir,
-+			&proc_veinfo_operations);
-+	if (de == NULL)
-+		printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
-+
 +	de = proc_create("veip", S_IFREG | S_IRUSR, proc_vz_dir,
 +			&proc_veip_operations);
 +	if (de == NULL)
@@ -6989,17 +8758,18 @@
 +
 +	ve_hook_register(VE_SS_CHAIN, &venet_ve_hook);
 +	vzioctl_register(&venetcalls);
++	vzmon_register_veaddr_print_cb(veaddr_seq_print);
 +	return 0;
 +}
 +
 +__exit void venet_exit(void)
 +{
++	vzmon_unregister_veaddr_print_cb(veaddr_seq_print);
 +	vzioctl_unregister(&venetcalls);
 +	ve_hook_unregister(&venet_ve_hook);
 +
 +#ifdef CONFIG_PROC_FS
 +	remove_proc_entry("veip", proc_vz_dir);
-+	remove_proc_entry("veinfo", glob_proc_vz_dir);
 +#endif
 +	venet_stop(get_ve0());
 +	veip_cleanup();
@@ -7021,10 +8791,10 @@
  
 diff --git a/drivers/net/vzethdev.c b/drivers/net/vzethdev.c
 new file mode 100644
-index 0000000..e073e3e
+index 0000000..ed8ed97
 --- /dev/null
 +++ b/drivers/net/vzethdev.c
-@@ -0,0 +1,741 @@
+@@ -0,0 +1,749 @@
 +/*
 + *  veth.c
 + *
@@ -7278,6 +9048,7 @@
 +		stats->tx_bytes   += dev_stats->tx_bytes;
 +		stats->rx_packets += dev_stats->rx_packets;
 +		stats->tx_packets += dev_stats->tx_packets;
++		stats->tx_dropped += dev_stats->tx_dropped;
 +	}
 +
 +	return stats;
@@ -7418,6 +9189,12 @@
 +	return veth_set_op(dev, data, ethtool_op_set_tx_csum);
 +}
 +
++static int
++veth_op_set_tso(struct net_device *dev, u32 data)
++{
++	return veth_set_op(dev, data, ethtool_op_set_tso);
++}
++
 +#define veth_op_set_rx_csum veth_op_set_tx_csum
 +
 +static struct ethtool_ops veth_ethtool_ops = {
@@ -7428,6 +9205,7 @@
 +	.get_rx_csum = ethtool_op_get_tx_csum,
 +	.set_rx_csum = veth_op_set_rx_csum,
 +	.get_tso = ethtool_op_get_tso,
++	.set_tso = veth_op_set_tso,
 +};
 +
 +static void veth_cpt(struct net_device *dev,
@@ -7799,16 +9577,15 @@
  	starget->id = id;
  	starget->channel = channel;
 diff --git a/fs/Kconfig b/fs/Kconfig
-index 64d44ef..998c68e 100644
+index 64d44ef..f48e240 100644
 --- a/fs/Kconfig
 +++ b/fs/Kconfig
-@@ -63,6 +63,15 @@ source "fs/autofs/Kconfig"
+@@ -63,6 +63,14 @@ source "fs/autofs/Kconfig"
  source "fs/autofs4/Kconfig"
  source "fs/fuse/Kconfig"
  
 +config SIM_FS
 +	tristate "VPS filesystem"
-+	depends on VZ_QUOTA
 +	default m
 +	help
 +	  This file system is a part of Virtuozzo. It intoduces a fake
@@ -8202,11 +9979,257 @@
  	if (retval < 0) {
  		send_sig(SIGKILL, current, 0);
  		goto out;
+diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
+index c4e8353..8180165 100644
+--- a/fs/binfmt_misc.c
++++ b/fs/binfmt_misc.c
+@@ -28,6 +28,7 @@
+ #include <linux/mount.h>
+ #include <linux/syscalls.h>
+ #include <linux/fs.h>
++#include <linux/ve_proto.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -35,8 +36,15 @@ enum {
+ 	VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
+ };
+ 
++#ifdef CONFIG_VE
++#define bm_entries(ve)		((ve)->bm_entries)
++#define bm_enabled(ve)		((ve)->bm_enabled)
++#else
+ static LIST_HEAD(entries);
+ static int enabled = 1;
++#define bm_entries(ve)		(entries)
++#define bm_enabled(ve)		(enabled)
++#endif
+ 
+ enum {Enabled, Magic};
+ #define MISC_FMT_PRESERVE_ARGV0 (1<<31)
+@@ -56,21 +64,30 @@ typedef struct {
+ } Node;
+ 
+ static DEFINE_RWLOCK(entries_lock);
++#ifdef CONFIG_VE
++#define bm_fs_type(ve)		(*(ve)->bm_fs_type)
++#define bm_mnt(ve)		((ve)->bm_mnt)
++#define bm_entry_count(ve)	((ve)->bm_entry_count)
++#else
+ static struct file_system_type bm_fs_type;
+ static struct vfsmount *bm_mnt;
+ static int entry_count;
++#define bm_fs_type(ve)		(bm_fs_type)
++#define bm_mnt(ve)		(bm_mnt)
++#define bm_entry_count(ve)	(bm_entry_count)
++#endif
+ 
+ /* 
+  * Check if we support the binfmt
+  * if we do, return the node, else NULL
+  * locking is done in load_misc_binary
+  */
+-static Node *check_file(struct linux_binprm *bprm)
++static Node *check_file(struct ve_struct *ve, struct linux_binprm *bprm)
+ {
+ 	char *p = strrchr(bprm->interp, '.');
+ 	struct list_head *l;
+ 
+-	list_for_each(l, &entries) {
++	list_for_each(l, &bm_entries(ve)) {
+ 		Node *e = list_entry(l, Node, list);
+ 		char *s;
+ 		int j;
+@@ -111,9 +128,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ 	char *iname_addr = iname;
+ 	int retval;
+ 	int fd_binary = -1;
++	struct ve_struct *ve = get_exec_env();
+ 
+ 	retval = -ENOEXEC;
+-	if (!enabled)
++	if (!bm_enabled(ve))
+ 		goto _ret;
+ 
+ 	retval = -ENOEXEC;
+@@ -122,7 +140,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ 
+ 	/* to keep locking time low, we copy the interpreter string */
+ 	read_lock(&entries_lock);
+-	fmt = check_file(bprm);
++	fmt = check_file(ve, bprm);
+ 	if (fmt)
+ 		strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
+ 	read_unlock(&entries_lock);
+@@ -507,7 +525,7 @@ static void bm_clear_inode(struct inode *inode)
+ 	kfree(inode->i_private);
+ }
+ 
+-static void kill_node(Node *e)
++static void kill_node(struct ve_struct *ve, Node *e)
+ {
+ 	struct dentry *dentry;
+ 
+@@ -523,7 +541,7 @@ static void kill_node(Node *e)
+ 		dentry->d_inode->i_nlink--;
+ 		d_drop(dentry);
+ 		dput(dentry);
+-		simple_release_fs(&bm_mnt, &entry_count);
++		simple_release_fs(&bm_mnt(ve), &bm_entry_count(ve));
+ 	}
+ }
+ 
+@@ -562,7 +580,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
+ 		case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+ 			mutex_lock(&root->d_inode->i_mutex);
+ 
+-			kill_node(e);
++			kill_node(get_exec_env(), e);
+ 
+ 			mutex_unlock(&root->d_inode->i_mutex);
+ 			dput(root);
+@@ -587,6 +605,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
+ 	struct dentry *root, *dentry;
+ 	struct super_block *sb = file->f_path.mnt->mnt_sb;
+ 	int err = 0;
++	struct ve_struct *ve = get_exec_env();
+ 
+ 	e = create_entry(buffer, count);
+ 
+@@ -610,7 +629,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
+ 	if (!inode)
+ 		goto out2;
+ 
+-	err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count);
++	err = simple_pin_fs(&bm_fs_type(ve), &bm_mnt(ve), &bm_entry_count(ve));
+ 	if (err) {
+ 		iput(inode);
+ 		inode = NULL;
+@@ -623,7 +642,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
+ 
+ 	d_instantiate(dentry, inode);
+ 	write_lock(&entries_lock);
+-	list_add(&e->list, &entries);
++	list_add(&e->list, &bm_entries(ve));
+ 	write_unlock(&entries_lock);
+ 
+ 	err = 0;
+@@ -649,26 +668,31 @@ static const struct file_operations bm_register_operations = {
+ static ssize_t
+ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+ {
+-	char *s = enabled ? "enabled\n" : "disabled\n";
++	struct ve_struct *ve = get_exec_env();
++	char *s = bm_enabled(ve) ? "enabled\n" : "disabled\n";
+ 
+ 	return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
+ }
+ 
++static void dm_genocide(struct ve_struct *ve)
++{
++	while (!list_empty(&bm_entries(ve)))
++		kill_node(ve, list_entry(bm_entries(ve).next, Node, list));
++}
++
+ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
+ 		size_t count, loff_t *ppos)
+ {
++	struct ve_struct *ve = get_exec_env();
+ 	int res = parse_command(buffer, count);
+ 	struct dentry *root;
+ 
+ 	switch (res) {
+-		case 1: enabled = 0; break;
+-		case 2: enabled = 1; break;
++		case 1: bm_enabled(ve) = 0; break;
++		case 2: bm_enabled(ve) = 1; break;
+ 		case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+ 			mutex_lock(&root->d_inode->i_mutex);
+-
+-			while (!list_empty(&entries))
+-				kill_node(list_entry(entries.next, Node, list));
+-
++			dm_genocide(ve);
+ 			mutex_unlock(&root->d_inode->i_mutex);
+ 			dput(root);
+ 		default: return res;
+@@ -719,6 +743,53 @@ static struct file_system_type bm_fs_type = {
+ 	.kill_sb	= kill_litter_super,
+ };
+ 
++#ifdef CONFIG_VE
++static void __ve_binfmt_init(struct ve_struct *ve, struct file_system_type *fs)
++{
++	ve->bm_fs_type = fs;
++	INIT_LIST_HEAD(&ve->bm_entries);
++	ve->bm_enabled = 1;
++	ve->bm_mnt = NULL;
++	ve->bm_entry_count = 0;
++}
++
++static int ve_binfmt_init(void *x)
++{
++	struct ve_struct *ve = x;
++	struct file_system_type *fs_type;
++	int err;
++
++	err = register_ve_fs_type(ve, &bm_fs_type, &fs_type, NULL);
++	if (err == 0)
++		__ve_binfmt_init(ve, fs_type);
++
++	return err;
++}
++
++static void ve_binfmt_fini(void *x)
++{
++	struct ve_struct *ve = x;
++
++	/*
++	 * no locks since exec_ve is dead and noone will
++	 * mess with bm_xxx fields any longer
++	 */
++	if (!ve->bm_fs_type)
++		return;
++	dm_genocide(ve);
++	unregister_ve_fs_type(ve->bm_fs_type, NULL);
++	kfree(ve->bm_fs_type);
++	ve->bm_fs_type = NULL;
++}
++
++static struct ve_hook ve_binfmt_hook = {
++	.init		= ve_binfmt_init,
++	.fini		= ve_binfmt_fini,
++	.priority	= HOOK_PRIO_FS,
++	.owner		= THIS_MODULE,
++};
++#endif
++
+ static int __init init_misc_binfmt(void)
+ {
+ 	int err = register_filesystem(&bm_fs_type);
+@@ -727,11 +798,17 @@ static int __init init_misc_binfmt(void)
+ 		if (err)
+ 			unregister_filesystem(&bm_fs_type);
+ 	}
++
++	if (!err) {
++		__ve_binfmt_init(get_ve0(), &bm_fs_type);
++		ve_hook_register(VE_SS_CHAIN, &ve_binfmt_hook);
++	}
+ 	return err;
+ }
+ 
+ static void __exit exit_misc_binfmt(void)
+ {
++	ve_hook_unregister(&ve_binfmt_hook);
+ 	unregister_binfmt(&misc_format);
+ 	unregister_filesystem(&bm_fs_type);
+ }
 diff --git a/fs/block_dev.c b/fs/block_dev.c
-index 34e2d20..b170595 100644
+index 9b9e3dc..fe0cca1 100644
 --- a/fs/block_dev.c
 +++ b/fs/block_dev.c
-@@ -1601,7 +1601,7 @@ int __invalidate_device(struct block_device *bdev)
+@@ -1602,7 +1602,7 @@ int __invalidate_device(struct block_device *bdev)
  		 * hold).
  		 */
  		shrink_dcache_sb(sb);
@@ -8240,7 +10263,7 @@
  }
  
 diff --git a/fs/compat.c b/fs/compat.c
-index 6c19040..204915d 100644
+index 6c19040..5141257 100644
 --- a/fs/compat.c
 +++ b/fs/compat.c
 @@ -26,6 +26,7 @@
@@ -8270,7 +10293,29 @@
  /*
   * Not all architectures have sys_utime, so implement this in terms
   * of sys_utimes.
-@@ -269,6 +282,8 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
+@@ -91,6 +104,21 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
+ 	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
+ }
+ 
++asmlinkage long compat_sys_lutime(char __user * filename,
++		struct compat_utimbuf __user *t)
++{
++	struct timespec tv[2];
++
++	if (t) {
++		if (get_user(tv[0].tv_sec, &t->actime) ||
++		    get_user(tv[1].tv_sec, &t->modtime))
++			return -EFAULT;
++		tv[0].tv_nsec = 0;
++		tv[1].tv_nsec = 0;
++	}
++	return do_utimes(AT_FDCWD, filename, t ? tv : NULL, AT_SYMLINK_NOFOLLOW);
++}
++
+ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags)
+ {
+ 	struct timespec tv[2];
+@@ -269,6 +297,8 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
  		struct kstatfs tmp;
  		error = vfs_statfs(path.dentry, &tmp);
  		if (!error)
@@ -8279,7 +10324,7 @@
  			error = put_compat_statfs(buf, &tmp);
  		path_put(&path);
  	}
-@@ -287,6 +302,8 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
+@@ -287,6 +317,8 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
  		goto out;
  	error = vfs_statfs(file->f_path.dentry, &tmp);
  	if (!error)
@@ -8288,7 +10333,7 @@
  		error = put_compat_statfs(buf, &tmp);
  	fput(file);
  out:
-@@ -337,6 +354,8 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
+@@ -337,6 +369,8 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
  		struct kstatfs tmp;
  		error = vfs_statfs(path.dentry, &tmp);
  		if (!error)
@@ -8297,7 +10342,7 @@
  			error = put_compat_statfs64(buf, &tmp);
  		path_put(&path);
  	}
-@@ -358,6 +377,8 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
+@@ -358,6 +392,8 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
  		goto out;
  	error = vfs_statfs(file->f_path.dentry, &tmp);
  	if (!error)
@@ -8306,7 +10351,7 @@
  		error = put_compat_statfs64(buf, &tmp);
  	fput(file);
  out:
-@@ -1469,6 +1490,10 @@ int compat_do_execve(char * filename,
+@@ -1469,6 +1505,10 @@ int compat_do_execve(char * filename,
  	bool clear_in_exec;
  	int retval;
  
@@ -8331,7 +10376,7 @@
  			current->comm, current->pid,
  			(int)fd, (unsigned int)cmd, buf,
 diff --git a/fs/dcache.c b/fs/dcache.c
-index a100fa3..48c4d04 100644
+index a100fa3..7fce87d 100644
 --- a/fs/dcache.c
 +++ b/fs/dcache.c
 @@ -26,6 +26,7 @@
@@ -8375,16 +10420,7 @@
  	/*drops the locks, at that point nobody can reach this dentry */
  	dentry_iput(dentry);
  	if (IS_ROOT(dentry))
-@@ -214,21 +223,31 @@ static struct dentry *d_kill(struct dentry *dentry)
- 
- void dput(struct dentry *dentry)
- {
-+	struct user_beancounter *ub;
-+	unsigned long d_ubsize;
-+
- 	if (!dentry)
- 		return;
- 
+@@ -220,15 +229,22 @@ void dput(struct dentry *dentry)
  repeat:
  	if (atomic_read(&dentry->d_count) == 1)
  		might_sleep();
@@ -8414,7 +10450,7 @@
  
  	/*
  	 * AV: ->d_delete() is _NOT_ allowed to block now.
-@@ -244,8 +263,12 @@ repeat:
+@@ -244,8 +260,12 @@ repeat:
    		dentry->d_flags |= DCACHE_REFERENCED;
  		dentry_lru_add(dentry);
    	}
@@ -8427,20 +10463,23 @@
  	return;
  
  unhash_it:
-@@ -253,9 +276,18 @@ unhash_it:
+@@ -253,9 +273,21 @@ unhash_it:
  kill_it:
  	/* if dentry was on the d_lru list delete it from there */
  	dentry_lru_del(dentry);
 +
-+	ub = dentry->dentry_bc.d_ub;
-+	d_ubsize = dentry->dentry_bc.d_ubsize;
- 	dentry = d_kill(dentry);
--	if (dentry)
-+	preempt_disable();
 +	if (unlikely(ub_dentry_on)) {
-+		uncharge_dcache(ub, d_ubsize);
++		struct user_beancounter *ub;
++
++		ub = dentry->dentry_bc.d_ub;
++		BUG_ON(!ub_dput_testzero(dentry));
++		uncharge_dcache(ub, dentry->dentry_bc.d_ubsize);
 +		put_beancounter(ub);
 +	}
++
+ 	dentry = d_kill(dentry);
+-	if (dentry)
++	preempt_disable();
 +	if (dentry) 
  		goto repeat;
 +	preempt_enable();
@@ -9482,10 +11521,10 @@
  
  /*
 diff --git a/fs/ext3/super.c b/fs/ext3/super.c
-index 427496c..a7a6210 100644
+index ca3068f..0c4978f 100644
 --- a/fs/ext3/super.c
 +++ b/fs/ext3/super.c
-@@ -2988,7 +2988,7 @@ static struct file_system_type ext3_fs_type = {
+@@ -2986,7 +2986,7 @@ static struct file_system_type ext3_fs_type = {
  	.name		= "ext3",
  	.get_sb		= ext3_get_sb,
  	.kill_sb	= kill_block_super,
@@ -9494,6 +11533,27 @@
  };
  
  static int __init init_ext3_fs(void)
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 16efcee..3833fe9 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5770,9 +5770,14 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ 	int ret = -EINVAL;
+ 	void *fsdata;
+ 	struct file *file = vma->vm_file;
+-	struct inode *inode = file->f_path.dentry->d_inode;
+-	struct address_space *mapping = inode->i_mapping;
++	struct inode *inode;
++	struct address_space *mapping;
++
++	if (file->f_op->get_host)
++		file = file->f_op->get_host(file);
+ 
++	inode = file->f_path.dentry->d_inode;
++	mapping = inode->i_mapping;
+ 	/*
+ 	 * Get i_alloc_sem to stop truncates messing with the inode. We cannot
+ 	 * get i_mutex because we are already holding mmap_sem.
 diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
 index b63d193..0ae6e52 100644
 --- a/fs/ext4/ioctl.c
@@ -10135,7 +12195,7 @@
  	fuse_sysfs_cleanup();
  	fuse_fs_cleanup();
 diff --git a/fs/inode.c b/fs/inode.c
-index 4d8e3be..5460538 100644
+index 4d8e3be..ab63b5f 100644
 --- a/fs/inode.c
 +++ b/fs/inode.c
 @@ -8,10 +8,13 @@
@@ -10364,8 +12424,17 @@
  		}
  	}
  #endif
+@@ -1258,7 +1339,7 @@ int generic_detach_inode(struct inode *inode)
+ 		if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ 			list_move(&inode->i_list, &inode_unused);
+ 		inodes_stat.nr_unused++;
+-		if (sb->s_flags & MS_ACTIVE) {
++		if (sb->s_flags & MS_ACTIVE && !(inode->i_flags & S_NOUNUSE)) {
+ 			spin_unlock(&inode_lock);
+ 			return 0;
+ 		}
 diff --git a/fs/ioprio.c b/fs/ioprio.c
-index c7c0b28..c14af3f 100644
+index c7c0b28..2a7e8ae 100644
 --- a/fs/ioprio.c
 +++ b/fs/ioprio.c
 @@ -26,6 +26,7 @@
@@ -10389,7 +12458,7 @@
  
  	switch (class) {
  		case IOPRIO_CLASS_RT:
-@@ -137,17 +141,23 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
+@@ -137,17 +141,25 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
  			if (!user)
  				break;
  
@@ -10407,15 +12476,17 @@
  				free_uid(user);
  			break;
 +		case IOPRIO_WHO_UBC:
-+			if (class != IOPRIO_CLASS_BE)
-+				return -ERANGE;
++			if (class != IOPRIO_CLASS_BE) {
++				ret = -ERANGE;
++				break;
++			}
 +
 +			ret = 0; /* bc_set_ioprio(who, data); */
 +			break;
  		default:
  			ret = -EINVAL;
  	}
-@@ -192,9 +202,9 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+@@ -192,9 +204,9 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
  {
  	struct task_struct *g, *p;
  	struct user_struct *user;
@@ -10426,7 +12497,7 @@
  
  	read_lock(&tasklist_lock);
  	switch (which) {
-@@ -230,7 +240,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+@@ -230,7 +242,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
  			if (!user)
  				break;
  
@@ -10435,7 +12506,7 @@
  				if (__task_cred(p)->uid != user->uid)
  					continue;
  				tmpio = get_task_ioprio(p);
-@@ -240,7 +250,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+@@ -240,7 +252,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
  					ret = tmpio;
  				else
  					ret = ioprio_best(ret, tmpio);
@@ -11147,7 +13218,7 @@
   		return 0;
   
 diff --git a/fs/namespace.c b/fs/namespace.c
-index bdc3cb4..2536eff 100644
+index bdc3cb4..d811360 100644
 --- a/fs/namespace.c
 +++ b/fs/namespace.c
 @@ -29,6 +29,7 @@
@@ -11182,7 +13253,24 @@
  		atomic_set(&mnt->mnt_count, 1);
  		INIT_LIST_HEAD(&mnt->mnt_hash);
  		INIT_LIST_HEAD(&mnt->mnt_child);
-@@ -629,6 +633,7 @@ repeat:
+@@ -517,7 +521,7 @@ static void commit_tree(struct vfsmount *mnt)
+ 	touch_mnt_namespace(n);
+ }
+ 
+-static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
++struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
+ {
+ 	struct list_head *next = p->mnt_mounts.next;
+ 	if (next == &p->mnt_mounts) {
+@@ -532,6 +536,7 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
+ 	}
+ 	return list_entry(next, struct vfsmount, mnt_child);
+ }
++EXPORT_SYMBOL(next_mnt);
+ 
+ static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
+ {
+@@ -629,6 +634,7 @@ repeat:
  		spin_unlock(&vfsmount_lock);
  		acct_auto_close_mnt(mnt);
  		security_sb_umount_close(mnt);
@@ -11190,7 +13278,7 @@
  		goto repeat;
  	}
  }
-@@ -789,15 +794,48 @@ static void show_type(struct seq_file *m, struct super_block *sb)
+@@ -789,15 +795,50 @@ static void show_type(struct seq_file *m, struct super_block *sb)
  	}
  }
  
@@ -11223,9 +13311,10 @@
 -	int err = 0;
 +	int err;
  	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-+	char *path_buf, *path;
- 
+-
 -	mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
++	char *path_buf, *path;
++
 +	err = prepare_mnt_root_mangle(&mnt_path, &path_buf, &path);
 +	if (err < 0)
 +		return (err == -EACCES ? 0 : err);
@@ -11233,8 +13322,10 @@
 +	if (ve_is_super(get_exec_env()) ||
 +	    !(mnt->mnt_sb->s_type->fs_flags & FS_MANGLE_PROC))
 +		mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
-+	else
++	else {
++		seq_puts(m, "/dev/");
 +		mangle(m, mnt->mnt_sb->s_type->name);
++	}
  	seq_putc(m, ' ');
 -	seq_path(m, &mnt_path, " \t\n\\");
 +	mangle(m, path);
@@ -11242,7 +13333,7 @@
  	seq_putc(m, ' ');
  	show_type(m, mnt->mnt_sb);
  	seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
-@@ -884,18 +922,27 @@ static int show_vfsstat(struct seq_file *m, void *v)
+@@ -884,18 +925,27 @@ static int show_vfsstat(struct seq_file *m, void *v)
  {
  	struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
  	struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -11273,7 +13364,7 @@
  	seq_putc(m, ' ');
  
  	/* file system type */
-@@ -1107,6 +1154,34 @@ static int do_umount(struct vfsmount *mnt, int flags)
+@@ -1107,6 +1157,36 @@ static int do_umount(struct vfsmount *mnt, int flags)
  	return retval;
  }
  
@@ -11296,8 +13387,10 @@
 +	}
 +
 +	while (!list_empty(&kill)) {
++		LIST_HEAD(kill2);
 +		mnt = list_entry(kill.next, struct vfsmount, mnt_list);
-+		umount_tree(mnt, 1, &umount_list);
++		umount_tree(mnt, 1, &kill2);
++		list_splice(&kill2, &umount_list);
 +	}
 +	spin_unlock(&vfsmount_lock);
 +	up_write(&namespace_sem);
@@ -11308,7 +13401,7 @@
  /*
   * Now umount can handle mount points as well as block devices.
   * This is important for filesystems which use unnamed block devices.
-@@ -1130,7 +1205,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+@@ -1130,7 +1210,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
  		goto dput_and_out;
  
  	retval = -EPERM;
@@ -11317,7 +13410,7 @@
  		goto dput_and_out;
  
  	retval = do_umount(path.mnt, flags);
-@@ -1156,7 +1231,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
+@@ -1156,7 +1236,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
  
  static int mount_is_safe(struct path *path)
  {
@@ -11326,7 +13419,7 @@
  		return 0;
  	return -EPERM;
  #ifdef notyet
-@@ -1425,6 +1500,8 @@ static int do_change_type(struct path *path, int flag)
+@@ -1425,6 +1505,8 @@ static int do_change_type(struct path *path, int flag)
  
  	if (path->dentry != path->mnt->mnt_root)
  		return -EINVAL;
@@ -11335,7 +13428,7 @@
  
  	down_write(&namespace_sem);
  	if (type == MS_SHARED) {
-@@ -1447,7 +1524,7 @@ static int do_change_type(struct path *path, int flag)
+@@ -1447,7 +1529,7 @@ static int do_change_type(struct path *path, int flag)
   * do loopback mount.
   */
  static int do_loopback(struct path *path, char *old_name,
@@ -11344,7 +13437,7 @@
  {
  	struct path old_path;
  	struct vfsmount *mnt = NULL;
-@@ -1477,6 +1554,7 @@ static int do_loopback(struct path *path, char *old_name,
+@@ -1477,6 +1559,7 @@ static int do_loopback(struct path *path, char *old_name,
  	if (!mnt)
  		goto out;
  
@@ -11352,7 +13445,7 @@
  	err = graft_tree(mnt, path);
  	if (err) {
  		LIST_HEAD(umount_list);
-@@ -1520,7 +1598,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
+@@ -1520,7 +1603,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
  	int err;
  	struct super_block *sb = path->mnt->mnt_sb;
  
@@ -11361,7 +13454,7 @@
  		return -EPERM;
  
  	if (!check_mnt(path->mnt))
-@@ -1529,6 +1607,9 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
+@@ -1529,6 +1612,9 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
  	if (path->dentry != path->mnt->mnt_root)
  		return -EINVAL;
  
@@ -11371,7 +13464,7 @@
  	down_write(&sb->s_umount);
  	if (flags & MS_BIND)
  		err = change_mount_flags(path->mnt, flags);
-@@ -1562,7 +1643,7 @@ static int do_move_mount(struct path *path, char *old_name)
+@@ -1562,7 +1648,7 @@ static int do_move_mount(struct path *path, char *old_name)
  	struct path old_path, parent_path;
  	struct vfsmount *p;
  	int err = 0;
@@ -11380,7 +13473,7 @@
  		return -EPERM;
  	if (!old_name || !*old_name)
  		return -EINVAL;
-@@ -1570,6 +1651,10 @@ static int do_move_mount(struct path *path, char *old_name)
+@@ -1570,6 +1656,10 @@ static int do_move_mount(struct path *path, char *old_name)
  	if (err)
  		return err;
  
@@ -11391,7 +13484,7 @@
  	down_write(&namespace_sem);
  	while (d_mountpoint(path->dentry) &&
  	       follow_down(path))
-@@ -1627,6 +1712,7 @@ out:
+@@ -1627,6 +1717,7 @@ out:
  	up_write(&namespace_sem);
  	if (!err)
  		path_put(&parent_path);
@@ -11399,7 +13492,7 @@
  	path_put(&old_path);
  	return err;
  }
-@@ -1644,7 +1730,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
+@@ -1644,7 +1735,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
  		return -EINVAL;
  
  	/* we need capabilities... */
@@ -11408,7 +13501,7 @@
  		return -EPERM;
  
  	lock_kernel();
-@@ -1685,6 +1771,11 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
+@@ -1685,6 +1776,11 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
  		goto unlock;
  
  	newmnt->mnt_flags = mnt_flags;
@@ -11420,7 +13513,7 @@
  	if ((err = graft_tree(newmnt, path)))
  		goto unlock;
  
-@@ -1959,7 +2050,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
+@@ -1959,7 +2055,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
  		retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
  				    data_page);
  	else if (flags & MS_BIND)
@@ -11429,7 +13522,7 @@
  	else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
  		retval = do_change_type(&path, flags);
  	else if (flags & MS_MOVE)
-@@ -2122,6 +2213,7 @@ out_dir:
+@@ -2122,6 +2218,7 @@ out_dir:
  out_type:
  	return ret;
  }
@@ -11437,7 +13530,7 @@
  
  /*
   * pivot_root Semantics:
-@@ -2281,7 +2373,7 @@ void __init mnt_init(void)
+@@ -2281,7 +2378,7 @@ void __init mnt_init(void)
  	init_rwsem(&namespace_sem);
  
  	mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
@@ -11447,7 +13540,7 @@
  	mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
  
 diff --git a/fs/nfs/client.c b/fs/nfs/client.c
-index 99ea196..986fe94 100644
+index 69d6a46..b9a8f89 100644
 --- a/fs/nfs/client.c
 +++ b/fs/nfs/client.c
 @@ -125,6 +125,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
@@ -11514,7 +13607,7 @@
  		if (clp->rpc_ops != data->rpc_ops)
  			continue;
 diff --git a/fs/nfs/super.c b/fs/nfs/super.c
-index 4bf23f6..79e65e4 100644
+index 4bf23f6..253438f 100644
 --- a/fs/nfs/super.c
 +++ b/fs/nfs/super.c
 @@ -53,6 +53,9 @@
@@ -11619,7 +13712,19 @@
  #ifdef CONFIG_NFS_V4
  	unregister_filesystem(&nfs4_fs_type);
  #endif
-@@ -2079,6 +2135,10 @@ static int nfs_compare_super(struct super_block *sb, void *data)
+@@ -1794,6 +1850,11 @@ static int nfs_validate_mount_data(void *options,
+ 		goto out_v3_not_compiled;
+ #endif /* !CONFIG_NFS_V3 */
+ 
++	if (!(args->flags & NFS_MOUNT_VER3)) {
++		printk("NFSv2 is broken and not supported\n");
++		return -EPROTONOSUPPORT;
++	}
++
+ 	return 0;
+ 
+ out_no_data:
+@@ -2079,6 +2140,10 @@ static int nfs_compare_super(struct super_block *sb, void *data)
  	struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
  	int mntflags = sb_mntdata->mntflags;
  
@@ -11630,7 +13735,7 @@
  	if (!nfs_compare_super_address(old, server))
  		return 0;
  	/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
-@@ -2107,6 +2167,11 @@ static int nfs_get_sb(struct file_system_type *fs_type,
+@@ -2107,6 +2172,11 @@ static int nfs_get_sb(struct file_system_type *fs_type,
  		.mntflags = flags,
  	};
  	int error = -ENOMEM;
@@ -11642,7 +13747,7 @@
  
  	data = nfs_alloc_parsed_mount_data(3);
  	mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
-@@ -2237,6 +2302,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+@@ -2237,6 +2307,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
  		.mntflags = flags,
  	};
  	int error;
@@ -11968,7 +14073,7 @@
  		goto path_put_and_out;
  
 diff --git a/fs/open.c b/fs/open.c
-index 4f01e06..23011b6 100644
+index 4f01e06..77f73fc 100644
 --- a/fs/open.c
 +++ b/fs/open.c
 @@ -25,6 +25,7 @@
@@ -12070,7 +14175,51 @@
  	if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
  		error = -EFAULT;
  	fput(file);
-@@ -707,6 +731,7 @@ out_release:
+@@ -630,14 +654,20 @@ out:
+ 	return err;
+ }
+ 
+-SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
++static int do_fchmodat(int dfd, const char __user *filename, mode_t mode, int flag)
+ {
+ 	struct path path;
+ 	struct inode *inode;
+ 	int error;
+ 	struct iattr newattrs;
++	int follow;
+ 
+-	error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
++	error = -EINVAL;
++	if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
++		goto out;
++
++	follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
++	error = user_path_at(dfd, filename, follow, &path);
+ 	if (error)
+ 		goto out;
+ 	inode = path.dentry->d_inode;
+@@ -659,9 +689,19 @@ out:
+ 	return error;
+ }
+ 
++SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
++{
++	return do_fchmodat(dfd, filename, mode, 0);
++}
++
+ SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
+ {
+-	return sys_fchmodat(AT_FDCWD, filename, mode);
++	return do_fchmodat(AT_FDCWD, filename, mode, 0);
++}
++
++SYSCALL_DEFINE2(lchmod, const char __user *, filename, mode_t, mode)
++{
++	return do_fchmodat(AT_FDCWD, filename, mode, AT_SYMLINK_NOFOLLOW);
+ }
+ 
+ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
+@@ -707,6 +747,7 @@ out_release:
  out:
  	return error;
  }
@@ -12078,7 +14227,7 @@
  
  SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
  		gid_t, group, int, flag)
-@@ -948,6 +973,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+@@ -948,6 +989,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
  	return filp;
  }
  
@@ -12086,7 +14235,7 @@
  /*
   * dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
   * error.
-@@ -972,6 +998,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
+@@ -972,6 +1014,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
  		return ERR_PTR(-EINVAL);
  	}
  
@@ -12096,7 +14245,7 @@
  	error = -ENFILE;
  	f = get_empty_filp();
  	if (f == NULL) {
-@@ -1062,6 +1091,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
+@@ -1062,6 +1107,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
  	asmlinkage_protect(3, ret, filename, flags, mode);
  	return ret;
  }
@@ -12405,10 +14554,18 @@
  		mmput(mm);
  	return 0;
 diff --git a/fs/proc/base.c b/fs/proc/base.c
-index 6d71c67..de26c5c 100644
+index 13b0378..eb8a70f 100644
 --- a/fs/proc/base.c
 +++ b/fs/proc/base.c
-@@ -156,10 +156,14 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root)
+@@ -49,6 +49,7 @@
+ 
+ #include <asm/uaccess.h>
+ 
++#include <linux/module.h>
+ #include <linux/errno.h>
+ #include <linux/time.h>
+ #include <linux/proc_fs.h>
+@@ -156,10 +157,14 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root)
  	fs = task->fs;
  	if (fs) {
  		read_lock(&fs->lock);
@@ -12426,7 +14583,7 @@
  	}
  	task_unlock(task);
  	return result;
-@@ -549,17 +553,31 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
+@@ -550,17 +555,31 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
  static int proc_fd_access_allowed(struct inode *inode)
  {
  	struct task_struct *task;
@@ -12461,7 +14618,7 @@
  }
  
  static int proc_setattr(struct dentry *dentry, struct iattr *attr)
-@@ -1038,6 +1056,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
+@@ -1039,6 +1058,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
  	if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
  	     oom_adjust != OOM_DISABLE)
  		return -EINVAL;
@@ -12470,7 +14627,7 @@
  
  	task = get_proc_task(file->f_path.dentry->d_inode);
  	if (!task)
-@@ -1294,6 +1314,7 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+@@ -1295,6 +1316,7 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  	mm->exe_file = new_exe_file;
  	mm->num_exe_file_vmas = 0;
  }
@@ -12478,7 +14635,7 @@
  
  struct file *get_mm_exe_file(struct mm_struct *mm)
  {
-@@ -1332,10 +1353,15 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
+@@ -1333,10 +1355,15 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
  	exe_file = get_mm_exe_file(mm);
  	mmput(mm);
  	if (exe_file) {
@@ -12497,7 +14654,7 @@
  	} else
  		return -ENOENT;
  }
-@@ -1343,13 +1369,14 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
+@@ -1344,13 +1371,14 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
  static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
  {
  	struct inode *inode = dentry->d_inode;
@@ -12514,7 +14671,7 @@
  		goto out;
  
  	error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
-@@ -1384,12 +1411,13 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
+@@ -1385,12 +1413,13 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
  
  static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
  {
@@ -12530,7 +14687,7 @@
  		goto out;
  
  	error = PROC_I(inode)->op.proc_get_link(inode, &path);
-@@ -1640,6 +1668,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+@@ -1641,6 +1670,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
  	struct files_struct *files = NULL;
  	struct file *file;
  	int fd = proc_fd(inode);
@@ -12538,7 +14695,7 @@
  
  	if (task) {
  		files = get_files_struct(task);
-@@ -1652,7 +1681,8 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+@@ -1653,7 +1683,8 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
  		 */
  		spin_lock(&files->file_lock);
  		file = fcheck_files(files, fd);
@@ -12548,7 +14705,7 @@
  			if (path) {
  				*path = file->f_path;
  				path_get(&file->f_path);
-@@ -1670,7 +1700,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+@@ -1671,7 +1702,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
  		spin_unlock(&files->file_lock);
  		put_files_struct(files);
  	}
@@ -12557,7 +14714,7 @@
  }
  
  static int proc_fd_link(struct inode *inode, struct path *path)
-@@ -2457,7 +2487,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+@@ -2458,7 +2489,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
  		struct task_struct *t = task;
  
  		task_io_accounting_add(&acct, &task->signal->ioac);
@@ -12566,6 +14723,42 @@
  			task_io_accounting_add(&acct, &t->ioac);
  
  		unlock_task_sighand(task, &flags);
+@@ -3161,3 +3192,35 @@ static const struct file_operations proc_task_operations = {
+ 	.read		= generic_read_dir,
+ 	.readdir	= proc_task_readdir,
+ };
++
++/* Check whether dentry belongs to a task that already died */
++int proc_dentry_of_dead_task(struct dentry *dentry)
++{
++	if (dentry->d_inode->i_fop == &dummy_proc_pid_file_operations)
++		return 1;
++
++	return (dentry->d_op == &pid_dentry_operations &&
++		 proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first == NULL);
++}
++EXPORT_SYMBOL(proc_dentry_of_dead_task);
++
++/* Placed here to avoid using the vzrst module count */
++static ssize_t dummy_proc_pid_read(struct file * file, char __user * buf,
++				 size_t count, loff_t *ppos)
++{
++	return -ESRCH;
++}
++
++static ssize_t dummy_proc_pid_write(struct file * file, const char * buf,
++				  size_t count, loff_t *ppos)
++{
++	return -ESRCH;
++}
++
++struct file_operations dummy_proc_pid_file_operations = {
++	.read		= dummy_proc_pid_read,
++	.write		= dummy_proc_pid_write,
++};
++
++EXPORT_SYMBOL(dummy_proc_pid_file_operations);
++
 diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
 index 82676e3..2ad657d 100644
 --- a/fs/proc/cmdline.c
@@ -12606,8 +14799,39 @@
  	return 0;
  }
  module_init(proc_cpuinfo_init);
+diff --git a/fs/proc/devices.c b/fs/proc/devices.c
+index 59ee7da..d485f24 100644
+--- a/fs/proc/devices.c
++++ b/fs/proc/devices.c
+@@ -2,6 +2,7 @@
+ #include <linux/init.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
++#include <linux/sched.h>
+ 
+ static int devinfo_show(struct seq_file *f, void *v)
+ {
+@@ -25,6 +26,9 @@ static int devinfo_show(struct seq_file *f, void *v)
+ 
+ static void *devinfo_start(struct seq_file *f, loff_t *pos)
+ {
++	if (!ve_is_super(get_exec_env()))
++		return NULL;
++
+ 	if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
+ 		return pos;
+ 	return NULL;
+@@ -64,7 +68,7 @@ static const struct file_operations proc_devinfo_operations = {
+ 
+ static int __init proc_devices_init(void)
+ {
+-	proc_create("devices", 0, NULL, &proc_devinfo_operations);
++	proc_create("devices", 0, &glob_proc_root, &proc_devinfo_operations);
+ 	return 0;
+ }
+ module_init(proc_devices_init);
 diff --git a/fs/proc/generic.c b/fs/proc/generic.c
-index fa678ab..56f268b 100644
+index fa678ab..a66517d 100644
 --- a/fs/proc/generic.c
 +++ b/fs/proc/generic.c
 @@ -255,6 +255,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -12637,7 +14861,33 @@
  out:
  	return error;
  }
-@@ -411,28 +418,60 @@ static const struct dentry_operations proc_dentry_operations =
+@@ -274,11 +281,22 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ 			struct kstat *stat)
+ {
+ 	struct inode *inode = dentry->d_inode;
+-	struct proc_dir_entry *de = PROC_I(inode)->pde;
+-	if (de && de->nlink)
+-		inode->i_nlink = de->nlink;
++	struct proc_dir_entry *de = PDE(inode);
++	struct proc_dir_entry *lde = LPDE(inode);
+ 
+ 	generic_fillattr(inode, stat);
++
++	if (de && de->nlink)
++		stat->nlink = de->nlink;
++	/* If the dentry is found in both trees and it is a directory,
++	 * then the inode's nlink count must be altered, because the local
++	 * and global subtrees may differ.
++	 * On the other hand, they may intersect, so the actual nlink
++	 * value is difficult to calculate - an upper estimate is used
++	 * instead.
++	 */
++	if (lde && lde != de && lde->nlink > 1)
++		stat->nlink += lde->nlink - 2;
+ 	return 0;
+ }
+ 
+@@ -411,28 +429,60 @@ static const struct dentry_operations proc_dentry_operations =
  	.d_delete	= proc_delete_dentry,
  };
  
@@ -12705,7 +14955,7 @@
  			goto out_unlock;
  		}
  	}
-@@ -446,13 +485,15 @@ out_unlock:
+@@ -446,13 +496,15 @@ out_unlock:
  	}
  	if (de)
  		de_put(de);
@@ -12722,7 +14972,7 @@
  }
  
  /*
-@@ -464,13 +505,14 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
+@@ -464,13 +516,14 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
   * value of the readdir() call, as long as it's non-negative
   * for success..
   */
@@ -12739,7 +14989,7 @@
  
  	ino = inode->i_ino;
  	i = filp->f_pos;
-@@ -491,25 +533,19 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+@@ -491,25 +544,19 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
  			/* fall through */
  		default:
  			spin_lock(&proc_subdir_lock);
@@ -12774,7 +15024,7 @@
  				spin_unlock(&proc_subdir_lock);
  				if (filldir(dirent, de->name, de->namelen, filp->f_pos,
  					    de->low_ino, de->mode >> 12) < 0) {
-@@ -518,10 +554,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+@@ -518,10 +565,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
  				}
  				spin_lock(&proc_subdir_lock);
  				filp->f_pos++;
@@ -12793,7 +15043,7 @@
  			spin_unlock(&proc_subdir_lock);
  	}
  	ret = 1;
-@@ -533,7 +576,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
+@@ -533,7 +587,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
  {
  	struct inode *inode = filp->f_path.dentry->d_inode;
  
@@ -13111,7 +15361,7 @@
  }
  
 diff --git a/fs/proc/root.c b/fs/proc/root.c
-index b080b79..39e1923 100644
+index b080b79..36f59af 100644
 --- a/fs/proc/root.c
 +++ b/fs/proc/root.c
 @@ -42,6 +42,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
@@ -13176,7 +15426,28 @@
  #if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
  	/* just give it a mountpoint */
  	proc_mkdir("openprom", NULL);
-@@ -205,6 +219,22 @@ struct proc_dir_entry proc_root = {
+@@ -141,8 +155,19 @@ void __init proc_root_init(void)
+ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
+ )
+ {
++	struct ve_struct *ve = get_exec_env();
++
+ 	generic_fillattr(dentry->d_inode, stat);
+-	stat->nlink = proc_root.nlink + nr_processes();
++	stat->nlink = glob_proc_root.nlink;
++	if (ve_is_super(ve))
++		stat->nlink += nr_processes();
++#ifdef CONFIG_VE
++	else
++		/* thread count, not really a process count */
++		stat->nlink += atomic_read(&ve->pcounter);
++	/* the same logic as in the proc_getattr */
++	stat->nlink += ve->proc_root->nlink - 2;
++#endif
+ 	return 0;
+ }
+ 
+@@ -205,6 +230,22 @@ struct proc_dir_entry proc_root = {
  	.parent		= &proc_root,
  };
  
@@ -13407,7 +15678,7 @@
 +
 +obj-y				+= vzdquota/
 diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
-index 2ed79a9..acfde60 100644
+index 4fdb0eb..e7aff07 100644
 --- a/fs/quota/dquot.c
 +++ b/fs/quota/dquot.c
 @@ -170,8 +170,9 @@ static struct quota_format_type *find_quota_format(int id)
@@ -13423,7 +15694,7 @@
  	if (!actqf || !try_module_get(actqf->qf_owner)) {
  		int qm;
 diff --git a/fs/quota/quota.c b/fs/quota/quota.c
-index 95c5b42..41a6f18 100644
+index 95c5b42..7d9d4b4 100644
 --- a/fs/quota/quota.c
 +++ b/fs/quota/quota.c
 @@ -18,6 +18,7 @@
@@ -13501,7 +15772,7 @@
  	sb = get_super(bdev);
  	bdput(bdev);
  	if (!sb)
-@@ -379,6 +390,215 @@ static struct super_block *quotactl_block(const char __user *special)
+@@ -379,6 +390,231 @@ static struct super_block *quotactl_block(const char __user *special)
  #endif
  }
  
@@ -13534,6 +15805,21 @@
 +	__kernel_time_t dqb_itime;
 +};
 +
++#ifdef CONFIG_COMPAT
++
++struct compat_compat_dqblk {
++	compat_uint_t	dqb_ihardlimit;
++	compat_uint_t	dqb_isoftlimit;
++	compat_uint_t	dqb_curinodes;
++	compat_uint_t	dqb_bhardlimit;
++	compat_uint_t	dqb_bsoftlimit;
++	compat_u64	dqb_curspace;
++	compat_time_t	dqb_btime;
++	compat_time_t	dqb_itime;
++};
++
++#endif
++
 +struct compat_dqinfo {
 +	unsigned int dqi_bgrace;
 +	unsigned int dqi_igrace;
@@ -13556,6 +15842,7 @@
 +};
 +
 +asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr);
++
 +static long compat_quotactl(unsigned int cmds, unsigned int type,
 +		const char __user *special, qid_t id,
 +		void __user *addr)
@@ -13717,7 +16004,7 @@
  /*
   * This is the system call interface. This communicates with
   * the user-level programs. Currently this only supports diskquota
-@@ -395,6 +615,11 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
+@@ -395,6 +631,11 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
  	cmds = cmd >> SUBCMDSHIFT;
  	type = cmd & SUBCMDMASK;
  
@@ -13729,6 +16016,62 @@
  	if (cmds != Q_SYNC || special) {
  		sb = quotactl_block(special);
  		if (IS_ERR(sb))
+@@ -459,6 +700,11 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+ 	compat_uint_t data;
+ 	u16 xdata;
+ 	long ret;
++#ifdef CONFIG_QUOTA_COMPAT
++	struct compat_dqblk __user *cdq;
++	struct compat_compat_dqblk __user *compat_cdq;
++	compat_time_t time;
++#endif
+ 
+ 	cmds = cmd >> SUBCMDSHIFT;
+ 
+@@ -519,6 +765,43 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+ 			break;
+ 		ret = 0;
+ 		break;
++#ifdef CONFIG_QUOTA_COMPAT
++	case QC_GETQUOTA:
++		cdq = compat_alloc_user_space(sizeof(struct compat_dqblk));
++		compat_cdq = addr;
++		ret = sys_quotactl(cmd, special, id, cdq);
++		if (ret)
++			break;
++		ret = -EFAULT;
++		if (copy_in_user(compat_cdq, cdq, sizeof(struct compat_compat_dqblk) -
++				offsetof(struct compat_compat_dqblk, dqb_curspace)) ||
++			copy_in_user(&compat_cdq->dqb_curspace, &cdq->dqb_curspace,
++				sizeof(cdq->dqb_curspace)) ||
++			get_user(time, &cdq->dqb_btime) ||
++			put_user(time, &compat_cdq->dqb_btime) ||
++			get_user(time, &cdq->dqb_itime) ||
++			put_user(time, &compat_cdq->dqb_itime))
++			break;
++		ret = 0;
++		break;
++	case QC_SETQUOTA:
++	case QC_SETUSE:
++	case QC_SETQLIM:
++		cdq = compat_alloc_user_space(sizeof(struct compat_dqblk));
++		compat_cdq = addr;
++		ret = -EFAULT;
++		if (copy_in_user(cdq, compat_cdq, sizeof(struct compat_compat_dqblk) -
++				offsetof(struct compat_compat_dqblk, dqb_curspace)) ||
++			copy_in_user(&cdq->dqb_curspace, &compat_cdq->dqb_curspace,
++				sizeof(cdq->dqb_curspace)) ||
++			get_user(time, &compat_cdq->dqb_btime) ||
++			put_user(time, &cdq->dqb_btime) ||
++			get_user(time, &compat_cdq->dqb_itime) ||
++			put_user(time, &cdq->dqb_itime))
++			break;
++		ret = sys_quotactl(cmd, special, id, cdq);
++		break;
++#endif
+ 	default:
+ 		ret = sys_quotactl(cmd, special, id, addr);
+ 	}
 diff --git a/fs/quota/vzdquota/Makefile b/fs/quota/vzdquota/Makefile
 new file mode 100644
 index 0000000..03fdee3
@@ -13741,10 +16084,10 @@
 +vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
 diff --git a/fs/quota/vzdquota/vzdq_file.c b/fs/quota/vzdquota/vzdq_file.c
 new file mode 100644
-index 0000000..0355917
+index 0000000..3ac9f05
 --- /dev/null
 +++ b/fs/quota/vzdquota/vzdq_file.c
-@@ -0,0 +1,928 @@
+@@ -0,0 +1,956 @@
 +/*
 + *
 + * Copyright (C) 2005 SWsoft
@@ -13783,12 +16126,12 @@
 + * File read operation
 + *
 + * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
-+ * perhaps) abuse vz_quota_sem.
-+ * Taking a global semaphore for lengthy and user-controlled operations inside
++ * perhaps) abuse vz_quota_mutex.
++ * Taking a global mutex for lengthy and user-controlled operations inside
 + * VPSs is not a good idea in general.
-+ * In this case, the reasons for taking this semaphore are completely unclear,
++ * In this case, the reasons for taking this mutex are completely unclear,
 + * especially taking into account that the only function that has comments
-+ * about the necessity to be called under this semaphore
++ * about the necessity to be called under this mutex
 + * (create_proc_quotafile) is actually called OUTSIDE it.
 + *
 + * --------------------------------------------------------------------- */
@@ -13817,7 +16160,7 @@
 +	int			type;	/* type of the tree */
 +};
 +
-+/* serialized by vz_quota_sem */
++/* serialized by vz_quota_mutex */
 +static LIST_HEAD(qf_data_head);
 +
 +static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
@@ -14054,8 +16397,8 @@
 +		return -ENOMEM;
 +
 +	qtd = data;
-+	down(&vz_quota_sem);
-+	down(&qtd->qmblk->dq_sem);
++	mutex_lock(&vz_quota_mutex);
++	mutex_lock(&qtd->qmblk->dq_mutex);
 +
 +	res = 0;
 +	tree = QUGID_TREE(qtd->qmblk, qtd->type);
@@ -14094,8 +16437,8 @@
 +out_err:
 +	*start += count;
 +out_dq:
-+	up(&qtd->qmblk->dq_sem);
-+	up(&vz_quota_sem);
++	mutex_unlock(&qtd->qmblk->dq_mutex);
++	mutex_unlock(&vz_quota_mutex);
 +	kfree(tmp);
 +
 +	return res;
@@ -14617,6 +16960,33 @@
 +	return ERR_PTR(-ENOENT);
 +}
 +
++static int vzdq_aquotd_getattr(struct vfsmount *mnt, struct dentry *dentry,
++		struct kstat *stat)
++{
++	struct ve_struct *ve, *old_ve;
++	struct list_head mntlist, *pos;
++
++	generic_fillattr(dentry->d_inode, stat);
++	ve = dentry->d_sb->s_type->owner_env;
++#ifdef CONFIG_VE
++	/*
++	 * The only reason for disabling getattr for the host system is that
++	 * this getattr can be slow and CPU-consuming with a large number of VPSs
++	 * (or just mount points).
++	 */
++	if (ve_is_super(ve))
++		return 0;
++#endif
++	INIT_LIST_HEAD(&mntlist);
++	old_ve = set_exec_env(ve);
++	if (!vzdq_aquot_buildmntlist(ve, &mntlist))
++		list_for_each(pos, &mntlist)
++			stat->nlink++;
++	vzdq_aquot_releasemntlist(ve, &mntlist);
++	(void)set_exec_env(old_ve);
++	return 0;
++}
++
 +static struct file_operations vzdq_aquotd_file_operations = {
 +	.read		= &generic_read_dir,
 +	.readdir	= &vzdq_aquotd_readdir,
@@ -14624,6 +16994,7 @@
 +
 +static struct inode_operations vzdq_aquotd_inode_operations = {
 +	.lookup		= &vzdq_aquotd_lookup,
++	.getattr	= &vzdq_aquotd_getattr,
 +};
 +
 +
@@ -14675,7 +17046,7 @@
 +}
 diff --git a/fs/quota/vzdquota/vzdq_mgmt.c b/fs/quota/vzdquota/vzdq_mgmt.c
 new file mode 100644
-index 0000000..5e078ed
+index 0000000..bd066de
 --- /dev/null
 +++ b/fs/quota/vzdquota/vzdq_mgmt.c
 @@ -0,0 +1,754 @@
@@ -14764,7 +17135,7 @@
 +	struct vz_quota_stat qstat;
 +	struct vz_quota_master *qmblk;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -EFAULT;
 +	if (!compat) {
@@ -14792,7 +17163,7 @@
 +	if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
 +		err = PTR_ERR(qmblk);
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -14816,7 +17187,7 @@
 +	struct super_block *dqsb;
 +
 +	dqsb = NULL;
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -14856,7 +17227,7 @@
 +		goto out_init;
 +	qmblk->dq_state = VZDQ_WORKING;
 +
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	return 0;
 +
 +out_init:
@@ -14871,7 +17242,7 @@
 +out:
 +	if (dqsb)
 +		vzquota_put_super(dqsb);
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	return err;
 +}
 +
@@ -14889,7 +17260,7 @@
 +	struct vz_quota_master *qmblk;
 +	struct path root;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -14907,14 +17278,14 @@
 +
 +	if (qmblk->dq_sb)
 +		vzquota_put_super(qmblk->dq_sb);
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	qmblk_put(qmblk);
 +	path_put(&root);
 +	return 0;
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	return err;
 +}
 +
@@ -14992,7 +17363,7 @@
 +	int err, ret;
 +	struct vz_quota_master *qmblk;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -15015,7 +17386,7 @@
 +	/* vzquota_destroy will free resources */
 +	qmblk->dq_state = VZDQ_STOPING;
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -15090,7 +17461,7 @@
 +	struct vz_quota_stat qstat;
 +	struct vz_quota_master *qmblk;
 +
-+	down(&vz_quota_sem); /* for hash list protection */
++	mutex_lock(&vz_quota_mutex); /* for hash list protection */
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -15118,7 +17489,7 @@
 +	qmblk_data_write_unlock(qmblk);
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	return err;
 +}
 +
@@ -15133,7 +17504,7 @@
 +	struct vz_quota_stat qstat;
 +	struct vz_quota_master *qmblk;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -15160,7 +17531,7 @@
 +		err = -EFAULT;
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	return err;
 +}
 +
@@ -15354,7 +17725,7 @@
 +		p += len;
 +	}
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	/* traverse master hash table for all records */
 +	for (i = 0; i < vzquota_hash_size; i++) {
@@ -15395,7 +17766,7 @@
 +
 +	*eof = 1; /* checked all hash */
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	len = 0;
 +	if (*start != NULL) {
@@ -15435,10 +17806,10 @@
 +#endif
 diff --git a/fs/quota/vzdquota/vzdq_ops.c b/fs/quota/vzdquota/vzdq_ops.c
 new file mode 100644
-index 0000000..e22d573
+index 0000000..904ff5e
 --- /dev/null
 +++ b/fs/quota/vzdquota/vzdq_ops.c
-@@ -0,0 +1,632 @@
+@@ -0,0 +1,644 @@
 +/*
 + * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
 + * All rights reserved.
@@ -15942,11 +18313,13 @@
 +		 * of vzquota.
 +		 *
 +		 * To be safe, we reacquire vzquota lock.
++		 * The assumption is that it would not hurt to call
++		 * vzquota_inode_drop() more than once, but it must
++		 * be called at least once after S_NOQUOTA is set.
 +		 */
 +		inode_qmblk_lock(inode->i_sb);
 +		inode->i_flags |= S_NOQUOTA;
 +		inode_qmblk_unlock(inode->i_sb);
-+		return;
 +	} else {
 +		loff_t bytes = inode_get_bytes(inode);
 +#ifdef CONFIG_VZ_QUOTA_UGID
@@ -15969,9 +18342,8 @@
 +#endif
 +
 +		vzquota_data_unlock(inode, &data);
-+
-+		vzquota_inode_drop_call(inode);
 +	}
++	vzquota_inode_drop_call(inode);
 +}
 +
 +
@@ -16035,6 +18407,12 @@
 +		NO_QUOTA : QUOTA_OK;
 +}
 +
++static void vzquota_swap_inode(struct inode *inode, struct inode *tmpl)
++{
++	vzquota_inode_swap_call(inode, tmpl);
++}
++
++
 +#else /* CONFIG_VZ_QUOTA_UGID */
 +
 +static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
@@ -16058,6 +18436,8 @@
 +		NO_QUOTA : QUOTA_OK;
 +}
 +
++extern void vzquota_shutdown_super(struct super_block *sb);
++
 +/*
 + * Structure of superblock diskquota operations.
 + */
@@ -16070,6 +18450,9 @@
 +	.free_inode	= vzquota_free_inode,
 +	.transfer	= vzquota_transfer,
 +	.rename		= vzquota_rename,
++
++	.swap_inode	= vzquota_swap_inode,
++	.shutdown	= vzquota_shutdown_super,
 +};
 diff --git a/fs/quota/vzdquota/vzdq_tree.c b/fs/quota/vzdquota/vzdq_tree.c
 new file mode 100644
@@ -16365,10 +18748,10 @@
 +}
 diff --git a/fs/quota/vzdquota/vzdq_ugid.c b/fs/quota/vzdquota/vzdq_ugid.c
 new file mode 100644
-index 0000000..60e0981
+index 0000000..a3e9e8c
 --- /dev/null
 +++ b/fs/quota/vzdquota/vzdq_ugid.c
-@@ -0,0 +1,1220 @@
+@@ -0,0 +1,1216 @@
 +/*
 + * Copyright (C) 2002 SWsoft
 + * All rights reserved.
@@ -16410,10 +18793,6 @@
 +
 +static struct kmem_cache *vz_quota_ugid_cachep;
 +
-+/* guard to protect vz_quota_master from destroy in quota_on/off. Also protects
-+ * list on the hash table */
-+extern struct semaphore vz_quota_sem;
-+
 +inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
 +{
 +	if (qugid != VZ_QUOTA_UGBAD)
@@ -16436,7 +18815,7 @@
 +
 +/*
 + * destroy ugid, if it have zero refcount, limits and usage
-+ * must be called under qmblk->dq_sem
++ * must be called under qmblk->dq_mutex
 + */
 +void vzquota_put_ugid(struct vz_quota_master *qmblk,
 +		struct vz_quota_ugid *qugid)
@@ -16483,7 +18862,7 @@
 +}
 +
 +/*
-+ * requires dq_sem
++ * requires dq_mutex
 + */
 +struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
 +			unsigned int quota_id, int type, int flags)
@@ -16541,16 +18920,16 @@
 +}
 +
 +/*
-+ * takes dq_sem, may schedule
++ * takes dq_mutex, may schedule
 + */
 +struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
 +			unsigned int quota_id, int type, int flags)
 +{
 +	struct vz_quota_ugid *qugid;
 +
-+	down(&qmblk->dq_sem);
++	mutex_lock(&qmblk->dq_mutex);
 +	qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
-+	up(&qmblk->dq_sem);
++	mutex_unlock(&qmblk->dq_mutex);
 +
 +	return qugid;
 +}
@@ -16705,7 +19084,7 @@
 +	if (err < 0)
 +		goto out_put;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	mask2 = 0;
 +	sb->dq_op = &vz_quota_operations2;
 +	sb->s_qcop = &vz_quotactl_operations;
@@ -16724,7 +19103,7 @@
 +			DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, type);
 +
 +out_sem:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +out_put:
 +	qmblk_put(qmblk);
 +out:
@@ -16738,7 +19117,7 @@
 +	int err;
 +
 +	qmblk = vzquota_find_qmblk(sb);
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	err = -ESRCH;
 +	if (qmblk == NULL)
 +		goto out;
@@ -16759,7 +19138,7 @@
 +	err = 0;
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
 +		qmblk_put(qmblk);
 +	return err;
@@ -16778,7 +19157,7 @@
 +	int err;
 +
 +	qmblk = vzquota_find_qmblk(sb);
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	err = -ESRCH;
 +	if (qmblk == NULL)
 +		goto out;
@@ -16807,13 +19186,13 @@
 +	}
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
 +		qmblk_put(qmblk);
 +	return err;
 +}
 +
-+/* must be called under vz_quota_sem */
++/* must be called under vz_quota_mutex */
 +static int __vz_set_dqblk(struct vz_quota_master *qmblk,
 +		int type, qid_t id, struct if_dqblk *di)
 +{
@@ -16882,7 +19261,7 @@
 +	int err;
 +
 +	qmblk = vzquota_find_qmblk(sb);
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	err = -ESRCH;
 +	if (qmblk == NULL)
 +		goto out;
@@ -16891,7 +19270,7 @@
 +		goto out;
 +	err = __vz_set_dqblk(qmblk, type, id, di);
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
 +		qmblk_put(qmblk);
 +	return err;
@@ -16904,7 +19283,7 @@
 +	int err;
 +
 +	qmblk = vzquota_find_qmblk(sb);
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	err = -ESRCH;
 +	if (qmblk == NULL)
 +		goto out;
@@ -16919,13 +19298,13 @@
 +	ii->dqi_valid = IIF_ALL;
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
 +		qmblk_put(qmblk);
 +	return err;
 +}
 +
-+/* must be called under vz_quota_sem */
++/* must be called under vz_quota_mutex */
 +static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
 +		int type, struct if_dqinfo *ii)
 +{
@@ -16947,7 +19326,7 @@
 +	int err;
 +
 +	qmblk = vzquota_find_qmblk(sb);
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	err = -ESRCH;
 +	if (qmblk == NULL)
 +		goto out;
@@ -16956,7 +19335,7 @@
 +		goto out;
 +	err = __vz_set_dqinfo(qmblk, type, ii);
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
 +		qmblk_put(qmblk);
 +	return err;
@@ -17003,8 +19382,8 @@
 +	if (!kbuf)
 +		goto out;
 +
-+	down(&vz_quota_sem);
-+	down(&qmblk->dq_sem);
++	mutex_lock(&vz_quota_mutex);
++	mutex_lock(&qmblk->dq_mutex);
 +	for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
 +		ugid != NULL && count < Q_GETQUOTI_SIZE;
 +		count++)
@@ -17019,8 +19398,8 @@
 +		ugid = vzquota_get_next(qmblk, ugid);
 +		BUG_ON(ugid != NULL && ugid->qugid_type != type);
 +	}
-+	up(&qmblk->dq_sem);
-+	up(&vz_quota_sem);
++	mutex_unlock(&qmblk->dq_mutex);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	err = count;
 +	if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf)))
@@ -17060,7 +19439,7 @@
 +	struct vz_quota_master *qmblk;
 +	int ret;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	ret = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17114,7 +19493,7 @@
 +		vzquota_put_ugid(qmblk, ugid);
 +	}
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return ret;
 +}
@@ -17127,7 +19506,7 @@
 +	struct dq_info *target;
 +	int err, type;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17161,7 +19540,7 @@
 +		target->iexpire = dq_info[type].iexpire;
 +	}
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -17230,16 +19609,16 @@
 +	if (k_ugid_buf == NULL)
 +		return -ENOMEM;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
 +	if (qmblk == NULL)
 +		goto out;
 +
-+	down(&qmblk->dq_sem);
++	mutex_lock(&qmblk->dq_mutex);
 +	err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf);
-+	up(&qmblk->dq_sem);
++	mutex_unlock(&qmblk->dq_mutex);
 +	if (err < 0)
 +		goto out;
 +
@@ -17265,7 +19644,7 @@
 +	}
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	vfree(k_ugid_buf);
 +	return err;
 +}
@@ -17278,7 +19657,7 @@
 +	struct dq_info *target;
 +	int err, type;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17307,7 +19686,7 @@
 +#endif
 +	}
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -17319,7 +19698,7 @@
 +	struct vz_quota_ugid_stat kinfo;
 +	int err;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17334,7 +19713,7 @@
 +	if (copy_to_user(info, &kinfo, sizeof(kinfo)))
 +		err = -EFAULT;
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -17346,7 +19725,7 @@
 +	struct vz_quota_ugid_stat kinfo;
 +	int err;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ENOENT;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17366,7 +19745,7 @@
 +	}		
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -17378,7 +19757,7 @@
 +	struct vz_quota_ugid_setlimit lim;
 +	int err;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ESRCH;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17392,7 +19771,7 @@
 +	err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -17404,7 +19783,7 @@
 +	struct vz_quota_ugid_setinfo info;
 +	int err;
 +
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +
 +	err = -ESRCH;
 +	qmblk = vzquota_find_master(quota_id);
@@ -17418,7 +19797,7 @@
 +	err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
 +
 +out:
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +
 +	return err;
 +}
@@ -17509,14 +19888,14 @@
 +	qmblk = vzquota_find_qmblk(sb);
 +	if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
 +		return;
-+	down(&vz_quota_sem);
++	mutex_lock(&vz_quota_mutex);
 +	if (qmblk->dq_flags & VZDQ_USRQUOTA)
 +		sb->s_dquot.flags |= dquot_state_flag(DQUOT_USAGE_ENABLED |
 +				DQUOT_LIMITS_ENABLED, USRQUOTA);
 +	if (qmblk->dq_flags & VZDQ_GRPQUOTA)
 +		sb->s_dquot.flags |= dquot_state_flag(DQUOT_USAGE_ENABLED |
 +				DQUOT_LIMITS_ENABLED, GRPQUOTA);
-+	up(&vz_quota_sem);
++	mutex_unlock(&vz_quota_mutex);
 +	qmblk_put(qmblk);
 +}
 +
@@ -17591,10 +19970,10 @@
 +}
 diff --git a/fs/quota/vzdquota/vzdquot.c b/fs/quota/vzdquota/vzdquot.c
 new file mode 100644
-index 0000000..6f2f22a
+index 0000000..f091943
 --- /dev/null
 +++ b/fs/quota/vzdquota/vzdquot.c
-@@ -0,0 +1,1961 @@
+@@ -0,0 +1,1994 @@
 +/*
 + * Copyright (C) 2001, 2002, 2004, 2005  SWsoft
 + * All rights reserved.
@@ -17639,7 +20018,7 @@
 + * Serializes on/off and all other do_vzquotactl operations.
 + * Protects qmblk hash.
 + */
-+struct semaphore vz_quota_sem;
++struct mutex vz_quota_mutex;
 +
 +/*
 + * Data access locks
@@ -17703,7 +20082,7 @@
 + *
 + * Master hash table handling.
 + *
++ * SMP not safe, serialized by vz_quota_mutex within quota syscalls
++ * SMP not safe, serialied by vz_quota_mutex within quota syscalls
 + *
 + * --------------------------------------------------------------------- */
 +
@@ -17757,7 +20136,7 @@
 +#endif
 +
 +	qmblk->dq_state = VZDQ_STARTING;
-+	init_MUTEX(&qmblk->dq_sem);
++	mutex_init(&qmblk->dq_mutex);
 +	spin_lock_init(&qmblk->dq_data_lock);
 +
 +	qmblk->dq_id = quota_id;
@@ -17811,7 +20190,7 @@
 + * vzquota_find_master - find master record with given id
 + *
 + * Returns qmblk without touching its refcounter.
-+ * Called under vz_quota_sem.
++ * Called under vz_quota_mutex.
 + */
 +struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
 +{
@@ -17830,7 +20209,7 @@
 + * vzquota_free_master - release resources taken by qmblk, freeing memory
 + *
 + * qmblk is assumed to be already taken out from the hash.
-+ * Should be called outside vz_quota_sem.
++ * Should be called outside vz_quota_mutex.
 + */
 +void vzquota_free_master(struct vz_quota_master *qmblk)
 +{
@@ -17912,7 +20291,7 @@
 + * quotas.  We keep a counter of such subtrees and set VZ quota operations or
 + * reset the default ones.
 + *
-+ * Called under vz_quota_sem (from quota_on).
++ * Called under vz_quota_mutex (from quota_on).
 + */
 +int vzquota_get_super(struct super_block *sb)
 +{
@@ -17954,7 +20333,7 @@
 +		__module_get(THIS_MODULE);
 +		up(&sb->s_dquot.dqonoff_sem);
 +	}
-+	/* protected by vz_quota_sem */
++	/* protected by vz_quota_mutex */
 +	__VZ_QUOTA_SBREF(sb)++;
 +	return 0;
 +}
@@ -17962,7 +20341,7 @@
 +/**
 + * quota_put_super - release superblock when one quota tree goes away
 + *
-+ * Called under vz_quota_sem.
++ * Called under vz_quota_mutex.
 + */
 +void vzquota_put_super(struct super_block *sb)
 +{
@@ -18004,28 +20383,17 @@
 +
 +#else
 +
-+struct vzquota_new_sop {
-+	struct super_operations new_op;
-+	const struct super_operations *old_op;
-+};
-+
 +/**
 + * vzquota_shutdown_super - callback on umount
 + */
 +void vzquota_shutdown_super(struct super_block *sb)
 +{
 +	struct vz_quota_master *qmblk;
-+	struct vzquota_new_sop *sop;
 +
 +	qmblk = __VZ_QUOTA_NOQUOTA(sb);
 +	__VZ_QUOTA_NOQUOTA(sb) = NULL;
 +	if (qmblk != NULL)
 +		qmblk_put(qmblk);
-+	sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
-+	sb->s_op = sop->old_op;
-+	kfree(sop);
-+	if (sb->s_op->put_super != NULL)
-+		(*sb->s_op->put_super)(sb);
 +}
 +
 +/**
@@ -18034,12 +20402,11 @@
 + * One superblock can have multiple directory subtrees with different VZ
 + * quotas.
 + *
-+ * Called under vz_quota_sem (from vzquota_on).
++ * Called under vz_quota_mutex (from vzquota_on).
 + */
 +int vzquota_get_super(struct super_block *sb)
 +{
 +	struct vz_quota_master *qnew;
-+	struct vzquota_new_sop *sop;
 +	int err;
 +
 +	mutex_lock(&sb->s_dquot.dqonoff_mutex);
@@ -18059,17 +20426,6 @@
 +	}
 +
 +	if (sb->dq_op != &vz_quota_operations) {
-+		sop = kmalloc(sizeof(*sop), GFP_KERNEL);
-+		if (sop == NULL) {
-+			vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
-+			__VZ_QUOTA_NOQUOTA(sb) = NULL;
-+			goto out_up;
-+		}
-+		memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
-+		sop->new_op.put_super = &vzquota_shutdown_super;
-+		sop->old_op = sb->s_op;
-+		sb->s_op = &sop->new_op;
-+
 +		sb->dq_op = &vz_quota_operations;
 +#ifdef CONFIG_VZ_QUOTA_UGID
 +		sb->s_qcop = &vz_quotactl_operations;
@@ -18115,7 +20471,7 @@
 +/**
 + * vzquota_put_super - one quota tree less on this superblock
 + *
-+ * Called under vz_quota_sem.
++ * Called under vz_quota_mutex.
 + */
 +void vzquota_put_super(struct super_block *sb)
 +{
@@ -18194,12 +20550,12 @@
 +		quid = qlnk->qugid[USRQUOTA];
 +		qgid = qlnk->qugid[GRPQUOTA];
 +		if (quid != NULL || qgid != NULL) {
-+			down(&qmblk->dq_sem);
++			mutex_lock(&qmblk->dq_mutex);
 +			if (qgid != NULL)
 +				vzquota_put_ugid(qmblk, qgid);
 +			if (quid != NULL)
 +				vzquota_put_ugid(qmblk, quid);
-+			up(&qmblk->dq_sem);
++			mutex_unlock(&qmblk->dq_mutex);
 +		}
 +	}
 +#endif
@@ -18315,10 +20671,10 @@
 +		spin_unlock(&dcache_lock);
 +		inode_qmblk_unlock(inode->i_sb);
 +
-+		down(&qmblk->dq_sem);
++		mutex_lock(&qmblk->dq_mutex);
 +		quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
 +		qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
-+		up(&qmblk->dq_sem);
++		mutex_unlock(&qmblk->dq_mutex);
 +
 +		inode_qmblk_lock(inode->i_sb);
 +		spin_lock(&dcache_lock);
@@ -18361,14 +20717,14 @@
 +		qmblk_data_write_unlock(qmblk);
 +		inode_qmblk_unlock(inode->i_sb);
 +
-+		down(&qmblk->dq_sem);
++		mutex_lock(&qmblk->dq_mutex);
 +		if (mask & (1 << USRQUOTA))
 +			quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
 +					USRQUOTA, 0);
 +		if (mask & (1 << GRPQUOTA))
 +			qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
 +					GRPQUOTA, 0);
-+		up(&qmblk->dq_sem);
++		mutex_unlock(&qmblk->dq_mutex);
 +
 +		inode_qmblk_lock(inode->i_sb);
 +		qmblk_data_write_lock(qmblk);
@@ -18529,6 +20885,29 @@
 +	return qmblk;
 +}
 +
++/* The NFS root is a disconnected dentry. */
++
++static int is_nfs_root(struct inode * inode)
++{
++	struct dentry *de;
++
++	if (inode->i_sb->s_magic != 0x6969)
++		return 0;
++
++	if (list_empty(&inode->i_dentry))
++		return 0;
++
++	list_for_each_entry(de, &inode->i_dentry, d_alias) {
++		if (de->d_parent != de)
++			return 0;
++		if (d_unhashed(de))
++			return 0;
++		if (!(de->d_flags & DCACHE_DISCONNECTED))
++			return 0;
++	}
++	return 1;
++}
++
 +static void vzquota_dbranch_actualize(struct inode *inode,
 +		struct inode *refinode)
 +{
@@ -18539,7 +20918,7 @@
 +	vzquota_qlnk_init(&qlnk);
 +
 +start:
-+	if (inode == inode->i_sb->s_root->d_inode) {
++	if (inode == inode->i_sb->s_root->d_inode || is_nfs_root(inode)) {
 +		/* filesystem root */
 +		atomic_inc(&inode->i_count);
 +		do {
@@ -18594,7 +20973,7 @@
 +	struct inode *pinode;
 +	struct vz_quota_master *qmblk;
 +
-+	if (inode == inode->i_sb->s_root->d_inode) {
++	if (inode == inode->i_sb->s_root->d_inode || is_nfs_root(inode)) {
 +		/* filesystem root */
 +		do {
 +			qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
@@ -18859,6 +21238,39 @@
 +	spin_unlock(&dcache_lock);
 +}
 +
++void vzquota_inode_swap_call(struct inode *inode, struct inode *tmpl)
++{
++	struct vz_quota_master *qmblk;
++
++	__vzquota_inode_init(inode, VZ_QUOTAO_INIT);
++
++	might_sleep();
++
++	inode_qmblk_lock(tmpl->i_sb);
++	if (unlikely(tmpl->i_flags & S_NOQUOTA)) {
++		inode_qmblk_unlock(tmpl->i_sb);
++		return;
++	}
++	__vzquota_inode_init(tmpl, VZ_QUOTAO_INICAL);
++
++	qmblk = INODE_QLNK(tmpl)->qmblk;
++	if (qmblk != VZ_QUOTA_BAD) {
++		void * uq;
++		list_del_init(&INODE_QLNK(tmpl)->list);
++		vzquota_qlnk_swap(INODE_QLNK(tmpl), INODE_QLNK(inode));
++		uq = inode->i_dquot[USRQUOTA];
++		inode->i_dquot[USRQUOTA] = tmpl->i_dquot[USRQUOTA];
++		tmpl->i_dquot[USRQUOTA] = uq;
++		tmpl->i_flags |= S_NOQUOTA;
++		inode_qmblk_unlock(inode->i_sb);
++
++		vzquota_inode_drop(tmpl);
++	} else {
++		inode_qmblk_unlock(tmpl->i_sb);
++	}
++}
++
++
 +/**
 + * vzquota_inode_drop_call - call from DQUOT_DROP
 + */
@@ -19513,7 +21925,7 @@
 +		goto out_ugid;
 +#endif
 +
-+	init_MUTEX(&vz_quota_sem);
++	mutex_init(&vz_quota_mutex);
 +	vzioctl_register(&vzdqcalls);
 +	virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
 +#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
@@ -19893,10 +22305,10 @@
  		size_t, sizemask)
 diff --git a/fs/simfs.c b/fs/simfs.c
 new file mode 100644
-index 0000000..2fccd6d
+index 0000000..e21f911
 --- /dev/null
 +++ b/fs/simfs.c
-@@ -0,0 +1,335 @@
+@@ -0,0 +1,339 @@
 +/*
 + *  fs/simfs.c
 + *
@@ -20032,7 +22444,7 @@
 +
 +	err = -ENOSYS;
 +	if (lsb && lsb->s_op && lsb->s_op->statfs)
-+		err = lsb->s_op->statfs(lsb->s_root, &statbuf);
++		err = lsb->s_op->statfs(sb->s_root, &statbuf);
 +	if (err)
 +		return err;
 +
@@ -20074,10 +22486,12 @@
 +	return (err ? NOTIFY_BAD : NOTIFY_OK);
 +}
 +
++#ifdef CONFIG_QUOTA
 +static struct inode *sim_quota_root(struct super_block *sb)
 +{
 +	return sb->s_root->d_inode;
 +}
++#endif
 +
 +/*
 + * NOTE: We need to setup s_bdev field on super block, since sys_quotactl()
@@ -20124,7 +22538,9 @@
 +}
 +
 +static struct super_operations sim_super_ops = {
++#ifdef CONFIG_QUOTA
 +	.get_quota_root	= sim_quota_root,
++#endif
 +};
 +
 +static int sim_fill_super(struct super_block *s, void *data)
@@ -20277,7 +22693,7 @@
  		return inode->i_op->getattr(mnt, dentry, stat);
  
 diff --git a/fs/super.c b/fs/super.c
-index aff046b..a2e26f4 100644
+index aff046b..cce99ab 100644
 --- a/fs/super.c
 +++ b/fs/super.c
 @@ -37,12 +37,15 @@
@@ -20314,7 +22730,13 @@
  		/*
  		 * sget() can have s_umount recursion.
  		 *
-@@ -311,7 +316,7 @@ void generic_shutdown_super(struct super_block *sb)
+@@ -307,11 +312,13 @@ void generic_shutdown_super(struct super_block *sb)
+ 		/* bad name - it should be evict_inodes() */
+ 		invalidate_inodes(sb);
+ 
++		if (sb->dq_op && sb->dq_op->shutdown)
++			sb->dq_op->shutdown(sb);
+ 		if (sop->put_super)
  			sop->put_super(sb);
  
  		/* Forget any remaining inodes */
@@ -20323,7 +22745,7 @@
  			printk("VFS: Busy inodes after unmount of %s. "
  			   "Self-destruct in 5 seconds.  Have a nice day...\n",
  			   sb->s_id);
-@@ -531,17 +536,26 @@ rescan:
+@@ -531,17 +538,26 @@ rescan:
  	spin_unlock(&sb_lock);
  	return NULL;
  }
@@ -20354,7 +22776,7 @@
  	err = vfs_statfs(s->s_root, &sbuf);
  	drop_super(s);
  	if (err)
-@@ -653,6 +667,13 @@ static DEFINE_IDA(unnamed_dev_ida);
+@@ -653,6 +669,13 @@ static DEFINE_IDA(unnamed_dev_ida);
  static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
  static int unnamed_dev_start = 0; /* don't bother trying below it */
  
@@ -20368,7 +22790,7 @@
  int set_anon_super(struct super_block *s, void *data)
  {
  	int dev;
-@@ -672,7 +693,7 @@ int set_anon_super(struct super_block *s, void *data)
+@@ -672,7 +695,7 @@ int set_anon_super(struct super_block *s, void *data)
  	else if (error)
  		return -EAGAIN;
  
@@ -20377,7 +22799,7 @@
  		spin_lock(&unnamed_dev_lock);
  		ida_remove(&unnamed_dev_ida, dev);
  		if (unnamed_dev_start > dev)
-@@ -680,7 +701,7 @@ int set_anon_super(struct super_block *s, void *data)
+@@ -680,7 +703,7 @@ int set_anon_super(struct super_block *s, void *data)
  		spin_unlock(&unnamed_dev_lock);
  		return -EMFILE;
  	}
@@ -20386,7 +22808,7 @@
  	return 0;
  }
  
-@@ -688,8 +709,9 @@ EXPORT_SYMBOL(set_anon_super);
+@@ -688,8 +711,9 @@ EXPORT_SYMBOL(set_anon_super);
  
  void kill_anon_super(struct super_block *sb)
  {
@@ -20856,6 +23278,51 @@
  extern struct kmem_cache *sysfs_dir_cachep;
  
  /*
+diff --git a/fs/utimes.c b/fs/utimes.c
+index e4c75db..86a62a1 100644
+--- a/fs/utimes.c
++++ b/fs/utimes.c
+@@ -40,6 +40,20 @@ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
+ 
+ #endif
+ 
++SYSCALL_DEFINE2(lutime, char __user *, filename, struct utimbuf __user *, times)
++{
++	struct timespec tv[2];
++
++	if (times) {
++		if (get_user(tv[0].tv_sec, &times->actime) ||
++		    get_user(tv[1].tv_sec, &times->modtime))
++			return -EFAULT;
++		tv[0].tv_nsec = 0;
++		tv[1].tv_nsec = 0;
++	}
++	return do_utimes(AT_FDCWD, filename, times ? tv : NULL, AT_SYMLINK_NOFOLLOW);
++}
++
+ static bool nsec_valid(long nsec)
+ {
+ 	if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
+diff --git a/fs/xattr.c b/fs/xattr.c
+index 6d4f6d3..3243bd7 100644
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -115,6 +115,15 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ 	struct inode *inode = dentry->d_inode;
+ 	int error;
+ 
++#if defined(CONFIG_VE) && defined(CONFIG_SYSCTL)
++	if (!ve_is_super(get_exec_env())) {
++		if (ve_xattr_policy == VE_XATTR_POLICY_IGNORE)
++			return 0;
++		else if (ve_xattr_policy == VE_XATTR_POLICY_REJECT)
++			return -EPERM;
++	}
++#endif
++
+ 	error = xattr_permission(inode, name, MAY_WRITE);
+ 	if (error)
+ 		return error;
 diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
 index 32c8bd6..cb151a4 100644
 --- a/include/asm-generic/mman.h
@@ -20870,10 +23337,10 @@
  #define MCL_FUTURE	2		/* lock all future mappings */
 diff --git a/include/bc/beancounter.h b/include/bc/beancounter.h
 new file mode 100644
-index 0000000..aabbc72
+index 0000000..7ba4c77
 --- /dev/null
 +++ b/include/bc/beancounter.h
-@@ -0,0 +1,454 @@
+@@ -0,0 +1,453 @@
 +/*
 + *  include/bc/beancounter.h
 + *
@@ -20953,12 +23420,12 @@
 +/* Add new resources here */
 +
 +#define UB_NUMXTENT	23
-+#define UB_RESOURCES	24
++#define UB_SWAPPAGES	24
++#define UB_RESOURCES	25
 +
 +#define UB_UNUSEDPRIVVM	(UB_RESOURCES + 0)
 +#define UB_TMPFSPAGES	(UB_RESOURCES + 1)
-+#define UB_SWAPPAGES	(UB_RESOURCES + 2)
-+#define UB_HELDPAGES	(UB_RESOURCES + 3)
++#define UB_HELDPAGES	(UB_RESOURCES + 2)
 +
 +struct ubparm {
 +	/* 
@@ -21017,7 +23484,7 @@
 +struct page_private {
 +	unsigned long		ubp_unused_privvmpages;
 +	unsigned long		ubp_tmpfs_respages;
-+	unsigned long		ubp_swap_pages;
++	unsigned long		ubp_pbcs;
 +	unsigned long long	ubp_held_pages;
 +};
 +
@@ -21046,7 +23513,6 @@
 +#ifdef CONFIG_BC_DEBUG_KMEM
 +	long	pages_charged;
 +	long	vmalloc_charged;
-+	long	pbcs;
 +#endif
 +	unsigned long	sync;
 +	unsigned long	sync_done;
@@ -21080,6 +23546,7 @@
 +
 +	spinlock_t		ub_lock;
 +	uid_t			ub_uid;
++	unsigned int		ub_cookie;
 +
 +	struct ub_rate_info	ub_limit_rl;
 +	int			ub_oom_noproc;
@@ -21087,8 +23554,8 @@
 +	struct page_private	ppriv;
 +#define ub_unused_privvmpages	ppriv.ubp_unused_privvmpages
 +#define ub_tmpfs_respages	ppriv.ubp_tmpfs_respages
-+#define ub_swap_pages		ppriv.ubp_swap_pages
 +#define ub_held_pages		ppriv.ubp_held_pages
++#define ub_pbcs			ppriv.ubp_pbcs
 +	struct sock_private	spriv;
 +#define ub_rmem_thres		spriv.ubp_rmem_thres
 +#define ub_maxadvmss		spriv.ubp_maxadvmss
@@ -21100,6 +23567,7 @@
 +#define ub_tw_count		spriv.ubp_tw_count
 +
 +	struct user_beancounter *parent;
++	int			ub_childs;
 +	void			*private_data;
 +	unsigned long		ub_aflags;
 +
@@ -21125,6 +23593,8 @@
 +#endif
 +};
 +
++extern int ub_count;
++
 +enum ub_severity { UB_HARD, UB_SOFT, UB_FORCE };
 +
 +#define UB_AFLAG_NOTIF_PAGEIN	0
@@ -21189,16 +23659,12 @@
 +#else /* CONFIG_BEANCOUNTERS */
 +
 +#define ub_percpu_add(ub, field, v)		do {			\
-+		if (ub->ub_percpu == NULL)				\
-+			break;						\
 +		per_cpu_ptr(ub->ub_percpu, get_cpu())->field += (v);	\
 +		put_cpu();						\
 +	} while (0)
 +#define ub_percpu_inc(ub, field) ub_percpu_add(ub, field, 1)
 +
 +#define ub_percpu_sub(ub, field, v)		do {			\
-+		if (ub->ub_percpu == NULL)				\
-+			break;						\
 +		per_cpu_ptr(ub->ub_percpu, get_cpu())->field -= (v);	\
 +		put_cpu();						\
 +	} while (0)
@@ -21491,10 +23957,10 @@
 +#endif /* __dcache_op.h_ */
 diff --git a/include/bc/debug.h b/include/bc/debug.h
 new file mode 100644
-index 0000000..7b1feb6
+index 0000000..58c64f3
 --- /dev/null
 +++ b/include/bc/debug.h
-@@ -0,0 +1,109 @@
+@@ -0,0 +1,103 @@
 +/*
 + *  include/bc/debug.h
 + *
@@ -21588,17 +24054,11 @@
 +			ub_percpu_sub(ub, vmalloc_charged,		\
 +					vm->nr_pages);			\
 +	} while (0)
-+
-+#define inc_pbc_count(ub)	ub_percpu_inc(ub, pbcs)
-+#define dec_pbc_count(ub)	ub_percpu_dec(ub, pbcs)
 +#else
 +#define init_cache_counters()		do { } while (0)
 +#define inc_vmalloc_charged(vm, f)	do { } while (0)
 +#define dec_vmalloc_charged(vm)		do { } while (0)
 +
-+#define inc_pbc_count(ub)		do { } while (0)
-+#define dec_pbc_count(ub)		do { } while (0)
-+
 +#define ub_free_counters(ub)		do { } while (0)
 +#define ub_kmemcache_free(cachep)	do { } while (0)
 +#endif
@@ -21695,7 +24155,7 @@
 +#endif /* _LINUX_UBHASH_H */
 diff --git a/include/bc/io_acct.h b/include/bc/io_acct.h
 new file mode 100644
-index 0000000..d84bf5a
+index 0000000..361b26c
 --- /dev/null
 +++ b/include/bc/io_acct.h
 @@ -0,0 +1,113 @@
@@ -21714,6 +24174,8 @@
 +#ifndef __UB_IO_ACCT_H_
 +#define __UB_IO_ACCT_H_
 +
++#define PAGE_IO_MARK   (0x1UL)
++
 +#ifdef CONFIG_BC_IO_ACCOUNTING
 +#include <bc/beancounter.h>
 +#include <bc/rss_pages.h>
@@ -21748,8 +24210,6 @@
 +extern void ub_io_save_context(struct page *, size_t);
 +extern void ub_io_release_context(struct page *pg, size_t size);
 +
-+#define PAGE_IO_MARK	(0x1UL)
-+
 +static inline struct page_beancounter *iopb_to_pb(struct page_beancounter *pb)
 +{
 +	if (!((unsigned long)pb & PAGE_IO_MARK))
@@ -22876,7 +25336,7 @@
 +
  #endif /* __LINUX__AIO_H */
 diff --git a/include/linux/capability.h b/include/linux/capability.h
-index c8f2a5f..3f85123 100644
+index c8f2a5f..301d709 100644
 --- a/include/linux/capability.h
 +++ b/include/linux/capability.h
 @@ -197,12 +197,9 @@ struct cpu_vfs_cap_data {
@@ -22926,7 +25386,7 @@
  /* Allow setting readahead and flushing buffers on block devices */
  /* Allow setting geometry in floppy driver */
  /* Allow turning DMA on/off in xd driver */
-@@ -340,6 +333,50 @@ struct cpu_vfs_cap_data {
+@@ -340,6 +333,61 @@ struct cpu_vfs_cap_data {
  
  #define CAP_SETFCAP	     31
  
@@ -22938,10 +25398,21 @@
 + */
 +
 +/* Allow access to all information. In the other case some structures will be
-+   hiding to ensure different Virtual Environment non-interaction on the same
-+   node */
++ * hidden to ensure different Virtual Environment non-interaction on the same
++ * node (NOW OBSOLETE)
++ */
 +#define CAP_SETVEID	     29
 +
++#define capable_setveid()	({			\
++		ve_is_super(get_exec_env()) &&		\
++			(capable(CAP_SYS_ADMIN) ||	\
++			 capable(CAP_VE_ADMIN));	\
++	})
++
++/*
++ * coincides with CAP_AUDIT_CONTROL but we don't care, since
++ * audit is disabled in Virtuozzo
++ */
 +#define CAP_VE_ADMIN	     30
 +
 +#ifdef CONFIG_VE
@@ -22977,7 +25448,7 @@
  /* Override MAC access.
     The base kernel enforces no MAC policy.
     An LSM may enforce a MAC policy, and if it does and it chooses
-@@ -418,7 +455,16 @@ struct cpu_vfs_cap_data {
+@@ -418,7 +466,16 @@ struct cpu_vfs_cap_data {
  #define CAP_INIT_INH_SET    CAP_EMPTY_SET
  
  # define cap_clear(c)         do { (c) = __cap_empty_set; } while (0)
@@ -22994,7 +25465,7 @@
  # define cap_set_init_eff(c)  do { (c) = __cap_init_eff_set; } while (0)
  
  #define cap_raise(c, flag)  ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
-@@ -536,6 +582,10 @@ extern const kernel_cap_t __cap_empty_set;
+@@ -536,6 +593,10 @@ extern const kernel_cap_t __cap_empty_set;
  extern const kernel_cap_t __cap_full_set;
  extern const kernel_cap_t __cap_init_eff_set;
  
@@ -23061,10 +25532,10 @@
 +
 diff --git a/include/linux/cpt_image.h b/include/linux/cpt_image.h
 new file mode 100644
-index 0000000..6ab78b7
+index 0000000..8185d4e
 --- /dev/null
 +++ b/include/linux/cpt_image.h
-@@ -0,0 +1,1799 @@
+@@ -0,0 +1,1842 @@
 +/*
 + *
 + *  include/linux/cpt_image.h
@@ -23187,11 +25658,15 @@
 +#define CPT_VERSION_16		0x200
 +#define CPT_VERSION_18		0x300
 +#define CPT_VERSION_18_1	0x301
++#define CPT_VERSION_18_2	0x302
++#define CPT_VERSION_18_3	0x303
 +#define CPT_VERSION_20		0x400
 +#define CPT_VERSION_24		0x500
 +#define CPT_VERSION_26		0x600
 +#define CPT_VERSION_27		0x700
++#define CPT_VERSION_27_3	0x703
 +#define CPT_VERSION_32		0x800
++#define CPT_CURRENT_VERSION	CPT_VERSION_32
 +	__u16	cpt_os_arch;		/* Architecture */
 +#define CPT_OS_ARCH_I386	0
 +#define CPT_OS_ARCH_EMT64	1
@@ -23238,6 +25713,7 @@
 +#define CPT_BIND_MOUNT		21
 +#define CPT_UNSUPPORTED_NETDEV	22
 +#define CPT_UNSUPPORTED_MISC	23
++#define CPT_SLM_DMPRST		24
 +
 +/* This mask is used to determine whether VE
 +   has some unsupported features or not */
@@ -23291,6 +25767,7 @@
 +	CPT_SECT_VSYSCALL,
 +	CPT_SECT_INOTIFY,
 +	CPT_SECT_SYSV_MSG,
++	CPT_SECT_SNMP_STATS,
 +	CPT_SECT_MAX
 +};
 +
@@ -23380,7 +25857,7 @@
 +
 +	/* later extension */
 +	__u32	last_pid;
-+	__u32	pad1;
++	__u32	rnd_va_space;
 +	__u64	reserved[8];
 +} __attribute__ ((aligned (8)));
 +
@@ -23409,6 +25886,8 @@
 +#define CPT_DENTRY_INOTIFY	0x40
 +#define CPT_DENTRY_FUTEX	0x80
 +#define CPT_DENTRY_TUNTAP	0x100
++#define CPT_DENTRY_PROCPID_DEAD 0x200
++#define CPT_DENTRY_HARDLINKED	0x400
 +#define CPT_DENTRY_SIGNALFD	0x800
 +	__u64	cpt_inode;
 +	__u64	cpt_priv;
@@ -24373,6 +26852,8 @@
 +
 +	__u64	cpt_state;
 +	__u64	cpt_flags;
++#define CPT_TASK_FLAGS_MASK	(PF_EXITING | PF_FORKNOEXEC | \
++				 PF_SUPERPRIV | PF_DUMPCORE | PF_SIGNALED)
 +	__u64	cpt_ptrace;
 +	__u32	cpt_prio;
 +	__u32	cpt_static_prio;
@@ -24771,6 +27252,39 @@
 +	__u32	cpt_mark;
 +} __attribute__ ((aligned (8)));
 +
++/* cpt_ip_conntrack_image struct from 2.6.9 kernel */
++struct cpt_ip_conntrack_image_compat
++{
++	__u64	cpt_next;
++	__u32	cpt_object;
++	__u16	cpt_hdrlen;
++	__u16	cpt_content;
++
++	struct cpt_ipct_tuple cpt_tuple[2];
++	__u64	cpt_status;
++	__u64	cpt_timeout;
++	__u32	cpt_index;
++	__u8	cpt_ct_helper;
++	__u8	cpt_nat_helper;
++	__u16	__cpt_pad1;
++
++	/* union ip_conntrack_proto. Used by tcp and icmp. */
++	__u32	cpt_proto_data[12];
++
++	/* union ip_conntrack_help. Used only by ftp helper. */
++	__u32	cpt_help_data[4];
++
++	/* nat info */
++	__u32	cpt_initialized;
++	__u32	cpt_num_manips;
++	struct  cpt_nat_manip	cpt_nat_manips[6];
++
++	struct	cpt_nat_seq	cpt_nat_seq[2];
++
++	__u32	cpt_masq_index;
++	__u32	__cpt_pad2;
++} __attribute__ ((aligned (8)));
++
 +struct cpt_ubparm
 +{
 +	__u64	barrier;
@@ -24789,7 +27303,7 @@
 +
 +	__u64	cpt_parent;
 +	__u32	cpt_id;
-+	__u32	__cpt_pad;
++	__u32   cpt_ub_resources;
 +	struct	cpt_ubparm	cpt_parms[32 * 2];
 +} __attribute__ ((aligned (8)));
 +
@@ -24866,10 +27380,10 @@
 +#endif /* __CPT_IMAGE_H_ */
 diff --git a/include/linux/cpt_ioctl.h b/include/linux/cpt_ioctl.h
 new file mode 100644
-index 0000000..b8e83cc
+index 0000000..f31b66c
 --- /dev/null
 +++ b/include/linux/cpt_ioctl.h
-@@ -0,0 +1,43 @@
+@@ -0,0 +1,45 @@
 +/*
 + *
 + *  include/linux/cpt_ioctl.h
@@ -24911,6 +27425,8 @@
 +#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
 +
 +#define CPT_ITER	_IOW(CPTCTLTYPE, 23, int)
++#define CPT_LINKDIR_ADD	_IOW(CPTCTLTYPE, 24, int)
++#define CPT_HARDLNK_ON	_IOW(CPTCTLTYPE, 25, int)
 +
 +#endif
 diff --git a/include/linux/dcache.h b/include/linux/dcache.h
@@ -25153,10 +27669,10 @@
  static inline void eventpoll_init_file(struct file *file) {}
 diff --git a/include/linux/fairsched.h b/include/linux/fairsched.h
 new file mode 100644
-index 0000000..e08c84d
+index 0000000..521455c
 --- /dev/null
 +++ b/include/linux/fairsched.h
-@@ -0,0 +1,86 @@
+@@ -0,0 +1,92 @@
 +/*
 + * Fair Scheduler
 + *
@@ -25229,6 +27745,9 @@
 +asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight);
 +asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate);
 +
++int fairsched_new_node(int id, unsigned int vcpus);
++void fairsched_drop_node(int id);
++
 +#else /* CONFIG_VZ_FAIRSCHED */
 +
 +static inline void fairsched_init_early(void) { }
@@ -25237,6 +27756,9 @@
 +static inline void get_task_fairsched_node(struct task_struct *p) { }
 +static inline void put_task_fairsched_node(struct task_struct *p) { }
 +
++static inline int fairsched_new_node(int id, unsigned int vcpus) { return 0; }
++static inline void fairsched_drop_node(int id) { }
++
 +#define	INIT_VZ_FAIRSCHED
 +
 +#endif /* CONFIG_VZ_FAIRSCHED */
@@ -25306,10 +27828,10 @@
 +
  #endif /* __LINUX_FILE_H */
 diff --git a/include/linux/freezer.h b/include/linux/freezer.h
-index 5a361f8..9426083 100644
+index da7e52b..099191c 100644
 --- a/include/linux/freezer.h
 +++ b/include/linux/freezer.h
-@@ -160,6 +160,8 @@ static inline void set_freezable_with_signal(void)
+@@ -163,6 +163,8 @@ static inline void set_freezable_with_signal(void)
  	} while (try_to_freeze());					\
  	__retval;							\
  })
@@ -25319,7 +27841,7 @@
  static inline int frozen(struct task_struct *p) { return 0; }
  static inline int freezing(struct task_struct *p) { return 0; }
 diff --git a/include/linux/fs.h b/include/linux/fs.h
-index 692a3ee..53547b0 100644
+index 9b67805..3fef9ef 100644
 --- a/include/linux/fs.h
 +++ b/include/linux/fs.h
 @@ -53,6 +53,7 @@ struct inodes_stat_t {
@@ -25348,7 +27870,17 @@
  #define FS_REVAL_DOT	16384	/* Check the paths ".", ".." for staleness */
  #define FS_RENAME_DOES_D_MOVE	32768	/* FS will handle d_move()
  					 * during rename() internally.
-@@ -370,7 +375,6 @@ struct inodes_stat_t {
+@@ -235,6 +240,9 @@ struct inodes_stat_t {
+ #define S_SWAPFILE	256	/* Do not truncate: swapon got its bmaps */
+ #define S_PRIVATE	512	/* Inode is fs-internal */
+ 
++/* VZ flags -- These are not upstream! */
++#define S_NOUNUSE	(1 << 17) /* just destroy inode in cleanup */
++
+ /*
+  * Note that nosuid etc flags are inode-specific: setting some file-system
+  * flags just means all the inodes inherit those flags by default. It might be
+@@ -370,7 +378,6 @@ struct inodes_stat_t {
  #include <linux/path.h>
  #include <linux/stat.h>
  #include <linux/cache.h>
@@ -25356,7 +27888,7 @@
  #include <linux/list.h>
  #include <linux/radix-tree.h>
  #include <linux/prio_tree.h>
-@@ -405,6 +409,7 @@ extern int get_max_files(void);
+@@ -405,6 +412,7 @@ extern int get_max_files(void);
  extern int sysctl_nr_open;
  extern struct inodes_stat_t inodes_stat;
  extern int leases_enable, lease_break_time;
@@ -25364,7 +27896,7 @@
  #ifdef CONFIG_DNOTIFY
  extern int dir_notify_enable;
  #endif
-@@ -464,10 +469,15 @@ struct iattr {
+@@ -464,10 +472,15 @@ struct iattr {
  	struct file	*ia_file;
  };
  
@@ -25380,7 +27912,7 @@
  
  /** 
   * enum positive_aop_returns - aop return codes with specific semantics
-@@ -754,6 +764,9 @@ struct inode {
+@@ -754,6 +767,9 @@ struct inode {
  #ifdef CONFIG_QUOTA
  	struct dquot		*i_dquot[MAXQUOTAS];
  #endif
@@ -25390,7 +27922,7 @@
  	struct list_head	i_devices;
  	union {
  		struct pipe_inode_info	*i_pipe;
-@@ -809,6 +822,8 @@ enum inode_i_mutex_lock_class
+@@ -809,6 +825,8 @@ enum inode_i_mutex_lock_class
  	I_MUTEX_QUOTA
  };
  
@@ -25399,7 +27931,7 @@
  /*
   * NOTE: in a 32bit arch with a preemptable kernel and
   * an UP compile the i_size_read/write must be atomic
-@@ -929,6 +944,7 @@ struct file {
+@@ -929,6 +947,7 @@ struct file {
  	struct fown_struct	f_owner;
  	const struct cred	*f_cred;
  	struct file_ra_state	f_ra;
@@ -25407,7 +27939,7 @@
  
  	u64			f_version;
  #ifdef CONFIG_SECURITY
-@@ -945,6 +961,7 @@ struct file {
+@@ -945,6 +964,7 @@ struct file {
  #ifdef CONFIG_DEBUG_WRITECOUNT
  	unsigned long f_mnt_write_state;
  #endif
@@ -25415,7 +27947,7 @@
  };
  extern spinlock_t files_lock;
  #define file_list_lock() spin_lock(&files_lock);
-@@ -1063,6 +1080,9 @@ struct file_lock {
+@@ -1063,6 +1083,9 @@ struct file_lock {
  	fl_owner_t fl_owner;
  	unsigned char fl_flags;
  	unsigned char fl_type;
@@ -25425,7 +27957,7 @@
  	unsigned int fl_pid;
  	struct pid *fl_nspid;
  	wait_queue_head_t fl_wait;
-@@ -1509,6 +1529,7 @@ struct file_operations {
+@@ -1509,6 +1532,7 @@ struct file_operations {
  	ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
  	ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
  	int (*setlease)(struct file *, long, struct file_lock **);
@@ -25433,7 +27965,7 @@
  };
  
  struct inode_operations {
-@@ -1578,6 +1599,7 @@ struct super_operations {
+@@ -1578,6 +1602,7 @@ struct super_operations {
  #ifdef CONFIG_QUOTA
  	ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
  	ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
@@ -25441,7 +27973,7 @@
  #endif
  	int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
  };
-@@ -1755,8 +1777,14 @@ struct file_system_type {
+@@ -1755,8 +1780,14 @@ struct file_system_type {
  	struct lock_class_key i_mutex_key;
  	struct lock_class_key i_mutex_dir_key;
  	struct lock_class_key i_alloc_sem_key;
@@ -25456,7 +27988,7 @@
  extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
  	int (*fill_super)(struct super_block *, void *, int),
  	struct vfsmount *mnt);
-@@ -1800,6 +1828,11 @@ extern int register_filesystem(struct file_system_type *);
+@@ -1800,13 +1831,20 @@ extern int register_filesystem(struct file_system_type *);
  extern int unregister_filesystem(struct file_system_type *);
  extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
  #define kern_mount(type) kern_mount_data(type, NULL)
@@ -25466,9 +27998,10 @@
 +extern void umount_ve_fs_type(struct file_system_type *local_fs_type);
 +#define kern_umount mntput
  extern int may_umount_tree(struct vfsmount *);
++extern struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root);
  extern int may_umount(struct vfsmount *);
  extern long do_mount(char *, char *, char *, unsigned long, void *);
-@@ -1807,6 +1840,7 @@ extern struct vfsmount *collect_mounts(struct path *);
+ extern struct vfsmount *collect_mounts(struct path *);
  extern void drop_collected_mounts(struct vfsmount *);
  
  extern int vfs_statfs(struct dentry *, struct kstatfs *);
@@ -25476,7 +28009,7 @@
  
  extern int current_umask(void);
  
-@@ -2065,7 +2099,8 @@ extern int check_disk_change(struct block_device *);
+@@ -2065,7 +2103,8 @@ extern int check_disk_change(struct block_device *);
  extern int __invalidate_device(struct block_device *);
  extern int invalidate_partition(struct gendisk *, int);
  #endif
@@ -25486,7 +28019,7 @@
  unsigned long invalidate_mapping_pages(struct address_space *mapping,
  					pgoff_t start, pgoff_t end);
  
-@@ -2477,6 +2512,17 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
+@@ -2478,6 +2517,17 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
  ssize_t simple_attr_write(struct file *file, const char __user *buf,
  			  size_t len, loff_t *ppos);
  
@@ -26009,7 +28542,7 @@
 +
  #endif
 diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 24c3956..d38e63e 100644
+index 24c3956..7bb1cf3 100644
 --- a/include/linux/mm.h
 +++ b/include/linux/mm.h
 @@ -712,6 +712,7 @@ extern void pagefault_out_of_memory(void);
@@ -26040,6 +28573,20 @@
  int set_page_dirty(struct page *page);
  int set_page_dirty_lock(struct page *page);
  int clear_page_dirty_for_io(struct page *page);
+@@ -1294,7 +1297,12 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+ #ifndef CONFIG_MMU
+ #define randomize_va_space 0
+ #else
+-extern int randomize_va_space;
++extern int _randomize_va_space;
++#ifndef CONFIG_VE
++#define randomize_va_space _randomize_va_space
++#else
++#define randomize_va_space (get_exec_env()->_randomize_va_space)
++#endif
+ #endif
+ 
+ const char * arch_vma_name(struct vm_area_struct *vma);
 diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
 index 84a524a..8ecf0ec 100644
 --- a/include/linux/mm_types.h
@@ -26607,8 +29154,21 @@
 +};  
 +    
 +#endif /*_IPT_OWNER_H*/
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index d09db1b..5b36364 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -374,7 +374,7 @@ extern const struct address_space_operations nfs_file_aops;
+ 
+ static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
+ {
+-	return filp->private_data;
++	return file_private(filp);
+ }
+ 
+ static inline struct rpc_cred *nfs_file_cred(struct file *file)
 diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
-index 320569e..8e0d228 100644
+index b26dc51..643e380 100644
 --- a/include/linux/nfs_fs_sb.h
 +++ b/include/linux/nfs_fs_sb.h
 @@ -91,6 +91,7 @@ struct nfs_client {
@@ -26619,6 +29179,17 @@
  };
  
  /*
+diff --git a/include/linux/nmi.h b/include/linux/nmi.h
+index b752e80..ed9d975 100644
+--- a/include/linux/nmi.h
++++ b/include/linux/nmi.h
+@@ -47,4 +47,6 @@ static inline bool trigger_all_cpu_backtrace(void)
+ }
+ #endif
+ 
++extern void nmi_show_regs(struct pt_regs *regs, int in_nmi);
++extern int do_nmi_show_regs(struct pt_regs *regs, int cpu);
+ #endif
 diff --git a/include/linux/notifier.h b/include/linux/notifier.h
 index 44428d2..a3a0a02 100644
 --- a/include/linux/notifier.h
@@ -26763,10 +29334,10 @@
  #endif /* KERNEL */
  
 diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
-index 379eaed..52c8b17 100644
+index 379eaed..80bd26a 100644
 --- a/include/linux/proc_fs.h
 +++ b/include/linux/proc_fs.h
-@@ -103,6 +103,8 @@ struct vmcore {
+@@ -103,9 +103,14 @@ struct vmcore {
  #ifdef CONFIG_PROC_FS
  
  extern void proc_root_init(void);
@@ -26775,7 +29346,13 @@
  
  void proc_flush_task(struct task_struct *task);
  
-@@ -149,6 +151,8 @@ extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *);
++extern int proc_dentry_of_dead_task(struct dentry *dentry);
++extern struct file_operations dummy_proc_pid_file_operations;
++
+ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+ 						struct proc_dir_entry *parent);
+ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
+@@ -149,6 +154,8 @@ extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *);
  extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
  			struct proc_dir_entry *parent);
  
@@ -26784,7 +29361,16 @@
  static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode,
  	struct proc_dir_entry *parent, const struct file_operations *proc_fops)
  {
-@@ -268,6 +272,9 @@ struct proc_inode {
+@@ -184,6 +191,8 @@ extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
+ #define proc_net_fops_create(net, name, mode, fops)  ({ (void)(mode), NULL; })
+ static inline void proc_net_remove(struct net *net, const char *name) {}
+ 
++static inline int proc_dentry_of_dead_task(struct dentry *dentry) { return 0; }
++
+ static inline void proc_flush_task(struct task_struct *task)
+ {
+ }
+@@ -268,6 +277,9 @@ struct proc_inode {
  	struct proc_dir_entry *pde;
  	struct ctl_table_header *sysctl;
  	struct ctl_table *sysctl_entry;
@@ -26794,7 +29380,7 @@
  	struct inode vfs_inode;
  };
  
-@@ -281,6 +288,15 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
+@@ -281,6 +293,15 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
  	return PROC_I(inode)->pde;
  }
  
@@ -26811,7 +29397,7 @@
  {
  	return pde->parent->data;
 diff --git a/include/linux/quota.h b/include/linux/quota.h
-index 8fd8efc..8cd6b71 100644
+index 8fd8efc..5fa291e 100644
 --- a/include/linux/quota.h
 +++ b/include/linux/quota.h
 @@ -173,6 +173,10 @@ enum {
@@ -26834,11 +29420,14 @@
  /* Operations working with dquots */
  struct dquot_operations {
  	int (*initialize) (struct inode *, int);
-@@ -316,9 +322,11 @@ struct dquot_operations {
+@@ -316,9 +322,14 @@ struct dquot_operations {
  	/* get reserved quota for delayed alloc, value returned is managed by
  	 * quota code only */
  	qsize_t *(*get_reserved_space) (struct inode *);
 +	int (*rename) (struct inode *, struct inode *, struct inode *);
++
++	void (*swap_inode) (struct inode *, struct inode *);
++	void (*shutdown) (struct super_block *);
  };
  
  /* Operations handling requests from userspace */
@@ -26846,7 +29435,7 @@
  struct quotactl_ops {
  	int (*quota_on)(struct super_block *, int, int, char *, int);
  	int (*quota_off)(struct super_block *, int, int);
-@@ -331,6 +339,10 @@ struct quotactl_ops {
+@@ -331,6 +342,10 @@ struct quotactl_ops {
  	int (*set_xstate)(struct super_block *, unsigned int, int);
  	int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
  	int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
@@ -26857,7 +29446,7 @@
  };
  
  struct quota_format_type {
-@@ -385,6 +397,10 @@ struct quota_info {
+@@ -385,6 +400,10 @@ struct quota_info {
  	struct inode *files[MAXQUOTAS];		/* inodes of quotafiles */
  	struct mem_dqinfo info[MAXQUOTAS];	/* Information for each quota type */
  	struct quota_format_ops *ops[MAXQUOTAS];	/* Operations for each type */
@@ -26869,7 +29458,7 @@
  
  int register_quota_format(struct quota_format_type *fmt);
 diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
-index a529d86..579a15c 100644
+index a529d86..bdbe1f7 100644
 --- a/include/linux/quotaops.h
 +++ b/include/linux/quotaops.h
 @@ -264,6 +264,19 @@ static inline void vfs_dq_free_inode(struct inode *inode)
@@ -26892,7 +29481,43 @@
  /* Cannot be called inside a transaction */
  static inline int vfs_dq_off(struct super_block *sb, int remount)
  {
-@@ -363,6 +376,12 @@ static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+@@ -274,6 +287,35 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
+ 	return ret;
+ }
+ 
++static __inline__ void DQUOT_SWAP(struct inode *inode, struct inode *tmpl)
++{
++	if (sb_any_quota_active(tmpl->i_sb) &&
++	    tmpl->i_sb->dq_op->swap_inode)
++		tmpl->i_sb->dq_op->swap_inode(inode, tmpl);
++}
++
++static __inline__ int DQUOT_CHECK_SPACE(struct inode *inode)
++{
++	if (vfs_dq_alloc_space_nodirty(inode, 512))
++		return -EDQUOT;
++	vfs_dq_free_space_nodirty(inode, 512);
++	return 0;
++}
++
++static __inline__ void DQUOT_SYNC_BLOCKS(struct inode *inode, blkcnt_t blocks)
++{
++	if (sb_any_quota_active(inode->i_sb)) {
++		if (blocks > inode->i_blocks)
++			inode->i_sb->dq_op->alloc_space(inode,
++							(qsize_t)(blocks-inode->i_blocks)*512,
++							13 /*DQUOT_CMD_FORCE*/);
++		else if (blocks < inode->i_blocks)
++			inode->i_sb->dq_op->free_space(inode, (qsize_t)(inode->i_blocks-blocks)*512);
++	} else
++		inode->i_blocks = blocks;
++}
++
++
+ #else
+ 
+ static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
+@@ -363,6 +405,12 @@ static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
  	return 0;
  }
  
@@ -26905,6 +29530,22 @@
  static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
  {
  	inode_add_bytes(inode, nr);
+@@ -416,6 +464,15 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
+ 	mark_inode_dirty(inode);
+ }	
+ 
++static inline void DQUOT_SWAP(struct inode *inode, struct inode *tmpl)
++{
++}
++
++static inline void DQUOT_SYNC_BLOCKS(struct inode *inode, blkcnt_t blocks)
++{
++	inode->i_blocks = blocks;
++}
++
+ #endif /* CONFIG_QUOTA */
+ 
+ static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
 diff --git a/include/linux/rmap.h b/include/linux/rmap.h
 index cb0ba70..b14f124 100644
 --- a/include/linux/rmap.h
@@ -26919,7 +29560,7 @@
  static inline void page_dup_rmap(struct page *page)
  {
 diff --git a/include/linux/sched.h b/include/linux/sched.h
-index 70abfd3..fa44cc6 100644
+index 70abfd3..d6155c1 100644
 --- a/include/linux/sched.h
 +++ b/include/linux/sched.h
 @@ -94,6 +94,8 @@ struct sched_param {
@@ -26979,15 +29620,7 @@
  
  
  extern void calc_global_load(void);
-@@ -286,6 +313,7 @@ static inline void show_state(void)
- }
- 
- extern void show_regs(struct pt_regs *);
-+extern void smp_show_regs(struct pt_regs *, void *);
- 
- /*
-  * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
-@@ -553,6 +581,9 @@ struct thread_group_cputimer {
+@@ -553,6 +580,9 @@ struct thread_group_cputimer {
  	spinlock_t lock;
  };
  
@@ -26997,7 +29630,7 @@
  /*
   * NOTE! "signal_struct" does not have it's own
   * locking, because a shared signal_struct always
-@@ -1283,6 +1314,7 @@ struct task_struct {
+@@ -1283,6 +1313,7 @@ struct task_struct {
  	unsigned in_execve:1;	/* Tell the LSMs that the process is doing an
  				 * execve */
  	unsigned in_iowait:1;
@@ -27005,7 +29638,7 @@
  
  
  	/* Revert to default priority/policy when forking */
-@@ -1498,6 +1530,14 @@ struct task_struct {
+@@ -1498,6 +1529,14 @@ struct task_struct {
  	struct rcu_head rcu;
  
  	/*
@@ -27020,7 +29653,7 @@
  	 * cache last used pipe for splice
  	 */
  	struct pipe_inode_info *splice_pipe;
-@@ -1542,6 +1582,19 @@ struct task_struct {
+@@ -1542,6 +1581,19 @@ struct task_struct {
  	unsigned long trace_recursion;
  #endif /* CONFIG_TRACING */
  	unsigned long stack_start;
@@ -27040,7 +29673,7 @@
  };
  
  /* Future-safe accessor for struct task_struct's cpus_allowed. */
-@@ -1727,6 +1780,43 @@ extern cputime_t task_utime(struct task_struct *p);
+@@ -1727,6 +1779,43 @@ extern cputime_t task_utime(struct task_struct *p);
  extern cputime_t task_stime(struct task_struct *p);
  extern cputime_t task_gtime(struct task_struct *p);
  
@@ -27084,7 +29717,7 @@
  /*
   * Per process flags
   */
-@@ -1736,6 +1826,7 @@ extern cputime_t task_gtime(struct task_struct *p);
+@@ -1736,6 +1825,7 @@ extern cputime_t task_gtime(struct task_struct *p);
  #define PF_EXITING	0x00000004	/* getting shut down */
  #define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
  #define PF_VCPU		0x00000010	/* I'm a virtual CPU */
@@ -27092,7 +29725,7 @@
  #define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
  #define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
  #define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
-@@ -1872,6 +1963,21 @@ extern unsigned long long
+@@ -1872,6 +1962,21 @@ extern unsigned long long
  task_sched_runtime(struct task_struct *task);
  extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
  
@@ -27114,7 +29747,7 @@
  /* sched_exec is called by processes performing an exec */
  #ifdef CONFIG_SMP
  extern void sched_exec(void);
-@@ -2151,6 +2257,13 @@ extern int disallow_signal(int);
+@@ -2151,6 +2256,13 @@ extern int disallow_signal(int);
  
  extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
  extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
@@ -27128,7 +29761,7 @@
  struct task_struct *fork_idle(int);
  
  extern void set_task_comm(struct task_struct *tsk, char *from);
-@@ -2168,11 +2281,11 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
+@@ -2168,11 +2280,11 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
  }
  #endif
  
@@ -27143,7 +29776,7 @@
  
  extern bool current_is_single_threaded(void);
  
-@@ -2180,10 +2293,10 @@ extern bool current_is_single_threaded(void);
+@@ -2180,10 +2292,10 @@ extern bool current_is_single_threaded(void);
   * Careful: do_each_thread/while_each_thread is a double loop so
   *          'break' will not work as expected - use goto instead.
   */
@@ -27157,7 +29790,7 @@
  	while ((t = next_thread(t)) != g)
  
  /* de_thread depends on thread_group_leader not being a pid based check */
-@@ -2208,8 +2321,14 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+@@ -2208,8 +2320,14 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
  
  static inline struct task_struct *next_thread(const struct task_struct *p)
  {
@@ -27173,7 +29806,7 @@
  }
  
  static inline int thread_group_empty(struct task_struct *p)
-@@ -2254,6 +2373,98 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
+@@ -2254,6 +2372,98 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
  	spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
  }
  
@@ -27690,42 +30323,6 @@
  
  		if (!s)
  			return ZERO_SIZE_PTR;
-diff --git a/include/linux/smp.h b/include/linux/smp.h
-index 39c64ba..7b81017 100644
---- a/include/linux/smp.h
-+++ b/include/linux/smp.h
-@@ -13,6 +13,9 @@
- 
- extern void cpu_idle(void);
- 
-+struct pt_regs;
-+typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
-+
- struct call_single_data {
- 	struct list_head list;
- 	void (*func) (void *info);
-@@ -66,6 +69,8 @@ extern int __cpu_up(unsigned int cpunum);
-  */
- extern void smp_cpus_done(unsigned int max_cpus);
- 
-+extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
-+
- /*
-  * Call a function on all other processors
-  */
-@@ -140,6 +145,12 @@ static inline void smp_send_reschedule(int cpu) { }
- static inline void init_call_single_data(void)
- {
- }
-+static inline int smp_nmi_call_function(smp_nmi_function func,
-+					 void *info, int wait)
-+{
-+	return 0;
-+}
-+
- #endif /* !SMP */
- 
- /*
 diff --git a/include/linux/socket.h b/include/linux/socket.h
 index 3273a0c..87cf3d1 100644
 --- a/include/linux/socket.h
@@ -28205,10 +30802,10 @@
  static inline void get_uts_ns(struct uts_namespace *ns)
 diff --git a/include/linux/ve.h b/include/linux/ve.h
 new file mode 100644
-index 0000000..8f8d083
+index 0000000..e0e045a
 --- /dev/null
 +++ b/include/linux/ve.h
-@@ -0,0 +1,361 @@
+@@ -0,0 +1,367 @@
 +/*
 + *  include/linux/ve.h
 + *
@@ -28340,9 +30937,9 @@
 +	cycles_t	strt_idle_time;
 +	cycles_t	used_time;
 +	seqcount_t	stat_lock;
-+	int		nr_running;
-+	int		nr_unint;
-+	int		nr_iowait;
++	unsigned long	nr_running;
++	unsigned long	nr_unint;
++	unsigned long	nr_iowait;
 +	cputime64_t	user;
 +	cputime64_t	nice;
 +	cputime64_t	system;
@@ -28481,6 +31078,7 @@
 +	struct ve_monitor	*monitor;
 +	struct proc_dir_entry	*monitor_proc;
 +	unsigned long		meminfo_val;
++	int _randomize_va_space;
 +
 +#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) \
 +	|| defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
@@ -28491,6 +31089,14 @@
 +	struct svc_rqst*	_nlmsvc_rqst;
 +#endif
 +
++#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
++	struct file_system_type	*bm_fs_type;
++	struct vfsmount		*bm_mnt;
++	int			bm_enabled;
++	int			bm_entry_count;
++	struct list_head	bm_entries;
++#endif
++
 +	struct nsproxy		*ve_ns;
 +	struct user_namespace	*user_ns;
 +	struct net		*ve_netns;
@@ -28507,10 +31113,7 @@
 +extern struct ve_cpu_stats static_ve_cpu_stats;
 +static inline struct ve_cpu_stats *VE_CPU_STATS(struct ve_struct *ve, int cpu)
 +{
-+	if (ve->cpu_stats == NULL)
-+		return &static_ve_cpu_stats;
-+	else
-+		return per_cpu_ptr(ve->cpu_stats, cpu);
++	return per_cpu_ptr(ve->cpu_stats, cpu);
 +}
 +
 +extern int nr_ve;
@@ -28608,10 +31211,10 @@
 +#endif
 diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h
 new file mode 100644
-index 0000000..3364e33
+index 0000000..8bc4e01
 --- /dev/null
 +++ b/include/linux/ve_proto.h
-@@ -0,0 +1,89 @@
+@@ -0,0 +1,96 @@
 +/*
 + *  include/linux/ve_proto.h
 + *
@@ -28629,6 +31232,13 @@
 +
 +struct ve_struct;
 +
++struct seq_file;
++
++typedef void (*ve_seq_print_t)(struct seq_file *, struct ve_struct *);
++
++void vzmon_register_veaddr_print_cb(ve_seq_print_t);
++void vzmon_unregister_veaddr_print_cb(ve_seq_print_t);
++
 +#ifdef CONFIG_INET
 +void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
 +#ifdef CONFIG_VE_NETDEV
@@ -28803,10 +31413,10 @@
 +#endif
 diff --git a/include/linux/venet.h b/include/linux/venet.h
 new file mode 100644
-index 0000000..1554037
+index 0000000..dd26f11
 --- /dev/null
 +++ b/include/linux/venet.h
-@@ -0,0 +1,86 @@
+@@ -0,0 +1,95 @@
 +/*
 + *  include/linux/venet.h
 + *
@@ -28845,12 +31455,19 @@
 +	struct list_head 	ve_list;
 +};
 +
++struct ext_entry_struct
++{
++	struct list_head	list;
++	struct ve_addr_struct	addr;
++};
++
 +struct veip_struct
 +{
 +	struct list_head	src_lh;
 +	struct list_head	dst_lh;
 +	struct list_head	ip_lh;
 +	struct list_head	list;
++	struct list_head	ext_lh;
 +	envid_t			veid;
 +};
 +
@@ -28884,6 +31501,8 @@
 +int veip_entry_add(struct ve_struct *ve, struct ve_addr_struct *addr);
 +int veip_entry_del(envid_t veid, struct ve_addr_struct *addr);
 +int venet_change_skb_owner(struct sk_buff *skb);
++struct ext_entry_struct *venet_ext_lookup(struct ve_struct *ve,
++		struct ve_addr_struct *addr);
 +
 +extern struct list_head ip_entry_hash_table[];
 +extern rwlock_t veip_hash_lock;
@@ -29092,10 +31711,10 @@
 +#endif /* __LINUX_VIRTINFO_H */
 diff --git a/include/linux/virtinfoscp.h b/include/linux/virtinfoscp.h
 new file mode 100644
-index 0000000..9e7584f
+index 0000000..5661c0d
 --- /dev/null
 +++ b/include/linux/virtinfoscp.h
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,23 @@
 +#ifndef __VIRTINFO_SCP_H__
 +#define __VIRTINFO_SCP_H__
 +
@@ -29114,6 +31733,8 @@
 +#define VIRTINFO_SCP_RSTTSK     0x20
 +#define VIRTINFO_SCP_RSTMM      0x21
 +
++#define VIRTINFO_SCP_TEST	0x30
++
 +#define VIRTNOTIFY_CHANGE       0x100 
 +
 +#endif /* __VIRTINFO_SCP_H__ */
@@ -29502,10 +32123,10 @@
 +#endif /* __LINUX_VZCTL_QUOTA_H__ */
 diff --git a/include/linux/vzctl_venet.h b/include/linux/vzctl_venet.h
 new file mode 100644
-index 0000000..4797a50
+index 0000000..8c02cd4
 --- /dev/null
 +++ b/include/linux/vzctl_venet.h
-@@ -0,0 +1,51 @@
+@@ -0,0 +1,53 @@
 +/*
 + *  include/linux/vzctl_venet.h
 + *
@@ -29533,6 +32154,8 @@
 +	int op;
 +#define VE_IP_ADD	1
 +#define VE_IP_DEL	2
++#define VE_IP_EXT_ADD	3
++#define VE_IP_EXT_DEL	4
 +	struct sockaddr *addr;
 +	int addrlen;
 +};
@@ -29818,10 +32441,10 @@
 +#endif /* _LINUX_VZIPTABLE_DEFS_H */
 diff --git a/include/linux/vzquota.h b/include/linux/vzquota.h
 new file mode 100644
-index 0000000..e16605e
+index 0000000..1dba5fa
 --- /dev/null
 +++ b/include/linux/vzquota.h
-@@ -0,0 +1,379 @@
+@@ -0,0 +1,380 @@
 +/*
 + *
 + * Copyright (C) 2001-2005 SWsoft
@@ -30031,7 +32654,7 @@
 +	struct dq_info		dq_info;	/* grace times and flags */
 +	spinlock_t		dq_data_lock;	/* for dq_stat */
 +
-+	struct semaphore	dq_sem;		/* semaphore to protect 
++	struct mutex		dq_mutex;	/* mutex to protect
 +						   ugid tree */
 +
 +	struct list_head	dq_ilink_list;	/* list of vz_quota_ilink */
@@ -30096,7 +32719,8 @@
 +#define DQUOT_CMD_CHECK		12
 +#define DQUOT_CMD_FORCE		13
 +
-+extern struct semaphore vz_quota_sem;
++extern struct mutex vz_quota_mutex;
++
 +void inode_qmblk_lock(struct super_block *sb);
 +void inode_qmblk_unlock(struct super_block *sb);
 +void qmblk_data_read_lock(struct vz_quota_master *qmblk);
@@ -30106,6 +32730,7 @@
 +
 +/* for quota operations */
 +void vzquota_inode_init_call(struct inode *inode);
++void vzquota_inode_swap_call(struct inode *, struct inode *);
 +void vzquota_inode_drop_call(struct inode *inode);
 +int vzquota_inode_transfer_call(struct inode *, struct iattr *);
 +struct vz_quota_master *vzquota_inode_data(struct inode *inode,
@@ -30193,7 +32818,6 @@
 +int vzquota_proc_init(void);
 +void vzquota_proc_release(void);
 +struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
-+extern struct semaphore vz_quota_sem;
 +
 +void vzaquota_init(void);
 +void vzaquota_fini(void);
@@ -30268,7 +32892,7 @@
 +#endif /* __VZ_RATELIMIT_H__ */
 diff --git a/include/linux/vzstat.h b/include/linux/vzstat.h
 new file mode 100644
-index 0000000..5c23ea4
+index 0000000..c7dfd1f
 --- /dev/null
 +++ b/include/linux/vzstat.h
 @@ -0,0 +1,182 @@
@@ -30310,7 +32934,7 @@
 +	cycles_t avg[3];
 +};
 +struct kstat_lat_pcpu_struct {
-+	struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
++	struct kstat_lat_pcpu_snap_struct *cur;
 +	cycles_t max_snap;
 +	struct kstat_lat_snap_struct last;
 +	cycles_t avg[3];
@@ -30395,7 +33019,7 @@
 +{
 +	struct kstat_lat_pcpu_snap_struct *cur;
 +
-+	cur = &p->cur[cpu];
++	cur = per_cpu_ptr(p->cur, cpu);
 +	write_seqcount_begin(&cur->lock);
 +	cur->count++;
 +	if (cur->maxlat < dur)
@@ -30426,8 +33050,8 @@
 +	cycles_t m;
 +
 +	memset(&p->last, 0, sizeof(p->last));
-+	for (cpu = 0; cpu < NR_CPUS; cpu++) {
-+		cur = &p->cur[cpu];
++	for_each_online_cpu(cpu) {
++		cur = per_cpu_ptr(p->cur, cpu);
 +		do {
 +			i = read_seqcount_begin(&cur->lock);
 +			memcpy(&snap, cur, sizeof(snap));
@@ -30454,6 +33078,24 @@
 +}
 +
 +#endif /* __VZSTAT_H__ */
+diff --git a/include/linux/xattr.h b/include/linux/xattr.h
+index 5c84af8..12bd3c3 100644
+--- a/include/linux/xattr.h
++++ b/include/linux/xattr.h
+@@ -10,6 +10,13 @@
+ #ifndef _LINUX_XATTR_H
+ #define _LINUX_XATTR_H
+ 
++#ifdef CONFIG_VE
++extern int ve_xattr_policy;
++#define VE_XATTR_POLICY_ACCEPT	0
++#define VE_XATTR_POLICY_IGNORE	1
++#define VE_XATTR_POLICY_REJECT	2
++#endif
++
+ #define XATTR_CREATE	0x1	/* set value, fail if attr already exists */
+ #define XATTR_REPLACE	0x2	/* set value, fail if attr does not exist */
+ 
 diff --git a/include/net/addrconf.h b/include/net/addrconf.h
 index 0f7c378..e2a9043 100644
 --- a/include/net/addrconf.h
@@ -31026,7 +33668,7 @@
  	printed = true;
  }
 diff --git a/init/main.c b/init/main.c
-index bc109c7..d7f4866 100644
+index bc109c7..d06cdc8 100644
 --- a/init/main.c
 +++ b/init/main.c
 @@ -70,6 +70,9 @@
@@ -31056,20 +33698,20 @@
  /*
   * Boot command-line arguments
   */
-@@ -516,6 +529,9 @@ asmlinkage void __init start_kernel(void)
+@@ -516,6 +529,8 @@ asmlinkage void __init start_kernel(void)
  
  	smp_setup_processor_id();
  
 +	prepare_ve0_process(&init_task);
-+	init_ve0();
 +
  	/*
  	 * Need to run as early as possible, to initialize the
  	 * lockdep hash:
-@@ -548,6 +564,7 @@ asmlinkage void __init start_kernel(void)
+@@ -548,6 +563,8 @@ asmlinkage void __init start_kernel(void)
  	setup_command_line(command_line);
  	setup_nr_cpu_ids();
  	setup_per_cpu_areas();
++	init_ve0();
 +	ub_init_early();
  	smp_prepare_boot_cpu();	/* arch-specific boot-cpu hooks */
  
@@ -32131,10 +34773,10 @@
 +obj-$(CONFIG_BC_IO_ACCOUNTING) += io_acct.o
 diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
 new file mode 100644
-index 0000000..6513257
+index 0000000..fdf3bb8
 --- /dev/null
 +++ b/kernel/bc/beancounter.c
-@@ -0,0 +1,688 @@
+@@ -0,0 +1,715 @@
 +/*
 + *  linux/kernel/bc/beancounter.c
 + *
@@ -32168,6 +34810,7 @@
 +#include <linux/module.h>
 +#include <linux/mm.h>
 +#include <linux/sched.h>
++#include <linux/random.h>
 +
 +#include <bc/beancounter.h>
 +#include <bc/hash.h>
@@ -32204,9 +34847,9 @@
 +	"dummy",
 +	"dummy",
 +	"numiptent",
++	"swappages",
 +	"unused_privvmpages",	/* UB_RESOURCES */
 +	"tmpfs_respages",
-+	"swap_pages",
 +	"held_pages",
 +};
 +
@@ -32309,6 +34952,25 @@
 +	return NULL;
 +}
 +
++int ub_count;
++
++/* next two must be called under ub_hash_lock */
++static inline void ub_count_inc(struct user_beancounter *ub)
++{
++	if (ub->parent)
++		ub->parent->ub_childs++;
++	else
++	       ub_count++;
++}
++
++static inline void ub_count_dec(struct user_beancounter *ub)
++{
++	if (ub->parent)
++		ub->parent->ub_childs--;
++	else
++		ub_count--;
++}
++
 +struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
 +{
 +	struct user_beancounter *new_ub, *ub;
@@ -32337,6 +34999,7 @@
 +	if (new_ub != NULL) {
 +		list_add_rcu(&new_ub->ub_list, &ub_list_head);
 +		hlist_add_head(&new_ub->ub_hash, hash);
++		ub_count_inc(new_ub);
 +		spin_unlock_irqrestore(&ub_hash_lock, flags);
 +		return new_ub;
 +	}
@@ -32382,6 +35045,7 @@
 +	if (new_ub != NULL) {
 +		list_add_rcu(&new_ub->ub_list, &ub_list_head);
 +		hlist_add_head(&new_ub->ub_hash, hash);
++		ub_count_inc(new_ub);
 +		spin_unlock_irqrestore(&ub_hash_lock, flags);
 +		return new_ub;
 +	}
@@ -32431,7 +35095,6 @@
 +
 +	clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
 +	clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
-+	clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
 +	clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
 +
 +	ub_debug_trace(!clean, 5, 60*HZ);
@@ -32460,6 +35123,7 @@
 +	}
 +
 +	hlist_del(&ub->ub_hash);
++	ub_count_dec(ub);
 +	list_del_rcu(&ub->ub_list);
 +	spin_unlock_irqrestore(&ub_hash_lock, flags);
 +
@@ -32716,6 +35380,7 @@
 +static void init_beancounter_struct(struct user_beancounter *ub)
 +{
 +	ub->ub_magic = UB_MAGIC;
++	ub->ub_cookie = get_random_int();
 +	atomic_set(&ub->ub_refcount, 1);
 +	spin_lock_init(&ub->ub_lock);
 +	INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
@@ -32775,6 +35440,7 @@
 +	ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
 +	ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
 +	ub->ub_parms[UB_NUMFILE].limit = 1024;
++	ub->ub_parms[UB_SWAPPAGES].limit = UB_MAXVALUE;
 +
 +	for (k = 0; k < UB_RESOURCES; k++)
 +		ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
@@ -32783,6 +35449,8 @@
 +	ub->ub_limit_rl.interval = 300*HZ;
 +}
 +
++static DEFINE_PER_CPU(struct ub_percpu_struct, ub0_percpu);
++
 +void __init ub_init_early(void)
 +{
 +	struct user_beancounter *ub;
@@ -32794,7 +35462,7 @@
 +	init_beancounter_nolimits(ub);
 +	init_beancounter_store(ub);
 +	init_beancounter_struct(ub);
-+	ub->ub_percpu = NULL;
++	ub->ub_percpu = &per_cpu__ub0_percpu;
 +
 +	memset(&current->task_bc, 0, sizeof(struct task_beancounter));
 +	(void)set_exec_ub(ub);
@@ -32806,6 +35474,7 @@
 +
 +	hlist_add_head(&ub->ub_hash, &ub_hash[ub->ub_uid]);
 +	list_add(&ub->ub_list, &ub_list_head);
++	ub_count_inc(ub);
 +}
 +
 +void __init ub_init_late(void)
@@ -33737,10 +36406,10 @@
 +#endif
 diff --git a/kernel/bc/kmem.c b/kernel/bc/kmem.c
 new file mode 100644
-index 0000000..74c4179
+index 0000000..7068e57
 --- /dev/null
 +++ b/kernel/bc/kmem.c
-@@ -0,0 +1,406 @@
+@@ -0,0 +1,405 @@
 +/*
 + *  kernel/bc/kmem.c
 + *
@@ -33911,16 +36580,15 @@
 +{
 +	struct user_beancounter *ub;
 +	struct ub_cache_counter *cc;
-+	long pages, vmpages, pbc;
++	long pages, vmpages;
 +	int i;
 +
 +	ub = seq_beancounter(f);
 +
-+	pages = vmpages = pbc = 0;
++	pages = vmpages = 0;
 +	for_each_online_cpu(i) {
 +		pages += per_cpu_ptr(ub->ub_percpu, i)->pages_charged;
 +		vmpages += per_cpu_ptr(ub->ub_percpu, i)->vmalloc_charged;
-+		pbc += per_cpu_ptr(ub->ub_percpu, i)->pbcs;
 +	}
 +	if (pages < 0)
 +		pages = 0;
@@ -33929,7 +36597,7 @@
 +
 +	seq_printf(f, bc_proc_lu_lu_fmt, "pages", pages, PAGE_SIZE);
 +	seq_printf(f, bc_proc_lu_lu_fmt, "vmalloced", vmpages, PAGE_SIZE);
-+	seq_printf(f, bc_proc_lu_lu_fmt, "pbcs", pbc,
++	seq_printf(f, bc_proc_lu_lu_fmt, "pbcs", ub->ub_pbcs,
 +			sizeof(struct page_beancounter));
 +
 +	spin_lock_irq(&cc_lock);
@@ -34149,10 +36817,10 @@
 +EXPORT_SYMBOL(mem_ub);
 diff --git a/kernel/bc/misc.c b/kernel/bc/misc.c
 new file mode 100644
-index 0000000..a47b355
+index 0000000..15e7aa4
 --- /dev/null
 +++ b/kernel/bc/misc.c
-@@ -0,0 +1,454 @@
+@@ -0,0 +1,460 @@
 +/*
 + *  kernel/bc/misc.c
 + *
@@ -34447,28 +37115,34 @@
 +	return err;
 +}
 +
++static inline int task_precharge_farnr(struct task_beancounter *task_bc)
++{
++       return (task_bc->file_precharged < (1UL << task_bc->file_quant));
++}
++
 +void ub_file_uncharge(struct file *f)
 +{
 +	struct user_beancounter *ub, *pub;
 +	struct task_beancounter *task_bc;
-+	unsigned long nr;
++	int nr;
 +
 +	ub = f->f_ub;
 +	task_bc = &current->task_bc;
 +	if (likely(ub == task_bc->task_ub)) {
 +		task_bc->file_precharged++;
 +		pub = top_beancounter(ub);
-+		if (ub_barrier_farnr(pub, UB_NUMFILE) &&
++		if (task_precharge_farnr(task_bc) &&
 +				ub_barrier_farsz(pub, UB_KMEMSIZE))
 +			return;
-+		if (task_bc->file_precharged < (1UL << task_bc->file_quant))
-+			return;
 +		nr = task_bc->file_precharged
 +			- (1UL << (task_bc->file_quant - 1));
-+		task_bc->file_precharged -= nr;
-+		__put_beancounter_batch(ub, nr);
-+		uncharge_beancounter(ub, UB_NUMFILE, nr);
-+		uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(nr));
++		if (nr > 0) {
++			task_bc->file_precharged -= nr;
++			__put_beancounter_batch(ub, nr);
++			uncharge_beancounter(ub, UB_NUMFILE, nr);
++			uncharge_beancounter(ub, UB_KMEMSIZE,
++					ub_file_kmemsize(nr));
++		}
 +	} else {
 +		uncharge_beancounter(ub, UB_NUMFILE, 1);
 +		uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(1));
@@ -35974,10 +38648,10 @@
 +EXPORT_SYMBOL(ub_out_of_memory);
 diff --git a/kernel/bc/proc.c b/kernel/bc/proc.c
 new file mode 100644
-index 0000000..4bfc03c
+index 0000000..dd96e38
 --- /dev/null
 +++ b/kernel/bc/proc.c
-@@ -0,0 +1,682 @@
+@@ -0,0 +1,703 @@
 +/*
 + *  kernel/bc/proc.c 
 + *
@@ -36265,7 +38939,7 @@
 +
 +	ret = 0xbc000000;
 +	if (ub->parent)
-+		ret |= ((ub->parent->ub_uid) << 4);
++		ret |= ((ub->parent->ub_uid + 1) << 4);
 +	ret |= (ub->ub_uid + 1);
 +	return ret;
 +}
@@ -36583,6 +39257,17 @@
 +	return bc_lookup(ub, dir, dentry);
 +}
 +
++static int bc_entry_getattr(struct vfsmount *mnt, struct dentry *dentry,
++		struct kstat *stat)
++{
++	struct user_beancounter *ub;
++
++	generic_fillattr(dentry->d_inode, stat);
++	ub = (struct user_beancounter *)dentry->d_fsdata;
++	stat->nlink = ub->ub_childs + 2;
++	return 0;
++}
++
 +static struct file_operations bc_entry_fops = {
 +	.read = generic_read_dir,
 +	.readdir = bc_entry_readdir,
@@ -36590,6 +39275,7 @@
 +
 +static struct inode_operations bc_entry_iops = {
 +	.lookup = bc_entry_lookup,
++	.getattr = bc_entry_getattr,
 +};
 +
 +/*
@@ -36627,6 +39313,14 @@
 +	return bc_lookup(ub, dir, dentry);
 +}
 +
++static int bc_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
++	struct kstat *stat)
++{
++	generic_fillattr(dentry->d_inode, stat);
++	stat->nlink = ub_count + 2;
++	return 0;
++}
++
 +static struct file_operations bc_root_fops = {
 +	.read = generic_read_dir,
 +	.readdir = bc_root_readdir,
@@ -36634,6 +39328,7 @@
 +
 +static struct inode_operations bc_root_iops = {
 +	.lookup = bc_root_lookup,
++	.getattr = bc_root_getattr,
 +};
 +
 +static int __init ub_init_proc(void)
@@ -36662,10 +39357,10 @@
 +core_initcall(ub_init_proc);
 diff --git a/kernel/bc/rss_pages.c b/kernel/bc/rss_pages.c
 new file mode 100644
-index 0000000..7b3d872
+index 0000000..2f64be5
 --- /dev/null
 +++ b/kernel/bc/rss_pages.c
-@@ -0,0 +1,438 @@
+@@ -0,0 +1,454 @@
 +/*
 + *  kernel/bc/rss_pages.c
 + *
@@ -36754,6 +39449,22 @@
 +}
 +
 +/*
++ * ++ and -- below are protected by pb_lock
++ */
++
++static inline void inc_pbc_count(struct user_beancounter *ub)
++{
++	for (; ub != NULL; ub = ub->parent)
++		ub->ub_pbcs++;
++}
++
++static inline void dec_pbc_count(struct user_beancounter *ub)
++{
++	for (; ub != NULL; ub = ub->parent)
++		ub->ub_pbcs--;
++}
++
++/*
 + * Alloc - free
 + */
 +
@@ -36865,7 +39576,7 @@
 +
 +static inline int pb_hash(struct user_beancounter *ub, struct page *page)
 +{
-+	return (page_to_pfn(page) + (ub->ub_uid << 10)) & pb_hash_mask;
++	return (page_to_pfn(page) ^ ub->ub_cookie) & pb_hash_mask;
 +}
 +
 +/* pb_lock should be held */
@@ -37565,10 +40276,10 @@
 +module_init(ubstatd_init);
 diff --git a/kernel/bc/sys.c b/kernel/bc/sys.c
 new file mode 100644
-index 0000000..a997944
+index 0000000..8fb942e
 --- /dev/null
 +++ b/kernel/bc/sys.c
-@@ -0,0 +1,176 @@
+@@ -0,0 +1,184 @@
 +/*
 + *  kernel/bc/sys.c
 + *
@@ -37724,18 +40435,26 @@
 +}
 +
 +#ifdef CONFIG_COMPAT
-+asmlinkage long compat_sys_setublimit(uid_t uid, int resource,
-+		unsigned int __user *limits)
++#define UB_MAXVALUE_COMPAT ((1UL << (sizeof(compat_long_t) * 8 - 1)) - 1)
++
++asmlinkage long compat_sys_setublimit(uid_t uid,
++		compat_long_t resource,
++		compat_long_t __user *limits)
 +{
-+	unsigned int u_new_limits[2];
++	compat_long_t u_new_limits[2];
 +	unsigned long new_limits[2];
 +
-+        if (copy_from_user(&u_new_limits, limits, sizeof(u_new_limits)))
-+                return -EFAULT;
++	if (copy_from_user(&u_new_limits, limits, sizeof(u_new_limits)))
++		return -EFAULT;
 +
 +	new_limits[0] = u_new_limits[0];
 +	new_limits[1] = u_new_limits[1];
 +
++	if (u_new_limits[0] == UB_MAXVALUE_COMPAT)
++		new_limits[0] = UB_MAXVALUE;
++	if (u_new_limits[1] == UB_MAXVALUE_COMPAT)
++		new_limits[1] = UB_MAXVALUE;
++
 +	return do_setublimit(uid, resource, new_limits);
 +}
 +
@@ -37747,10 +40466,10 @@
 +#endif
 diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c
 new file mode 100644
-index 0000000..e98134b
+index 0000000..9b4ef0e
 --- /dev/null
 +++ b/kernel/bc/vm_pages.c
-@@ -0,0 +1,549 @@
+@@ -0,0 +1,546 @@
 +/*
 + *  kernel/bc/vm_pages.c
 + *
@@ -37858,7 +40577,8 @@
 +void __ub_update_oomguarpages(struct user_beancounter *ub)
 +{
 +	ub->ub_parms[UB_OOMGUARPAGES].held =
-+		ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
++		ub->ub_parms[UB_PHYSPAGES].held +
++		ub->ub_parms[UB_SWAPPAGES].held;
 +	ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
 +}
 +
@@ -38160,7 +40880,7 @@
 +	unsigned long flags;
 +
 +	spin_lock_irqsave(&ub->ub_lock, flags);
-+	ub->ub_swap_pages++;
++	__charge_beancounter_locked(ub, UB_SWAPPAGES, 1, UB_FORCE);
 +	__ub_update_oomguarpages(ub);
 +	spin_unlock_irqrestore(&ub->ub_lock, flags);
 +}
@@ -38179,10 +40899,7 @@
 +	unsigned long flags;
 +
 +	spin_lock_irqsave(&ub->ub_lock, flags);
-+	if (ub->ub_swap_pages <= 0)
-+		uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
-+	else
-+		ub->ub_swap_pages--;
++	__uncharge_beancounter_locked(ub, UB_SWAPPAGES, 1);
 +	__ub_update_oomguarpages(ub);
 +	spin_unlock_irqrestore(&ub->ub_lock, flags);
 +}
@@ -38280,8 +40997,7 @@
 +			ub->ub_unused_privvmpages);
 +	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_TMPFSPAGES],
 +			ub->ub_tmpfs_respages);
-+	seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_SWAPPAGES],
-+			ub->ub_swap_pages);
++	seq_printf(f, bc_proc_lu_fmt, "rss", ub->ub_pbcs);
 +
 +	seq_printf(f, bc_proc_lu_fmt, "swapin", swap);
 +	seq_printf(f, bc_proc_lu_fmt, "unmap", unmap);
@@ -39015,7 +41731,7 @@
 +#endif
 diff --git a/kernel/cpt/cpt_context.c b/kernel/cpt/cpt_context.c
 new file mode 100644
-index 0000000..bfba186
+index 0000000..f095a73
 --- /dev/null
 +++ b/kernel/cpt/cpt_context.c
 @@ -0,0 +1,285 @@
@@ -39171,7 +41887,7 @@
 +	hdr.cpt_signature[2] = CPT_SIGNATURE2;
 +	hdr.cpt_signature[3] = CPT_SIGNATURE3;
 +	hdr.cpt_hdrlen = sizeof(hdr);
-+	hdr.cpt_image_version = CPT_VERSION_32;
++	hdr.cpt_image_version = CPT_CURRENT_VERSION;
 +#ifdef CONFIG_X86_64
 +	hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
 +#elif defined(CONFIG_X86_32)
@@ -39306,10 +42022,10 @@
 +}
 diff --git a/kernel/cpt/cpt_context.h b/kernel/cpt/cpt_context.h
 new file mode 100644
-index 0000000..e4f82f9
+index 0000000..9eb851a
 --- /dev/null
 +++ b/kernel/cpt/cpt_context.h
-@@ -0,0 +1,215 @@
+@@ -0,0 +1,225 @@
 +#include <linux/fs.h>
 +#include <asm/uaccess.h>
 +#include <bc/beancounter.h>
@@ -39415,6 +42131,16 @@
 +	   and restore them before resuming */
 +	struct ubparm	saved_ubc[UB_RESOURCES];
 +#endif
++
++	int		tcp_cb_convert;
++#define CPT_TCP_CB_CONV		1
++#define CPT_TCP_CB_NOT_CONV	2
++
++#define CPT_MAX_LINKDIRS	1
++	struct file	*linkdirs[CPT_MAX_LINKDIRS];
++	int		linkdirs_num;
++	unsigned int	linkcnt; /* for creating hardlinked files */
++	int	hardlinked_on;
 +} cpt_context_t;
 +
 +typedef struct {
@@ -39527,10 +42253,10 @@
 +}
 diff --git a/kernel/cpt/cpt_dump.c b/kernel/cpt/cpt_dump.c
 new file mode 100644
-index 0000000..7a36b4e
+index 0000000..08ae5e6
 --- /dev/null
 +++ b/kernel/cpt/cpt_dump.c
-@@ -0,0 +1,1248 @@
+@@ -0,0 +1,1271 @@
 +/*
 + *
 + *  kernel/cpt/cpt_dump.c
@@ -40327,6 +43053,7 @@
 +	i->start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
 +
 +	i->last_pid = ve->ve_ns->pid_ns->last_pid;
++	i->rnd_va_space	= ve->_randomize_va_space + 1;
 +
 +	ctx->write(i, sizeof(*i), ctx);
 +	cpt_release_buf(ctx);
@@ -40692,8 +43419,10 @@
 +
 +		p.dentry = mnt->mnt_root;
 +		p.mnt = mnt;
++		spin_lock(&dcache_lock);
 +		path = __d_path(&p, &env->root_path,
 +				path_buf, PAGE_SIZE);
++		spin_unlock(&dcache_lock);
 +		if (IS_ERR(path))
 +			continue;
 +
@@ -40714,7 +43443,7 @@
 +	struct nsproxy *old_ns;
 +	struct mnt_namespace *n;
 +	int err;
-+	unsigned int flags = test_cpu_caps();
++	unsigned int flags = test_cpu_caps_and_features();
 +
 +	if (!ctx->ve_id)
 +		return -EINVAL;
@@ -40723,8 +43452,26 @@
 +	if (env == NULL)
 +		return -ESRCH;
 +
++	down_read(&env->op_sem);
++	err = -ESRCH;
++	if (!env->is_running) {
++		eprintk_ctx("CT is not running\n");
++		goto out_noenv;
++	}
++
++	err = -EBUSY;
++	if (env->is_locked) {
++		eprintk_ctx("CT is locked\n");
++		goto out_noenv;
++	}
++
 +	*caps = flags & (1<<CPT_CPU_X86_CMOV);
 +
++	if (flags & (1 << CPT_SLM_DMPRST)) {
++		eprintk_ctx("SLM is enabled, but slm_dmprst module is not loaded\n");
++		*caps |= (1 << CPT_SLM_DMPRST);
++	}
++
 +	old_env = set_exec_env(env);
 +	old_ns = current->nsproxy;
 +	current->nsproxy = env->ve_ns;
@@ -40775,6 +43522,8 @@
 +out:
 +	current->nsproxy = old_ns;
 +	set_exec_env(old_env);
++out_noenv:
++	up_read(&env->op_sem);
 +	put_ve(env);
 +
 +	return err;
@@ -40941,10 +43690,10 @@
 +EXPORT_SYMBOL(lookup_cpt_obj_bypos);
 diff --git a/kernel/cpt/cpt_files.c b/kernel/cpt/cpt_files.c
 new file mode 100644
-index 0000000..f013331
+index 0000000..3ada205
 --- /dev/null
 +++ b/kernel/cpt/cpt_files.c
-@@ -0,0 +1,1648 @@
+@@ -0,0 +1,1783 @@
 +/*
 + *
 + *  kernel/cpt/cpt_files.c
@@ -40973,6 +43722,7 @@
 +#include <linux/namei.h>
 +#include <linux/smp_lock.h>
 +#include <linux/pagemap.h>
++#include <linux/proc_fs.h>
 +#include <asm/uaccess.h>
 +#include <linux/vzcalluser.h>
 +#include <linux/ve_proto.h>
@@ -41020,15 +43770,29 @@
 +}
 +
 +int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
-+			 cpt_context_t *ctx)
++			 int verify, cpt_context_t *ctx)
 +{
++	if (d->d_inode->i_sb->s_magic == FSMAGIC_PROC &&
++	    proc_dentry_of_dead_task(d))
++		return 0;
++
 +	if (path[0] == '/' && !(!IS_ROOT(d) && d_unhashed(d))) {
 +		struct nameidata nd;
 +		if (path_lookup(path, 0, &nd)) {
 +			eprintk_ctx("d_path cannot be looked up %s\n", path);
 +			return -EINVAL;
 +		}
-+		if (nd.path.dentry != d || nd.path.mnt != mnt) {
++		if (nd.path.dentry != d || (verify && nd.path.mnt != mnt)) {
++			if (!strcmp(path, "/dev/null")) {
++				/*
++				 * epic kludge to workaround the case, when the
++				 * init opens a /dev/null and then udevd
++				 * overmounts the /dev with tmpfs
++				 */
++				path_put(&nd.path);
++				return 0;
++			}
++
 +			eprintk_ctx("d_path is invisible %s\n", path);
 +			path_put(&nd.path);
 +			return -EINVAL;
@@ -41090,7 +43854,7 @@
 +}
 +
 +static int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt,
-+			   int replaced, cpt_context_t *ctx)
++			   int replaced, int verify, cpt_context_t *ctx)
 +{
 +	int len;
 +	char *path;
@@ -41155,7 +43919,7 @@
 +		o.cpt_content = CPT_CONTENT_NAME;
 +		path[len] = 0;
 +
-+		if (cpt_verify_overmount(path, d, mnt, ctx)) {
++		if (cpt_verify_overmount(path, d, mnt, verify, ctx)) {
 +			__cpt_release_buf(ctx);
 +			return -EINVAL;
 +		}
@@ -41194,7 +43958,7 @@
 +static int
 +cpt_dump_filename(struct file *file, int replaced, cpt_context_t *ctx)
 +{
-+	return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, replaced, ctx);
++	return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, replaced, 1, ctx);
 +}
 +
 +int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
@@ -41435,25 +44199,33 @@
 +
 +	v->cpt_i_mode = sbuf.mode;
 +	v->cpt_lflags = 0;
++
++	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC) {
++		v->cpt_lflags |= CPT_DENTRY_PROC;
++		if (proc_dentry_of_dead_task(file->f_dentry))
++			v->cpt_lflags |= CPT_DENTRY_PROCPID_DEAD;
++	}
++
 +	if (IS_ROOT(file->f_dentry))
 +		v->cpt_lflags |= CPT_DENTRY_ROOT;
 +	else if (d_unhashed(file->f_dentry)) {
 +		if (cpt_replaced(file->f_dentry, file->f_vfsmnt, ctx)) {
 +			v->cpt_lflags |= CPT_DENTRY_REPLACED;
 +			replaced = 1;
-+		} else {
++		} else if (!(v->cpt_lflags & CPT_DENTRY_PROCPID_DEAD))
 +			v->cpt_lflags |= CPT_DENTRY_DELETED;
-+		}
 +	}
 +	if (is_cloning_inode(file->f_dentry->d_inode))
 +		v->cpt_lflags |= CPT_DENTRY_CLONING;
-+	if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC)
-+		v->cpt_lflags |= CPT_DENTRY_PROC;
++
 +	v->cpt_inode = CPT_NULL;
 +	if (!(v->cpt_lflags & CPT_DENTRY_REPLACED)) {
 +		iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
-+		if (iobj)
++		if (iobj) {
 +			v->cpt_inode = iobj->o_pos;
++			if (iobj->o_flags & CPT_INODE_HARDLINKED)
++				v->cpt_lflags |= CPT_DENTRY_HARDLINKED;
++		}
 +	}
 +	v->cpt_priv = CPT_NULL;
 +	v->cpt_fown_fd = -1;
@@ -41604,14 +44376,17 @@
 +
 +	if (!(file->f_mode & FMODE_READ) ||
 +	    (file->f_flags & O_DIRECT)) {
-+		file = dentry_open(dget(file->f_dentry),
-+				   mntget(file->f_vfsmnt), O_RDONLY,
++		struct file *filp;
++		filp = dentry_open(dget(file->f_dentry),
++				   mntget(file->f_vfsmnt),
++				   O_RDONLY | O_LARGEFILE,
 +				   NULL /* not checked */);
-+		if (IS_ERR(file)) {
++		if (IS_ERR(filp)) {
 +			cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
-+			eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(file));
-+			return PTR_ERR(file);
++			eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(filp));
++			return PTR_ERR(filp);
 +		}
++		file = filp;
 +	} else {
 +		atomic_long_inc(&file->f_count);
 +	}
@@ -41858,7 +44633,7 @@
 +	}
 +	spin_unlock(&dcache_lock);
 +	if (found) {
-+		err = cpt_dump_dentry(found, mnt, 0, ctx);
++		err = cpt_dump_dentry(found, mnt, 0, 1, ctx);
 +		dput(found);
 +		if (!err) {
 +			dprintk_ctx("dentry found in aliases\n");
@@ -41872,7 +44647,7 @@
 +		return -EINVAL;
 +
 +	mntget(mnt);
-+	f = dentry_open(de, mnt, O_RDONLY, NULL);
++	f = dentry_open(de, mnt, O_RDONLY | O_LARGEFILE, NULL);
 +	if (IS_ERR(f))
 +		return PTR_ERR(f);
 +
@@ -41897,7 +44672,7 @@
 +
 +	dprintk_ctx("dentry found in dir\n");
 +	__cpt_release_buf(ctx);
-+	err = cpt_dump_dentry(found, mnt, 0, ctx);
++	err = cpt_dump_dentry(found, mnt, 0, 1, ctx);
 +
 +err_lookup:
 +	dput(found);
@@ -41907,6 +44682,86 @@
 +	return err;
 +}
 +
++static struct dentry *find_linkdir(struct vfsmount *mnt, struct cpt_context *ctx)
++{
++	int i;
++
++	for (i = 0; i < ctx->linkdirs_num; i++)
++		if (ctx->linkdirs[i]->f_vfsmnt == mnt)
++			return ctx->linkdirs[i]->f_dentry;
++	return NULL;
++}
++
++struct dentry *cpt_fake_link(struct dentry *d, struct vfsmount *mnt,
++		struct inode *ino, struct cpt_context *ctx)
++{
++	int err;
++	int order = 8;
++	const char *prefix = ".cpt_hardlink.";
++	int preflen = strlen(prefix) + order;
++	char name[preflen + 1];
++	struct dentry *dirde, *hardde;
++
++	dirde = find_linkdir(mnt, ctx);
++	if (!dirde) {
++		err = -ENOENT;
++		goto out;
++	}
++
++	ctx->linkcnt++;
++	snprintf(name, sizeof(name), "%s%0*u", prefix, order, ctx->linkcnt);
++
++	mutex_lock(&dirde->d_inode->i_mutex);
++	hardde = lookup_one_len(name, dirde, strlen(name));
++	if (IS_ERR(hardde)) {
++		err = PTR_ERR(hardde);
++		goto out_unlock;
++	}
++
++	if (hardde->d_inode) {
++		/* Userspace should clean hardlinked files from previous
++		 * dump/undump
++		 */
++		eprintk_ctx("Hardlinked file already exists: %s\n", name);
++		err = -EEXIST;
++		goto out_put;
++	}
++
++	if (d == NULL)
++		err = vfs_create(dirde->d_inode, hardde, 0600, NULL);
++	else
++		err = vfs_link(d, dirde->d_inode, hardde);
++	if (err) {
++		eprintk_ctx("error hardlink %s, %d\n", name, err);
++		goto out_put;
++	}
++
++out_unlock:
++	mutex_unlock(&dirde->d_inode->i_mutex);
++out:
++	return err ? ERR_PTR(err) : hardde;
++
++out_put:
++	dput(hardde);
++	goto out_unlock;
++}
++
++static int create_dump_hardlink(struct dentry *d, struct vfsmount *mnt,
++				struct inode *ino, struct cpt_context *ctx)
++{
++	int err;
++	struct dentry *hardde;
++
++	hardde = cpt_fake_link(d, mnt, ino, ctx);
++	if (IS_ERR(hardde))
++		return PTR_ERR(hardde);
++
++	err = cpt_dump_dentry(hardde, mnt, 0, 1, ctx);
++	dput(hardde);
++
++	return err;
++}
++
 +static int dump_one_inode(struct file *file, struct dentry *d,
 +			  struct vfsmount *mnt, struct cpt_context *ctx)
 +{
@@ -41922,6 +44777,10 @@
 +	if (iobj->o_pos >= 0)
 +		return 0;
 +
++	if (ino->i_sb->s_magic == FSMAGIC_PROC &&
++	    proc_dentry_of_dead_task(d))
++		return 0;
++
 +	if ((!IS_ROOT(d) && d_unhashed(d)) &&
 +	    !cpt_replaced(d, mnt, ctx))
 +		dump_it = 1;
@@ -41948,6 +44807,14 @@
 +			 * process group. */
 +			if (ino->i_nlink != 0) {
 +				err = find_linked_dentry(d, mnt, ino, ctx);
++				if (err && S_ISREG(ino->i_mode)) {
++					err = create_dump_hardlink(d, mnt, ino, ctx);
++					iobj->o_flags |= CPT_INODE_HARDLINKED;
++				} else if (S_ISCHR(ino->i_mode) ||
++					   S_ISBLK(ino->i_mode) ||
++					   S_ISFIFO(ino->i_mode))
++					err = 0;
++
 +				if (err) {
 +					eprintk_ctx("deleted reference to existing inode, checkpointing is impossible: %d\n", err);
 +					return -EBUSY;
@@ -42305,6 +45172,7 @@
 +{
 +	int* pfd;
 +	char* path;
++	envid_t veid;
 +};
 +
 +static int dumptmpfs(void *arg)
@@ -42316,7 +45184,7 @@
 +	char *path = args->path;
 +	char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
 +
-+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
++	i = real_env_create(args->veid, VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
 +	if (i < 0) {
 +		eprintk("cannot enter ve to dump tmpfs\n");
 +		module_put(THIS_MODULE);
@@ -42363,16 +45231,20 @@
 +	int status;
 +	mm_segment_t oldfs;
 +	sigset_t ignore, blocked;
++	struct ve_struct *oldenv;
 +	
 +	err = sc_pipe(pfd);
 +	if (err < 0)
 +		return err;
 +	args.pfd = pfd;
 +	args.path = path;
++	args.veid = VEID(get_exec_env());
 +	ignore.sig[0] = CPT_SIG_IGNORE_MASK;
 +	sigprocmask(SIG_BLOCK, &ignore, &blocked);
++	oldenv = set_exec_env(get_ve0());
 +	err = pid = local_kernel_thread(dumptmpfs, (void*)&args,
 +			SIGCHLD | CLONE_VFORK, 0);
++	set_exec_env(oldenv);
 +	if (err < 0) {
 +		eprintk_ctx("tmpfs local_kernel_thread: %d\n", err);
 +		goto out;
@@ -42454,7 +45326,7 @@
 +
 +	/* One special case: mount --bind /a /a */
 +	if (mnt->mnt_root == mnt->mnt_mountpoint)
-+		return cpt_dump_dentry(mnt->mnt_root, mnt, 0, ctx);
++		return cpt_dump_dentry(mnt->mnt_root, mnt, 0, 0, ctx);
 +
 +	list_for_each_prev(p, &mnt->mnt_list) {
 +		struct vfsmount * m;
@@ -42467,7 +45339,7 @@
 +		if (m->mnt_sb != mnt->mnt_sb)
 +			continue;
 +
-+		err = cpt_dump_dentry(mnt->mnt_root, m, 0, ctx);
++		err = cpt_dump_dentry(mnt->mnt_root, m, 0, 1, ctx);
 +		if (err == 0)
 +			break;
 +	}
@@ -42517,19 +45389,30 @@
 +	cpt_dump_string(path, ctx);
 +	cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
 +
-+	if (v.cpt_mntflags & CPT_MNT_BIND)
++	if (v.cpt_mntflags & CPT_MNT_BIND) {
 +		err = cpt_dump_bind_mnt(mnt, ctx);
-+	else if (!(v.cpt_mntflags & CPT_MNT_EXT) &&
-+		   strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0) {
-+		mntget(mnt);
-+		up_read(&namespace_sem);
-+		err = cpt_dump_tmpfs(path, ctx);
-+		down_read(&namespace_sem);
-+		if (!err) {
-+			if (list_empty(&mnt->mnt_list))
++
++		/* Temporary solution for Ubuntu 8.04 */
++		if (err == -EINVAL && !strcmp(path, "/dev/.static/dev")) {
++			cpt_dump_string("/dev", ctx);
++			err = 0;
++		}
++	}
++	else if (!(v.cpt_mntflags & CPT_MNT_EXT)) {
++
++		if (mnt->mnt_sb->s_type->fs_flags & FS_REQUIRES_DEV) {
++			eprintk_ctx("Checkpoint supports only nodev fs: %s\n",
++				    mnt->mnt_sb->s_type->name);
++			err = -EXDEV;
++		} else if (!strcmp(mnt->mnt_sb->s_type->name, "tmpfs")) {
++			mntget(mnt);
++			up_read(&namespace_sem);
++			err = cpt_dump_tmpfs(path, ctx);
++			down_read(&namespace_sem);
++			if (!err && list_empty(&mnt->mnt_list))
 +				err = -EBUSY;
++			mntput(mnt);
 +		}
-+		mntput(mnt);
 +	}
 +
 +	cpt_pop_object(&saved_obj, ctx);
@@ -42547,7 +45430,7 @@
 +{
 +	struct mnt_namespace *n = obj->o_obj;
 +	struct cpt_object_hdr v;
-+	struct list_head *p;
++	struct vfsmount *rootmnt, *p;
 +	loff_t saved_obj;
 +	int err = 0;
 +
@@ -42563,8 +45446,9 @@
 +	cpt_push_object(&saved_obj, ctx);
 +
 +	down_read(&namespace_sem);
-+	list_for_each(p, &n->list) {
-+		err = dump_vfsmount(list_entry(p, struct vfsmount, mnt_list), ctx);
++	rootmnt = n->root;
++	for (p = rootmnt; p; p = next_mnt(p, rootmnt)) {
++		err = dump_vfsmount(p, ctx);
 +		if (err)
 +			break;
 +	}
@@ -42595,10 +45479,10 @@
 +}
 diff --git a/kernel/cpt/cpt_files.h b/kernel/cpt/cpt_files.h
 new file mode 100644
-index 0000000..e0ebd97
+index 0000000..bc66731
 --- /dev/null
 +++ b/kernel/cpt/cpt_files.h
-@@ -0,0 +1,73 @@
+@@ -0,0 +1,77 @@
 +int cpt_collect_files(cpt_context_t *);
 +int cpt_collect_fs(cpt_context_t *);
 +int cpt_collect_namespace(cpt_context_t *);
@@ -42619,6 +45503,7 @@
 +
 +struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
 +int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
++int rst_files_std(struct cpt_task_image *ti, struct cpt_context *ctx);
 +__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
 +int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
 +int rst_restore_fs(struct cpt_context *ctx);
@@ -42658,9 +45543,11 @@
 +			      unsigned flags,
 +			      struct cpt_context *ctx);
 +
++struct dentry *cpt_fake_link(struct dentry *d, struct vfsmount *mnt,
++		struct inode *ino, struct cpt_context *ctx);
 +
 +int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
-+			 cpt_context_t *ctx);
++			 int verify, cpt_context_t *ctx);
 +
 +#define check_one_vfsmount(mnt) \
 +	(strcmp(mnt->mnt_sb->s_type->name, "rootfs") != 0 && \
@@ -42671,7 +45558,8 @@
 +	 strcmp(mnt->mnt_sb->s_type->name, "tmpfs") != 0 && \
 +	 strcmp(mnt->mnt_sb->s_type->name, "devpts") != 0 && \
 +	 strcmp(mnt->mnt_sb->s_type->name, "proc") != 0 && \
-+	 strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0)
++	 strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0 && \
++	 strcmp(mnt->mnt_sb->s_type->name, "binfmt_misc") != 0)
 diff --git a/kernel/cpt/cpt_fsmagic.h b/kernel/cpt/cpt_fsmagic.h
 new file mode 100644
 index 0000000..7e79789
@@ -42697,10 +45585,10 @@
 +#define FSMAGIC_ANON	0x09041934
 diff --git a/kernel/cpt/cpt_inotify.c b/kernel/cpt/cpt_inotify.c
 new file mode 100644
-index 0000000..87f6bfd
+index 0000000..4f2abb0
 --- /dev/null
 +++ b/kernel/cpt/cpt_inotify.c
-@@ -0,0 +1,151 @@
+@@ -0,0 +1,174 @@
 +/*
 + *
 + *  kernel/cpt/cpt_inotify.c
@@ -42744,6 +45632,29 @@
 +#include "cpt_fsmagic.h"
 +#include "cpt_syscalls.h"
 +
++static int dump_watch_inode(struct path *path, cpt_context_t *ctx)
++{
++	int err;
++	struct dentry *d;
++
++	d = path->dentry;
++	if (IS_ROOT(d) || !d_unhashed(d))
++		goto dump_dir;
++
++	d = cpt_fake_link(d->d_inode->i_nlink ? d : NULL,
++			path->mnt, d->d_inode, ctx);
++
++	if (IS_ERR(d))
++		return PTR_ERR(d);
++
++dump_dir:
++	err = cpt_dump_dir(d, path->mnt, ctx);
++	if (d != path->dentry)
++		dput(d);
++
++	return err;
++}
++
 +static int cpt_dump_watches(struct fsnotify_group *g, struct cpt_context *ctx)
 +{
 +	int err = 0;
@@ -42783,7 +45694,7 @@
 +		path_get(&path);
 +		spin_unlock(&fse->lock);
 +
-+		err = cpt_dump_dir(path.dentry, path.mnt, ctx);
++		err = dump_watch_inode(&path, ctx);
 +		cpt_pop_object(&saved_obj, ctx);
 +		path_put(&path);
 +
@@ -42854,10 +45765,10 @@
 +}
 diff --git a/kernel/cpt/cpt_kernel.c b/kernel/cpt/cpt_kernel.c
 new file mode 100644
-index 0000000..3272d81
+index 0000000..10fa5d6
 --- /dev/null
 +++ b/kernel/cpt/cpt_kernel.c
-@@ -0,0 +1,178 @@
+@@ -0,0 +1,185 @@
 +/*
 + *
 + *  kernel/cpt/cpt_kernel.c
@@ -42880,6 +45791,8 @@
 +#include <asm/cpufeature.h>
 +#endif
 +#include <linux/cpt_image.h>
++#include <linux/virtinfo.h>
++#include <linux/virtinfoscp.h>
 +
 +#include "cpt_kernel.h"
 +#include "cpt_syscalls.h"
@@ -42952,7 +45865,9 @@
 +	}
 +	if (!try_module_get(THIS_MODULE))
 +		return -EBUSY;
-+	ret = asm_kernel_thread(fn, arg, flags, pid);
++	while ((ret = asm_kernel_thread(fn, arg, flags, pid)) ==
++							-ERESTARTNOINTR)
++		cond_resched();
 +	if (ret < 0)
 +		module_put(THIS_MODULE);
 +	return ret;
@@ -42981,7 +45896,7 @@
 +	return ret;
 +}
 +
-+unsigned int test_cpu_caps(void)
++unsigned int test_cpu_caps_and_features(void)
 +{
 +	unsigned int flags = 0;
 +
@@ -43023,6 +45938,9 @@
 +	flags |= 1 << CPT_CPU_X86_IA64;
 +	flags |= 1 << CPT_CPU_X86_FXSR;
 +#endif
++	if (virtinfo_notifier_call(VITYPE_SCP,
++				VIRTINFO_SCP_TEST, NULL) & NOTIFY_FAIL)
++		flags |= 1 << CPT_SLM_DMPRST;
 +	return flags;
 +}
 +
@@ -43038,7 +45956,7 @@
 +}
 diff --git a/kernel/cpt/cpt_kernel.h b/kernel/cpt/cpt_kernel.h
 new file mode 100644
-index 0000000..9254778
+index 0000000..8bbd402
 --- /dev/null
 +++ b/kernel/cpt/cpt_kernel.h
 @@ -0,0 +1,99 @@
@@ -43092,7 +46010,7 @@
 +static inline void vefs_track_notify(struct dentry *vdentry, int track_cow) { };
 +#endif
 +
-+unsigned int test_cpu_caps(void);
++unsigned int test_cpu_caps_and_features(void);
 +unsigned int test_kernel_config(void);
 +
 +#define test_one_flag_old(src, dst, flag, message, ret) \
@@ -44113,10 +47031,10 @@
 +extern struct vm_operations_struct special_mapping_vmops;
 diff --git a/kernel/cpt/cpt_net.c b/kernel/cpt/cpt_net.c
 new file mode 100644
-index 0000000..9e09675
+index 0000000..4e183ba
 --- /dev/null
 +++ b/kernel/cpt/cpt_net.c
-@@ -0,0 +1,544 @@
+@@ -0,0 +1,652 @@
 +/*
 + *
 + *  kernel/cpt/cpt_net.c
@@ -44514,13 +47432,20 @@
 +	return err;
 +}
 +
++struct args_t
++{
++	int* pfd;
++	envid_t veid;
++};
++
 +static int dumpfn(void *arg)
 +{
 +	int i;
-+	int *pfd = arg;
++	struct args_t *args = arg;
++	int *pfd = args->pfd;
 +	char *argv[] = { "iptables-save", "-c", NULL };
 +
-+	i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
++	i = real_env_create(args->veid, VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
 +	if (i < 0) {
 +		eprintk("cannot enter ve to dump iptables\n");
 +		module_put(THIS_MODULE);
@@ -44560,6 +47485,8 @@
 +	int status;
 +	mm_segment_t oldfs;
 +	sigset_t ignore, blocked;
++	struct args_t args;
++	struct ve_struct *oldenv;
 +
 +	if (!(get_exec_env()->_iptables_modules & VE_IP_IPTABLES_MOD))
 +		return 0;
@@ -44569,9 +47496,14 @@
 +		eprintk_ctx("sc_pipe: %d\n", err);
 +		return err;
 +	}
++	args.pfd = pfd;
++	args.veid = VEID(get_exec_env());
 +	ignore.sig[0] = CPT_SIG_IGNORE_MASK;
 +	sigprocmask(SIG_BLOCK, &ignore, &blocked);
-+	err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
++	oldenv = set_exec_env(get_ve0());
++	err = pid = local_kernel_thread(dumpfn, (void*)&args,
++			SIGCHLD | CLONE_VFORK, 0);
++	set_exec_env(oldenv);
 +	if (err < 0) {
 +		eprintk_ctx("local_kernel_thread: %d\n", err);
 +		goto out;
@@ -44646,6 +47578,98 @@
 +	return err;
 +}
 +
++static unsigned long fold_field(void *mib[], int offt)
++{
++	unsigned long res = 0;
++	int i;
++
++	for_each_possible_cpu(i) {
++		res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
++		res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
++	}
++	return res;
++}
++
++static void cpt_dump_snmp_stat(struct cpt_context *ctx, void *mib[], int n)
++{
++	int i;
++	struct cpt_object_hdr o;
++	__u32 *stats;
++
++	stats = cpt_get_buf(ctx);
++
++	cpt_open_object(NULL, ctx);
++
++	for (i = 0; i < n; i++)
++		stats[i] = fold_field(mib, i);
++
++ 	o.cpt_next = CPT_NULL;
++	o.cpt_object = CPT_OBJ_BITS;
++	o.cpt_hdrlen = sizeof(o);
++	o.cpt_content = CPT_CONTENT_DATA;
++
++	ctx->write(&o, sizeof(o), ctx);
++	ctx->write(stats, n * sizeof(*stats), ctx);
++	ctx->align(ctx);
++
++	cpt_close_object(ctx);
++
++	cpt_release_buf(ctx);
++}
++
++static void cpt_dump_snmp_stub(struct cpt_context *ctx)
++{
++	struct cpt_object_hdr o;
++
++	cpt_open_object(NULL, ctx);
++ 	o.cpt_next = CPT_NULL;
++	o.cpt_object = CPT_OBJ_BITS;
++	o.cpt_hdrlen = sizeof(o);
++	o.cpt_content = CPT_CONTENT_VOID;
++	ctx->write(&o, sizeof(o), ctx);
++	ctx->align(ctx);
++	cpt_close_object(ctx);
++}
++
++static int cpt_dump_snmp(struct cpt_context *ctx)
++{
++	struct ve_struct *ve;
++	struct net *net;
++
++	ve = get_exec_env();
++	net = ve->ve_netns;
++
++	cpt_open_section(ctx, CPT_SECT_SNMP_STATS);
++
++	cpt_dump_snmp_stat(ctx, (void **)&net->mib.net_statistics,
++				LINUX_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&net->mib.ip_statistics,
++				IPSTATS_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&net->mib.tcp_statistics,
++				TCP_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&net->mib.udp_statistics,
++				UDP_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&net->mib.icmp_statistics,
++				ICMP_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&net->mib.icmpmsg_statistics,
++				ICMPMSG_MIB_MAX);
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	cpt_dump_snmp_stat(ctx, (void **)&ve->_ipv6_statistics,
++				IPSTATS_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&ve->_udp_stats_in6,
++				UDP_MIB_MAX);
++	cpt_dump_snmp_stat(ctx, (void **)&ve->_icmpv6_statistics,
++				ICMP6_MIB_MAX);
++#else
++	cpt_dump_snmp_stub(ctx);
++	cpt_dump_snmp_stub(ctx);
++	cpt_dump_snmp_stub(ctx);
++#endif
++	cpt_close_section(ctx);
++
++	return 0;
++}
++
 +int cpt_dump_ifinfo(struct cpt_context * ctx)
 +{
 +	int err;
@@ -44659,6 +47683,8 @@
 +		err = cpt_dump_route(ctx);
 +	if (!err)
 +		err = cpt_dump_iptables(ctx);
++	if (!err)
++		err = cpt_dump_snmp(ctx);
 +	return err;
 +}
 diff --git a/kernel/cpt/cpt_net.h b/kernel/cpt/cpt_net.h
@@ -44676,10 +47702,10 @@
 +int rst_restore_ip_conntrack(struct cpt_context * ctx);
 diff --git a/kernel/cpt/cpt_obj.c b/kernel/cpt/cpt_obj.c
 new file mode 100644
-index 0000000..7ab23d7
+index 0000000..341d2ab
 --- /dev/null
 +++ b/kernel/cpt/cpt_obj.c
-@@ -0,0 +1,162 @@
+@@ -0,0 +1,163 @@
 +/*
 + *
 + *  kernel/cpt/cpt_obj.c
@@ -44720,6 +47746,7 @@
 +		obj->o_index = CPT_NOINDEX;
 +		obj->o_obj = NULL;
 +		obj->o_image = NULL;
++		obj->o_flags = 0;
 +		ctx->objcount++;
 +	}
 +	return obj;
@@ -44844,10 +47871,10 @@
 +}
 diff --git a/kernel/cpt/cpt_obj.h b/kernel/cpt/cpt_obj.h
 new file mode 100644
-index 0000000..7762623
+index 0000000..2dca39b
 --- /dev/null
 +++ b/kernel/cpt/cpt_obj.h
-@@ -0,0 +1,62 @@
+@@ -0,0 +1,64 @@
 +#ifndef __CPT_OBJ_H_
 +#define __CPT_OBJ_H_ 1
 +
@@ -44867,6 +47894,8 @@
 +	void			*o_image;
 +	void			*o_parent;
 +	struct list_head	o_alist;
++	unsigned int		o_flags;
++#define CPT_INODE_HARDLINKED	0x1
 +} cpt_object_t;
 +
 +struct cpt_context;
@@ -44912,10 +47941,10 @@
 +#endif /* __CPT_OBJ_H_ */
 diff --git a/kernel/cpt/cpt_proc.c b/kernel/cpt/cpt_proc.c
 new file mode 100644
-index 0000000..918fe2a
+index 0000000..a7d2d82
 --- /dev/null
 +++ b/kernel/cpt/cpt_proc.c
-@@ -0,0 +1,594 @@
+@@ -0,0 +1,623 @@
 +/*
 + *
 + *  kernel/cpt/cpt_proc.c
@@ -45001,6 +48030,8 @@
 +
 +void cpt_context_release(cpt_context_t *ctx)
 +{
++	int i;
++
 +	list_del(&ctx->ctx_list);
 +	spin_unlock(&cpt_context_lock);
 +
@@ -45027,6 +48058,8 @@
 +		fput(ctx->errorfile);
 +		ctx->errorfile = NULL;
 +	}
++	for (i = 0; i < ctx->linkdirs_num; i++)
++		fput(ctx->linkdirs[i]);
 +	if (ctx->error_msg) {
 +		free_page((unsigned long)ctx->error_msg);
 +		ctx->error_msg = NULL;
@@ -45122,7 +48155,7 @@
 +		unsigned int src_flags, dst_flags = arg;
 +
 +		err = 0;
-+		src_flags = test_cpu_caps();
++		src_flags = test_cpu_caps_and_features();
 +		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
 +		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
 +		test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
@@ -45244,6 +48277,26 @@
 +			fput(ctx->file);
 +		ctx->file = dfile;
 +		break;
++	case CPT_LINKDIR_ADD:
++		if (ctx->linkdirs_num >= CPT_MAX_LINKDIRS) {
++			err = -EMLINK;
++			break;
++		}
++
++		dfile = fget(arg);
++		if (!dfile) {
++			err = -EBADFD;
++			break;
++		}
++
++		if (!S_ISDIR(dfile->f_dentry->d_inode->i_mode)) {
++			err = -ENOTDIR;
++			fput(dfile);
++			break;
++		}
++
++		ctx->linkdirs[ctx->linkdirs_num++] = dfile;
++		break;
 +	case CPT_SET_ERRORFD:
 +		if (arg >= 0) {
 +			dfile = fget(arg);
@@ -45304,7 +48357,7 @@
 +			break;
 +		}
 +		ctx->dst_cpu_flags = arg;
-+		ctx->src_cpu_flags = test_cpu_caps();
++		ctx->src_cpu_flags = test_cpu_caps_and_features();
 +		break;
 +	case CPT_SUSPEND:
 +		if (cpt_context_lookup_veid(ctx->ve_id) ||
@@ -45378,6 +48431,11 @@
 +		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
 +		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL, "syscall", err);
 +		test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL32, "syscall32", err);
++		if (dst_flags & (1 << CPT_SLM_DMPRST)) {
++			eprintk_ctx("SLM is enabled on destination node, but slm_dmprst module is not loaded\n");
++			err = 1;
++		}
++
 +		if (src_flags & CPT_UNSUPPORTED_MASK)
 +			err = 2;
 +		break;
@@ -45512,10 +48570,10 @@
 +module_exit(exit_cpt);
 diff --git a/kernel/cpt/cpt_process.c b/kernel/cpt/cpt_process.c
 new file mode 100644
-index 0000000..2afc171
+index 0000000..6314bee
 --- /dev/null
 +++ b/kernel/cpt/cpt_process.c
-@@ -0,0 +1,1383 @@
+@@ -0,0 +1,1379 @@
 +/*
 + *
 + *  kernel/cpt/cpt_process.c
@@ -46241,10 +49299,6 @@
 +
 +int cpt_check_unsupported(struct task_struct *tsk, cpt_context_t *ctx)
 +{
-+	if (tsk->splice_pipe) {
-+		eprintk_ctx("splice is used by " CPT_FID "\n", CPT_TID(tsk));
-+		return -EBUSY;
-+	}
 +#ifdef CONFIG_KEYS
 +	if (tsk->cred->request_key_auth || tsk->cred->thread_keyring) {
 +		eprintk_ctx("keys are used by " CPT_FID "\n", CPT_TID(tsk));
@@ -46321,7 +49375,7 @@
 +		return -EBUSY;
 +	}
 +
-+	v->cpt_flags = tsk->flags&~(PF_FROZEN|PF_EXIT_RESTART);
++	v->cpt_flags = tsk->flags & CPT_TASK_FLAGS_MASK;
 +	v->cpt_ptrace = tsk->ptrace;
 +	v->cpt_prio = tsk->prio;
 +	v->cpt_exit_code = tsk->exit_code;
@@ -46920,10 +49974,10 @@
 +struct pid *alloc_vpid_safe(pid_t vnr);
 diff --git a/kernel/cpt/cpt_socket.c b/kernel/cpt/cpt_socket.c
 new file mode 100644
-index 0000000..939fb30
+index 0000000..3943b60
 --- /dev/null
 +++ b/kernel/cpt/cpt_socket.c
-@@ -0,0 +1,790 @@
+@@ -0,0 +1,802 @@
 +/*
 + *
 + *  kernel/cpt/cpt_socket.c
@@ -47105,7 +50159,7 @@
 +}
 +
 +int cpt_dump_skb(int type, int owner, struct sk_buff *skb,
-+		 struct cpt_context *ctx)
++		 struct sock *sk, struct cpt_context *ctx)
 +{
 +	struct cpt_skb_image *v = cpt_get_buf(ctx);
 +	loff_t saved_obj;
@@ -47129,7 +50183,19 @@
 +	v->cpt_nh = skb_network_header(skb) - skb->head;
 +	v->cpt_mac = skb_mac_header(skb) - skb->head;
 +	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v->cpt_cb));
-+	memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
++	memset(v->cpt_cb, 0, sizeof(v->cpt_cb));
++#if !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE)
++	if (sk->sk_protocol == IPPROTO_TCP) {
++		/* Save control block according to tcp_skb_cb with IPv6 */
++		BUG_ON(sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm) >
++		       sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm));
++		memcpy(v->cpt_cb, skb->cb, sizeof(struct inet_skb_parm));
++		memcpy((void *)v->cpt_cb + sizeof(struct inet6_skb_parm),
++		       skb->cb + sizeof(struct inet_skb_parm),
++		       sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
++	} else
++#endif
++		memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
 +	if (sizeof(skb->cb) > sizeof(v->cpt_cb)) {
 +		int i;
 +		for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) {
@@ -47256,7 +50322,7 @@
 +			}
 +		}
 +
-+		err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx);
++		err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, sk, ctx);
 +		if (err)
 +			return err;
 +
@@ -47274,7 +50340,7 @@
 +
 +	skb = skb_peek(&sk->sk_write_queue);
 +	while (skb && skb != (struct sk_buff*)&sk->sk_write_queue) {
-+		int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx);
++		int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, sk, ctx);
 +		if (err)
 +			return err;
 +
@@ -47438,7 +50504,7 @@
 +					} else {
 +						wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
 +					}
-+					err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx);
++					err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, 1, ctx);
 +				} else {
 +					eprintk_ctx("cannot get path of an af_unix socket\n");
 +					err = PTR_ERR(path);
@@ -47716,10 +50782,10 @@
 +}
 diff --git a/kernel/cpt/cpt_socket.h b/kernel/cpt/cpt_socket.h
 new file mode 100644
-index 0000000..6489184
+index 0000000..9c64399
 --- /dev/null
 +++ b/kernel/cpt/cpt_socket.h
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,37 @@
 +struct sock;
 +
 +int cpt_collect_passedfds(cpt_context_t *);
@@ -47733,7 +50799,8 @@
 +int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
 +
 +int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx);
-+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx);
++struct sk_buff * rst_skb(struct sock *sk, loff_t *pos_p, __u32 *owner,
++			 __u32 *queue, struct cpt_context *ctx);
 +
 +void cpt_unlock_sockets(cpt_context_t *);
 +void cpt_kill_sockets(cpt_context_t *);
@@ -47742,11 +50809,14 @@
 +int cpt_kill_socket(struct sock *, cpt_context_t *);
 +int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
 +int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
++int rst_listen_socket_in(struct sock *sk, struct cpt_sock_image *si,
++			 loff_t pos, struct cpt_context *ctx);
 +__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
 +int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
 +int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
 +int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
-+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx);
++int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct sock *sk,
++		 struct cpt_context *ctx);
 +int cpt_dump_mcfilter(struct sock *sk, struct cpt_context *ctx);
 +
 +int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
@@ -47755,7 +50825,7 @@
 +			loff_t pos, cpt_context_t *ctx);
 diff --git a/kernel/cpt/cpt_socket_in.c b/kernel/cpt/cpt_socket_in.c
 new file mode 100644
-index 0000000..9c25d70
+index 0000000..d565745
 --- /dev/null
 +++ b/kernel/cpt/cpt_socket_in.c
 @@ -0,0 +1,448 @@
@@ -47820,7 +50890,7 @@
 +	while (skb && skb != (struct sk_buff*)&tp->out_of_order_queue) {
 +		int err;
 +
-+		err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, ctx);
++		err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, sk, ctx);
 +		if (err)
 +			return err;
 +
@@ -48073,7 +51143,7 @@
 +	v->cpt_snt_isn = tcp_rsk(req)->snt_isn;
 +	v->cpt_rmt_port = inet_rsk(req)->rmt_port;
 +	v->cpt_mss = req->mss;
-+	// // v->cpt_family = (req->class == &or_ipv4 ? AF_INET : AF_INET6);
++	v->cpt_family = req->rsk_ops->family;
 +	v->cpt_retrans = req->retrans;
 +	v->cpt_snd_wscale = inet_rsk(req)->snd_wscale;
 +	v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale;
@@ -48946,10 +52016,10 @@
 +}
 diff --git a/kernel/cpt/cpt_ubc.c b/kernel/cpt/cpt_ubc.c
 new file mode 100644
-index 0000000..5746184
+index 0000000..0fc4f5f
 --- /dev/null
 +++ b/kernel/cpt/cpt_ubc.c
-@@ -0,0 +1,133 @@
+@@ -0,0 +1,135 @@
 +/*
 + *
 + *  kernel/cpt/cpt_ubc.c
@@ -49020,13 +52090,15 @@
 +	v->cpt_next = CPT_NULL;
 +	v->cpt_object = CPT_OBJ_UBC;
 +	v->cpt_hdrlen = sizeof(*v);
-+	v->cpt_content = CPT_CONTENT_VOID;
++	v->cpt_content = CPT_CONTENT_ARRAY;
 +
 +	if (obj->o_parent != NULL)
 +		v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos;
 +	else
 +		v->cpt_parent = CPT_NULL;
 +	v->cpt_id = (obj->o_parent != NULL) ? bc->ub_uid : 0;
++	v->cpt_ub_resources = UB_RESOURCES;
++	BUILD_BUG_ON(ARRAY_SIZE(v->cpt_parms) < UB_RESOURCES * 2);
 +	for (i = 0; i < UB_RESOURCES; i++) {
 +		dump_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
 +		dump_one_bc_parm(v->cpt_parms + i * 2 + 1, bc->ub_store + i, 1);
@@ -49187,10 +52259,10 @@
 +
 diff --git a/kernel/cpt/rst_conntrack.c b/kernel/cpt/rst_conntrack.c
 new file mode 100644
-index 0000000..4c31f32
+index 0000000..b863ac4
 --- /dev/null
 +++ b/kernel/cpt/rst_conntrack.c
-@@ -0,0 +1,283 @@
+@@ -0,0 +1,328 @@
 +/*
 + *
 + *  kernel/cpt/rst_conntrack.c
@@ -49249,17 +52321,33 @@
 +	int index;
 +};
 +
-+static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple, int dir)
++static int decode_tuple(struct cpt_ipct_tuple *v,
++			 struct ip_conntrack_tuple *tuple, int dir,
++			 cpt_context_t *ctx)
 +{
 +	tuple->dst.ip = v->cpt_dst;
 +	tuple->dst.u.all = v->cpt_dstport;
-+	tuple->dst.protonum = v->cpt_protonum;
-+	tuple->dst.dir = v->cpt_dir;
-+	if (dir != tuple->dst.dir)
-+		wprintk("dir != tuple->dst.dir\n");
++	if (ctx->image_version < CPT_VERSION_16) {
++		/* In 2.6.9 kernel protonum has short type */
++		__u16 protonum = *(__u16 *)&v->cpt_protonum;
++		if (protonum > 0xff && protonum < 0xffff) {
++			eprintk_ctx("tuple: protonum > 255: %u\n", protonum);
++			return -EINVAL;
++		}
++		tuple->dst.protonum = protonum;
++		tuple->dst.dir = dir;
++	} else {
++		tuple->dst.protonum = v->cpt_protonum;
++		tuple->dst.dir = v->cpt_dir;
++		if (dir != tuple->dst.dir) {
++			eprintk_ctx("dir != tuple->dst.dir\n");
++			return -EINVAL;
++		}
++	}
 +
 +	tuple->src.ip = v->cpt_src;
 +	tuple->src.u.all = v->cpt_srcport;
++	return 0;
 +}
 +
 +
@@ -49314,16 +52402,13 @@
 +			return -ENOMEM;
 +		}
 +
-+		if (ct->helper->timeout && !del_timer(&exp->timeout)) {
-+			/* Dying already. We can do nothing. */
++		if (decode_tuple(&v.cpt_tuple, &exp->tuple, 0, ctx) ||
++		    decode_tuple(&v.cpt_mask, &exp->mask, 0, ctx)) {
++			ip_conntrack_expect_put(exp);
 +			write_unlock_bh(&ip_conntrack_lock);
-+			dprintk_ctx("conntrack expectation is dying\n");
-+			continue;
++			return -EINVAL;
 +		}
 +
-+		decode_tuple(&v.cpt_tuple, &exp->tuple, 0);
-+		decode_tuple(&v.cpt_mask, &exp->mask, 0);
-+
 +		exp->master = ct;
 +		nf_conntrack_get(&ct->ct_general);
 +		ip_conntrack_expect_insert(exp);
@@ -49337,11 +52422,12 @@
 +		} else
 +#endif
 +		if (ct->helper->timeout) {
-+			exp->timeout.expires = jiffies + v.cpt_timeout;
-+			add_timer(&exp->timeout);
++			mod_timer(&exp->timeout, jiffies + v.cpt_timeout);
 +		}
 +		write_unlock_bh(&ip_conntrack_lock);
 +
++		ip_conntrack_expect_put(exp);
++
 +		pos += v.cpt_next;
 +	}
 +	return 0;
@@ -49359,8 +52445,11 @@
 +	if (c == NULL)
 +		return -ENOMEM;
 +
-+	decode_tuple(&ci->cpt_tuple[0], &orig, 0);
-+	decode_tuple(&ci->cpt_tuple[1], &repl, 1);
++	if (decode_tuple(&ci->cpt_tuple[0], &orig, 0, ctx) ||
++	    decode_tuple(&ci->cpt_tuple[1], &repl, 1, ctx)) {
++		kfree(c);
++		return -EINVAL;
++	}
 +
 +	conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub);
 +	if (!conntrack || IS_ERR(conntrack)) {
@@ -49373,14 +52462,15 @@
 +	*ct_list = c;
 +	c->index = ci->cpt_index;
 +
-+	decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple, 0);
-+	decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple, 1);
-+
 +	conntrack->status = ci->cpt_status;
 +
 +	memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
 +	memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));
 +
++#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
++	conntrack->mark = ci->cpt_mark;
++#endif
++
 +#ifdef CONFIG_IP_NF_NAT_NEEDED
 +#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
 +	defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
@@ -49412,9 +52502,34 @@
 +	if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)
 +		err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
 +
++	if (conntrack->helper)
++		ip_conntrack_helper_put(conntrack->helper);
++
 +	return err;
 +}
 +
++static void convert_conntrack_image(struct cpt_ip_conntrack_image *ci)
++{
++	struct cpt_ip_conntrack_image_compat img;
++
++	memcpy(&img, ci, sizeof(struct cpt_ip_conntrack_image_compat));
++	/* 
++	 * Size of cpt_help_data in 2.6.9 kernel is 16 bytes,
++	 * in 2.6.18 cpt_help_data size is 24 bytes, so zero the remaining 8 bytes
++	 */
++	memset(ci->cpt_help_data + 4, 0, 8);
++	ci->cpt_initialized = img.cpt_initialized;
++	ci->cpt_num_manips = img.cpt_num_manips;
++	memcpy(ci->cpt_nat_manips, img.cpt_nat_manips, sizeof(img.cpt_nat_manips));
++	memcpy(ci->cpt_nat_seq, img.cpt_nat_seq, sizeof(img.cpt_nat_seq));
++	ci->cpt_masq_index = img.cpt_masq_index;
++	/* Id will be assigned in ip_conntrack_hash_insert(), so make it 0 here */
++	ci->cpt_id = 0;
++	/* mark was not supported in 2.6.9, so set it to default 0 value */
++	ci->cpt_mark = 0;
++
++}
++
 +int rst_restore_ip_conntrack(struct cpt_context * ctx)
 +{
 +	int err = 0;
@@ -49445,6 +52560,8 @@
 +		err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx);
 +		if (err)
 +			break;
++		if (ctx->image_version < CPT_VERSION_16)
++			convert_conntrack_image(&ci);
 +		err = undump_one_ct(&ci, sec, &ct_list, ctx);
 +		if (err)
 +			break;
@@ -49476,10 +52593,10 @@
 +#endif
 diff --git a/kernel/cpt/rst_context.c b/kernel/cpt/rst_context.c
 new file mode 100644
-index 0000000..c68e807
+index 0000000..0007197
 --- /dev/null
 +++ b/kernel/cpt/rst_context.c
-@@ -0,0 +1,330 @@
+@@ -0,0 +1,331 @@
 +/*
 + *
 + *  kernel/cpt/rst_context.c
@@ -49662,8 +52779,9 @@
 +	ctx->start_time.tv_nsec = h.cpt_start_nsec;
 +	ctx->kernel_config_flags = h.cpt_kernel_config[0];
 +	ctx->iptables_mask = h.cpt_iptables_mask;
-+	if (h.cpt_image_version > CPT_VERSION_32 ||
-+			CPT_VERSION_MINOR(h.cpt_image_version) > 1) {
++	if (h.cpt_image_version > CPT_CURRENT_VERSION ||
++			CPT_VERSION_MINOR(h.cpt_image_version) >
++			CPT_VERSION_MINOR(CPT_CURRENT_VERSION)) {
 +		eprintk_ctx("Unknown image version: %x. Can't restore.\n",
 +				h.cpt_image_version);
 +		err = -EINVAL;
@@ -49987,10 +53105,10 @@
 +}
 diff --git a/kernel/cpt/rst_files.c b/kernel/cpt/rst_files.c
 new file mode 100644
-index 0000000..4b21b04
+index 0000000..a84e3d3
 --- /dev/null
 +++ b/kernel/cpt/rst_files.c
-@@ -0,0 +1,1698 @@
+@@ -0,0 +1,1779 @@
 +/*
 + *
 + *  kernel/cpt/rst_files.c
@@ -50030,6 +53148,7 @@
 +#include <linux/fdtable.h>
 +#include <linux/shm.h>
 +#include <linux/signalfd.h>
++#include <linux/proc_fs.h>
 +
 +#include "cpt_obj.h"
 +#include "cpt_context.h"
@@ -50523,7 +53642,7 @@
 +				fput(file);
 +				file = dentry_open(dget(file->f_dentry),
 +						   mntget(file->f_vfsmnt),
-+						   O_WRONLY, NULL);
++						   O_WRONLY | O_LARGEFILE, NULL);
 +				if (IS_ERR(file)) {
 +					__cpt_release_buf(ctx);
 +					return PTR_ERR(file);
@@ -50825,6 +53944,7 @@
 +	struct cpt_file_image fi;
 +	__u8 *name = NULL;
 +	struct file *file;
++	struct proc_dir_entry *proc_dead_file;
 +	int flags;
 +
 +	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
@@ -50896,6 +54016,12 @@
 +					err = -EINVAL;
 +					goto err_out;
 +				}
++				if ((fi.cpt_lflags & CPT_DENTRY_HARDLINKED) &&
++				    !ctx->hardlinked_on) {
++					eprintk_ctx("Open hardlinked is off\n");
++					err = -EPERM;
++					goto err_out;
++				}
 +				goto open_file;
 +			}
 +		}
@@ -50963,8 +54089,32 @@
 +			goto map_file;
 +	}
 +
++	/* This hook is needed to open file /proc/<pid>/<somefile>
++	 * but there is no process with pid <pid>.
++	 */
++	proc_dead_file = NULL;
++	if (fi.cpt_lflags & CPT_DENTRY_PROCPID_DEAD) {
++		sprintf(name, "/proc/rst_dead_pid_file_%d", task_pid_vnr(current));
++
++		proc_dead_file = create_proc_entry(name + 6, S_IRUGO|S_IWUGO,
++						   NULL);
++		if (!proc_dead_file) {
++			eprintk_ctx("can't create proc entry %s\n", name);
++			err = -ENOMEM;
++			goto err_out;
++		}
++#ifdef CONFIG_PROC_FS
++		proc_dead_file->proc_fops = &dummy_proc_pid_file_operations;
++#endif
++	}
++
 +	file = filp_open(name, flags, 0);
 +
++	if (proc_dead_file) {
++		remove_proc_entry(proc_dead_file->name, NULL);
++		if (!IS_ERR(file))
++			d_drop(file->f_dentry);
++	}
 +map_file:
 +	if (!IS_ERR(file)) {
 +		fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
@@ -51009,7 +54159,8 @@
 +				goto err_put;
 +		}
 +	} else {
-+		if (fi.cpt_lflags & CPT_DENTRY_PROC) {
++		if ((fi.cpt_lflags & CPT_DENTRY_PROC) &&
++		    !(fi.cpt_lflags & CPT_DENTRY_PROCPID_DEAD)) {
 +			dprintk_ctx("rst_file /proc delayed\n");
 +			file = NULL;
 +		} else if (name)
@@ -51073,7 +54224,8 @@
 +extern int expand_fdtable(struct files_struct *files, int nr);
 +
 +
-+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
++static int rst_files(struct cpt_task_image *ti, struct cpt_context *ctx,
++		int from, int to)
 +{
 +	struct cpt_files_struct_image fi;
 +	struct files_struct *f = current->files;
@@ -51088,6 +54240,14 @@
 +		return 0;
 +	}
 +
++	if (from == 3) {
++		err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
++		if (err)
++			return err;
++
++		goto just_do_it;
++	}
++
 +	obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
 +	if (obj) {
 +		if (obj->o_obj != f) {
@@ -51113,6 +54273,7 @@
 +			return err;
 +	}
 +
++just_do_it:
 +	pos = ti->cpt_files + fi.cpt_hdrlen;
 +	endpos = ti->cpt_files + fi.cpt_next;
 +	while (pos < endpos) {
@@ -51122,6 +54283,9 @@
 +		err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
 +		if (err)
 +			return err;
++		if (fdi.cpt_fd < from || fdi.cpt_fd > to)
++			goto skip;
++
 +		filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
 +		if (IS_ERR(filp)) {
 +			eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp),
@@ -51139,6 +54303,8 @@
 +			if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
 +				FD_SET(fdi.cpt_fd, f->fdt->close_on_exec);
 +		}
++
++skip:
 +		pos += fdi.cpt_next;
 +	}
 +	f->next_fd = fi.cpt_next_fd;
@@ -51151,6 +54317,16 @@
 +	return 0;
 +}
 +
++int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
++{
++	return rst_files(ti, ctx, (ti->cpt_pid == 1) ? 3 : 0, INT_MAX);
++}
++
++int rst_files_std(struct cpt_task_image *ti, struct cpt_context *ctx)
++{
++	return rst_files(ti, ctx, 0, 2);
++}
++
 +int rst_do_filejobs(cpt_context_t *ctx)
 +{
 +	struct filejob *j;
@@ -51260,8 +54436,31 @@
 +		return err;
 +
 +	file = rst_file(*pos, -2, ctx);
-+	if (IS_ERR(file))
++	if (IS_ERR(file)) {
++		if (PTR_ERR(file) == -EINVAL && S_ISLNK(fi.cpt_i_mode)) {
++			/* One special case: inotify on symlink */
++			struct nameidata nd;
++			__u8 *name = NULL;
++
++			if (fi.cpt_next > fi.cpt_hdrlen)
++				name = rst_get_name(*pos + sizeof(fi), ctx);
++			if (!name) {
++				eprintk_ctx("can't get name for file\n");
++				return -EINVAL;
++			}
++			if ((err = path_lookup(name, 0, &nd)) != 0) {
++				eprintk_ctx("path_lookup %s: %d\n", name, err);
++				rst_put_name(name, ctx);
++				return -EINVAL;
++			}
++			*dp = nd.path.dentry;
++			*mp = nd.path.mnt;
++			*pos += fi.cpt_next;
++			rst_put_name(name, ctx);
++			return 0;
++		}
 +		return PTR_ERR(file);
++	}
 +
 +	*dp = dget(file->f_dentry);
 +	*mp = mntget(file->f_vfsmnt);
@@ -53041,10 +56240,10 @@
 +}
 diff --git a/kernel/cpt/rst_net.c b/kernel/cpt/rst_net.c
 new file mode 100644
-index 0000000..dc5de80
+index 0000000..4c8d482
 --- /dev/null
 +++ b/kernel/cpt/rst_net.c
-@@ -0,0 +1,628 @@
+@@ -0,0 +1,745 @@
 +/*
 + *
 + *  kernel/cpt/rst_net.c
@@ -53638,6 +56837,7 @@
 +		err = (status & 0xff00) >> 8;
 +		if (err != 0) {
 +			eprintk_ctx("iptables-restore exited with %d\n", err);
++			eprintk_ctx("Most probably some iptables modules are not loaded\n");
 +			err = -EINVAL;
 +		}
 +	} else {
@@ -53658,6 +56858,120 @@
 +	return err;
 +}
 +
++static int rst_restore_snmp_stat(struct cpt_context *ctx, void *mib[], int n,
++		loff_t *ppos, loff_t endpos)
++{
++	int err, in, i;
++	struct cpt_object_hdr o;
++	__u32 *stats;
++
++	err = rst_get_object(CPT_OBJ_BITS, *ppos, &o, ctx);
++	if (err)
++		return err;
++
++	in = o.cpt_next - o.cpt_hdrlen;
++	if (in >= PAGE_SIZE - 4) {
++		eprintk_ctx("Too long SNMP buf (%d)\n", in);
++		return -EINVAL;
++	}
++
++	if (o.cpt_content != CPT_CONTENT_DATA) {
++		if (o.cpt_content == CPT_CONTENT_VOID)
++			return 1;
++
++		eprintk_ctx("Corrupted SNMP stats\n");
++		return -EINVAL;
++	}
++
++	stats = cpt_get_buf(ctx);
++	err = ctx->pread(stats, in, ctx, (*ppos) + o.cpt_hdrlen);
++	if (err)
++		goto out;
++
++	in /= sizeof(*stats);
++	if (in > n)
++		wprintk_ctx("SNMP stats trimmed\n");
++	else
++		n = in;
++
++	for (i = 0; i < n; i++)
++		*((unsigned long *)(per_cpu_ptr(mib[0], 0)) + i) = stats[i];
++
++	*ppos += o.cpt_next;
++	if (*ppos < endpos)
++		err = 1; /* go on restoring */
++out:
++	cpt_release_buf(ctx);
++	return err;
++}
++
++static int rst_restore_snmp(struct cpt_context *ctx)
++{
++	int err;
++	loff_t sec = ctx->sections[CPT_SECT_SNMP_STATS];
++	loff_t endsec;
++	struct cpt_section_hdr h;
++	struct ve_struct *ve;
++	struct net *net;
++
++	if (sec == CPT_NULL)
++		return 0;
++
++	err = ctx->pread(&h, sizeof(h), ctx, sec);
++	if (err)
++		return err;
++	if (h.cpt_section != CPT_SECT_SNMP_STATS || h.cpt_hdrlen < sizeof(h))
++		return -EINVAL;
++
++	ve = get_exec_env();
++	net = ve->ve_netns;
++	endsec = sec + h.cpt_next;
++	sec += h.cpt_hdrlen;
++	if (sec >= endsec)
++		goto out;
++
++	err = rst_restore_snmp_stat(ctx, (void **)&net->mib.net_statistics,
++			LINUX_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&net->mib.ip_statistics,
++			IPSTATS_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&net->mib.tcp_statistics,
++			TCP_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&net->mib.udp_statistics,
++			UDP_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&net->mib.icmp_statistics,
++			ICMP_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&net->mib.icmpmsg_statistics,
++			ICMPMSG_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	err = rst_restore_snmp_stat(ctx, (void **)&ve->_ipv6_statistics,
++			IPSTATS_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&ve->_udp_stats_in6,
++			UDP_MIB_MAX, &sec, endsec);
++	if (err <= 0)
++		goto out;
++	err = rst_restore_snmp_stat(ctx, (void **)&ve->_icmpv6_statistics,
++			ICMP6_MIB_MAX, &sec, endsec);
++#endif
++	if (err == 1)
++		err = 0;
++out:
++	return err;
++}
++
 +int rst_restore_net(struct cpt_context *ctx)
 +{
 +	int err;
@@ -53671,14 +56985,16 @@
 +		err = rst_restore_iptables(ctx);
 +	if (!err)
 +		err = rst_restore_ip_conntrack(ctx);
++	if (!err)
++		err = rst_restore_snmp(ctx);
 +	return err;
 +}
 diff --git a/kernel/cpt/rst_proc.c b/kernel/cpt/rst_proc.c
 new file mode 100644
-index 0000000..2b0b283
+index 0000000..beaaa3f
 --- /dev/null
 +++ b/kernel/cpt/rst_proc.c
-@@ -0,0 +1,579 @@
+@@ -0,0 +1,582 @@
 +/*
 + *
 + *  kernel/cpt/rst_proc.c
@@ -53887,7 +57203,7 @@
 +	unlock_kernel();
 +
 +	if (cmd == CPT_TEST_CAPS) {
-+		err = test_cpu_caps();
++		err = test_cpu_caps_and_features();
 +		goto out_lock;
 +	}
 +
@@ -54087,6 +57403,9 @@
 +			fput(ctx->errorfile);
 +		ctx->errorfile = dfile;
 +		break;
++	case CPT_HARDLNK_ON:
++		ctx->hardlinked_on = 1;
++		break;
 +	case CPT_SET_VEID:
 +		if (ctx->ctx_state > 0) {
 +			err = -EBUSY;
@@ -54260,10 +57579,10 @@
 +module_exit(exit_rst);
 diff --git a/kernel/cpt/rst_process.c b/kernel/cpt/rst_process.c
 new file mode 100644
-index 0000000..19915b3
+index 0000000..000e0b9
 --- /dev/null
 +++ b/kernel/cpt/rst_process.c
-@@ -0,0 +1,1614 @@
+@@ -0,0 +1,1661 @@
 +/*
 + *
 + *  kernel/cpt/rst_process.c
@@ -54687,8 +58006,13 @@
 +		}
 +	}
 +
-+	if (si->cpt_curr_target)
++	if (si->cpt_curr_target) {
 +		current->signal->curr_target = find_task_by_vpid(si->cpt_curr_target);
++		if (current->signal->curr_target == NULL) {
++			wprintk_ctx("oops, curr_target=NULL, pid=%u\n", si->cpt_curr_target);
++			current->signal->curr_target = current;
++		}
++	}
 +	current->signal->flags = 0;
 +	*exiting = si->cpt_group_exit;
 +	current->signal->group_exit_code = si->cpt_group_exit_code;
@@ -55449,7 +58773,7 @@
 +#ifdef CONFIG_X86_32
 +	unsigned int flags;
 +
-+	flags = test_cpu_caps();
++	flags = test_cpu_caps_and_features();
 +
 +	/* if cpu does not support sse2 mask 6 bit (DAZ flag) and 16-31 bits
 +	   in MXCSR to avoid general protection fault */
@@ -55462,6 +58786,32 @@
 +#include <asm/i387.h>
 +#endif
 +
++#define RLIM_INFINITY32		0xffffffff
++#define RLIM_INFINITY64		(~0ULL)
++
++#ifdef CONFIG_X86_64
++#define rst_rlim_32_to_64(a, i, t, im)					\
++do {									\
++	if (im->cpt_rlim_##a[i] == RLIM_INFINITY32)			\
++		t->signal->rlim[i].rlim_##a = RLIM_INFINITY64;		\
++	else								\
++		t->signal->rlim[i].rlim_##a = im->cpt_rlim_##a[i];	\
++} while (0)
++#elif defined(CONFIG_X86_32)
++#define rst_rlim_64_to_32(a, i, t, im)					\
++do {									\
++	if (im->cpt_rlim_##a[i] == RLIM_INFINITY64)			\
++		t->signal->rlim[i].rlim_##a = RLIM_INFINITY32;		\
++	else if (im->cpt_rlim_##a[i] > RLIM_INFINITY32) {		\
++		eprintk_ctx("rlimit %Lu is too high for 32-bit task, "	\
++			    "dump file is corrupted\n",			\
++			    im->cpt_rlim_##a[i]);			\
++		return -EINVAL;						\
++	} else								\
++		t->signal->rlim[i].rlim_##a = im->cpt_rlim_##a[i];	\
++} while (0)
++#endif
++
 +int rst_restore_process(struct cpt_context *ctx)
 +{
 +	cpt_object_t *obj;
@@ -55574,8 +58924,23 @@
 +			tsk->signal->cmaj_flt = ti->cpt_cmaj_flt;
 +
 +			for (i=0; i<RLIM_NLIMITS; i++) {
-+				tsk->signal->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
-+				tsk->signal->rlim[i].rlim_max = ti->cpt_rlim_max[i];
++#ifdef CONFIG_X86_64
++				if (ctx->image_arch == CPT_OS_ARCH_I386) {
++					rst_rlim_32_to_64(cur, i, tsk, ti);
++					rst_rlim_32_to_64(max, i, tsk, ti);
++				} else 
++#elif defined(CONFIG_X86_32)
++				if (ctx->image_arch == CPT_OS_ARCH_EMT64) {
++					rst_rlim_64_to_32(cur, i, tsk, ti);
++					rst_rlim_64_to_32(max, i, tsk, ti);
++				} else 
++#endif
++				{
++					tsk->signal->rlim[i].rlim_cur =
++						ti->cpt_rlim_cur[i];
++					tsk->signal->rlim[i].rlim_max =
++						ti->cpt_rlim_max[i];
++				}
 +			}
 +		}
 +#endif
@@ -55809,7 +59174,8 @@
 +		}
 +
 +		tsk->ptrace = ti->cpt_ptrace;
-+		tsk->flags = ti->cpt_flags & ~PF_FROZEN;
++		tsk->flags = (tsk->flags & PF_USED_MATH) |
++			(ti->cpt_flags & CPT_TASK_FLAGS_MASK);
 +		clear_tsk_thread_flag(tsk, TIF_FREEZE);
 +		tsk->exit_signal = ti->cpt_exit_signal;
 +
@@ -55880,10 +59246,10 @@
 +}
 diff --git a/kernel/cpt/rst_socket.c b/kernel/cpt/rst_socket.c
 new file mode 100644
-index 0000000..22e1d1b
+index 0000000..78cc4ff
 --- /dev/null
 +++ b/kernel/cpt/rst_socket.c
-@@ -0,0 +1,918 @@
+@@ -0,0 +1,993 @@
 +/*
 + *
 + *  kernel/cpt/rst_socket.c
@@ -56121,7 +59487,7 @@
 +		struct sk_buff *skb;
 +		__u32 type;
 +
-+		skb = rst_skb(&pos, NULL, &type, ctx);
++		skb = rst_skb(sk, &pos, NULL, &type, ctx);
 +		if (IS_ERR(skb)) {
 +			if (PTR_ERR(skb) == -EINVAL) {
 +				int err;
@@ -56374,8 +59740,10 @@
 +
 +	setup_sock_common(sock->sk, si, pos, ctx);
 +
-+	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6)
++	if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6) {
++		rst_listen_socket_in(sock->sk, si, pos, ctx);
 +		rst_restore_synwait_queue(sock->sk, si, pos, ctx);
++	}
 +
 +	return 0;
 +
@@ -56456,7 +59824,53 @@
 +	return err;
 +}
 +
-+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx)
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++static void rst_tcp_cb_ipv4_to_ipv6(struct cpt_skb_image *v, struct sk_buff *skb)
++{
++	BUG_ON(sizeof(skb->cb) - sizeof(struct inet6_skb_parm) <
++	       sizeof(struct tcp_skb_cb) - sizeof(struct inet6_skb_parm));
++	memcpy(skb->cb, v->cpt_cb, sizeof(struct inet_skb_parm));
++	memcpy(skb->cb + sizeof(struct inet6_skb_parm),
++	       (void *)v->cpt_cb + sizeof(struct inet_skb_parm),
++	       sizeof(struct tcp_skb_cb) - sizeof(struct inet6_skb_parm));
++}
++#else
++static void rst_tcp_cb_ipv6_to_ipv4(struct cpt_skb_image *v, struct sk_buff *skb)
++{
++	BUG_ON(sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm) <
++	       sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
++	memcpy(skb->cb, v->cpt_cb, sizeof(struct inet_skb_parm));
++	memcpy(skb->cb + sizeof(struct inet_skb_parm),
++	       (void *)v->cpt_cb + sizeof(struct inet6_skb_parm),
++	       sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
++}
++#endif
++
++struct tcp_skb_cb_ipv6 {
++	union {
++		struct inet_skb_parm	h4;
++		struct inet6_skb_parm	h6;
++	} header;
++	__u32		seq;
++	__u32		end_seq;
++	__u32		when;
++	__u8		flags;
++	__u8		sacked;
++	__u16		urg_ptr;
++	__u32		ack_seq;
++};
++
++#define check_tcp_cb_conv(op1, op2) do {			\
++	if (!ctx->tcp_cb_convert)				\
++		ctx->tcp_cb_convert = CPT_TCP_CB_##op1;		\
++	else if (ctx->tcp_cb_convert == CPT_TCP_CB_##op2) {	\
++		kfree_skb(skb);					\
++		return ERR_PTR(-EINVAL);			\
++	}							\
++} while (0)
++
++struct sk_buff * rst_skb(struct sock *sk, loff_t *pos_p, __u32 *owner,
++			 __u32 *queue, struct cpt_context *ctx)
 +{
 +	int err;
 +	struct sk_buff *skb;
@@ -56490,7 +59904,34 @@
 +	skb->mac_header = skb->head + v.cpt_mac;
 +#endif
 +	BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v.cpt_cb));
-+	memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
++	if (sk->sk_protocol == IPPROTO_TCP) {
++		/* 
++		 * According to Alexey all packets in queue have non-zero
++		 * flags, as at least TCPCB_FLAG_ACK is set on them.
++		 * Luckily for us, offset of field flags in tcp_skb_cb struct
++		 * with IPv6 is higher than total size of tcp_skb_cb struct
++		 * without IPv6.
++		 */
++		if (ctx->image_version >= CPT_VERSION_18_2 ||
++				((struct tcp_skb_cb_ipv6 *)&v.cpt_cb)->flags) {
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++			check_tcp_cb_conv(NOT_CONV, CONV);
++			memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
++#else
++			check_tcp_cb_conv(CONV, NOT_CONV);
++			rst_tcp_cb_ipv6_to_ipv4(&v, skb);
++#endif
++		} else {
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++			check_tcp_cb_conv(CONV, NOT_CONV);
++			rst_tcp_cb_ipv4_to_ipv6(&v, skb);
++#else
++			check_tcp_cb_conv(NOT_CONV, CONV);
++			memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
++#endif
++		}
++	} else
++		memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
 +	skb->mac_len = v.cpt_mac_len;
 +
 +	skb->csum = v.cpt_csum;
@@ -56568,7 +60009,7 @@
 +		struct sock *owner_sk;
 +		__u32 owner;
 +
-+		skb = rst_skb(&pos, &owner, NULL, ctx);
++		skb = rst_skb(sk, &pos, &owner, NULL, ctx);
 +		if (IS_ERR(skb)) {
 +			if (PTR_ERR(skb) == -EINVAL) {
 +				int err;
@@ -56804,10 +60245,10 @@
 +
 diff --git a/kernel/cpt/rst_socket_in.c b/kernel/cpt/rst_socket_in.c
 new file mode 100644
-index 0000000..f63df90
+index 0000000..08bf907
 --- /dev/null
 +++ b/kernel/cpt/rst_socket_in.c
-@@ -0,0 +1,492 @@
+@@ -0,0 +1,578 @@
 +/*
 + *
 + *  kernel/cpt/rst_socket_in.c
@@ -56869,7 +60310,7 @@
 +		struct sk_buff *skb;
 +		__u32 type;
 +
-+		skb = rst_skb(&pos, NULL, &type, ctx);
++		skb = rst_skb(sk, &pos, NULL, &type, ctx);
 +		if (IS_ERR(skb)) {
 +			if (PTR_ERR(skb) == -EINVAL) {
 +				int err;
@@ -57104,6 +60545,62 @@
 +	return 0;
 +}
 +
++static void rst_listen_socket_tcp(struct cpt_sock_image *si, struct sock *sk)
++{
++	struct tcp_sock *tp = tcp_sk(sk);
++
++	tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
++	tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
++	tp->tcp_header_len = si->cpt_tcp_header_len;
++	inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
++
++	/* Next options are inherited by children */
++	tp->mss_cache = si->cpt_mss_cache;
++	inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len;
++	tp->reordering = si->cpt_reordering;
++	tp->nonagle = si->cpt_nonagle;
++	tp->keepalive_probes = si->cpt_keepalive_probes;
++	tp->rx_opt.user_mss = si->cpt_user_mss;
++	inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries;
++	tp->keepalive_time = si->cpt_keepalive_time;
++	tp->keepalive_intvl = si->cpt_keepalive_intvl;
++	tp->linger2 = si->cpt_linger2;
++}
++
++int rst_listen_socket_in( struct sock *sk, struct cpt_sock_image *si,
++			  loff_t pos, struct cpt_context *ctx)
++{
++	struct inet_sock *inet = inet_sk(sk);
++
++	lock_sock(sk);
++
++	inet->uc_ttl = si->cpt_uc_ttl;
++	inet->tos = si->cpt_tos;
++	inet->cmsg_flags = si->cpt_cmsg_flags;
++	inet->pmtudisc = si->cpt_pmtudisc;
++	inet->recverr = si->cpt_recverr;
++	inet->freebind = si->cpt_freebind;
++	inet->id = si->cpt_idcounter;
++
++	if (sk->sk_family == AF_INET6) {
++		struct ipv6_pinfo *np = inet6_sk(sk);
++
++		np->frag_size = si->cpt_frag_size6;
++		np->hop_limit = si->cpt_hop_limit6;
++
++		np->rxopt.all = si->cpt_rxopt6;
++		np->mc_loop = si->cpt_mc_loop6;
++		np->recverr = si->cpt_recverr6;
++		np->pmtudisc = si->cpt_pmtudisc6;
++		np->ipv6only = si->cpt_ipv6only6;
++	}
++
++	if (sk->sk_protocol == IPPROTO_TCP)
++		rst_listen_socket_tcp(si, sk);
++
++	release_sock(sk);
++	return 0;
++}
 +
 +int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
 +		  struct cpt_context *ctx)
@@ -57215,26 +60712,49 @@
 +			      loff_t pos, struct cpt_context *ctx)
 +{
 +	int err;
-+	loff_t end = si->cpt_next;
++	loff_t end = pos + si->cpt_next;
 +
 +	pos += si->cpt_hdrlen;
++
++	lock_sock(sk);
 +	while (pos < end) {
 +		struct cpt_openreq_image oi;
 +
 +		err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
 +		if (err) {
 +			err = rst_sock_attr(&pos, sk, ctx);
-+			if (err)
++			if (err) {
++				release_sock(sk);
 +				return err;
++			}
++
 +			continue;
 +		}
 +
 +		if (oi.cpt_object == CPT_OBJ_OPENREQ) {
-+			struct request_sock *req = reqsk_alloc(&tcp_request_sock_ops);
-+			if (req == NULL)
++			struct request_sock *req;
++
++			if (oi.cpt_family == AF_INET6 &&
++			    sk->sk_family != AF_INET6)
++				/* related to non initialized cpt_family bug */
++				goto next;
++
++			if (oi.cpt_family == AF_INET6) {
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++				req = reqsk_alloc(&tcp6_request_sock_ops);
++#else
++				release_sock(sk);
++				return -EINVAL;
++#endif
++			} else {
++				req = reqsk_alloc(&tcp_request_sock_ops);
++			}
++
++			if (req == NULL) {
++				release_sock(sk);
 +				return -ENOMEM;
++			}
 +
-+			memset(req, 0, sizeof(*req));
 +			tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
 +			tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
 +			inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
@@ -57247,26 +60767,33 @@
 +			inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
 +			inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
 +			inet_rsk(req)->acked = oi.cpt_acked;
++			inet_rsk(req)->opt = NULL;
 +			req->window_clamp = oi.cpt_window_clamp;
 +			req->rcv_wnd = oi.cpt_rcv_wnd;
 +			req->ts_recent = oi.cpt_ts_recent;
 +			req->expires = jiffies_import(oi.cpt_expires);
++			req->sk = NULL;
++			req->secid = 0;
++			req->peer_secid = 0;
 +
-+			if (oi.cpt_family == AF_INET) {
-+				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
-+				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
-+				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-+			} else {
++			if (oi.cpt_family == AF_INET6) {
 +#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++				inet6_rsk(req)->pktopts = NULL;
 +				memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16);
 +				memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16);
 +				inet6_rsk(req)->iif = oi.cpt_iif;
 +				inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 +#endif
++			} else {
++				memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
++				memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
++				inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
 +			}
 +		}
++next:
 +		pos += oi.cpt_next;
 +	}
++	release_sock(sk);
 +	return 0;
 +}
 +
@@ -57302,10 +60829,10 @@
 +#endif
 diff --git a/kernel/cpt/rst_sysvipc.c b/kernel/cpt/rst_sysvipc.c
 new file mode 100644
-index 0000000..0f21493
+index 0000000..b5e62a7
 --- /dev/null
 +++ b/kernel/cpt/rst_sysvipc.c
-@@ -0,0 +1,634 @@
+@@ -0,0 +1,639 @@
 +/*
 + *
 + *  kernel/cpt/rst_sysvipc.c
@@ -57468,8 +60995,11 @@
 +				 u.shmi.cpt_segsz, u.shmi.cpt_mode);
 +	if (!IS_ERR(file)) {
 +		err = fixup_shm(file, &u.shmi);
-+		if (err != -EEXIST && dpos < epos)
++		if (err != -EEXIST && dpos < epos) {
 +			err = fixup_shm_data(file, dpos, epos, ctx);
++			if (err)
++				goto err_put;
++		}
 +	} else if (IS_ERR(file) && PTR_ERR(file) == -EEXIST) {
 +		struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
 +		struct shmid_kernel *shp;
@@ -57482,6 +61012,8 @@
 +	}
 +	return file;
 +
++err_put:
++	fput(file);
 +err_out:
 +	return ERR_PTR(err);
 +}
@@ -58332,10 +61864,10 @@
 +}
 diff --git a/kernel/cpt/rst_ubc.c b/kernel/cpt/rst_ubc.c
 new file mode 100644
-index 0000000..e7f717e
+index 0000000..db1f982
 --- /dev/null
 +++ b/kernel/cpt/rst_ubc.c
-@@ -0,0 +1,133 @@
+@@ -0,0 +1,144 @@
 +/*
 + *
 + *  kernel/cpt/rst_ubc.c
@@ -58396,7 +61928,7 @@
 +{
 +	struct user_beancounter *bc;
 +	cpt_object_t *pobj;
-+	int i;
++	int resources, i;
 +
 +	if (v->cpt_parent != CPT_NULL) {
 +		pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
@@ -58417,7 +61949,15 @@
 +			CPT_VERSION_MINOR(ctx->image_version) < 1)
 +		goto out;
 +
-+	for (i = 0; i < UB_RESOURCES; i++) {
++	if (v->cpt_content == CPT_CONTENT_ARRAY)
++		resources = v->cpt_ub_resources;
++	else
++		resources = UB_RESOURCES_COMPAT;
++
++	if (resources > UB_RESOURCES)
++		return -EINVAL;
++
++	for (i = 0; i < resources; i++) {
 +		restore_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
 +		restore_one_bc_parm(v->cpt_parms + i * 2 + 1,
 +				bc->ub_store + i, 1);
@@ -58454,9 +61994,12 @@
 +		cpt_obj_setpos(obj, start, ctx);
 +		intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
 +
-+		restore_one_bc(v, obj, ctx);
++		err = restore_one_bc(v, obj, ctx);
 +
 +		cpt_release_buf(ctx);
++		if (err)
++			return err;
++
 +		start += v->cpt_next;
 +	}
 +	return 0;
@@ -58471,10 +62014,10 @@
 +}
 diff --git a/kernel/cpt/rst_undump.c b/kernel/cpt/rst_undump.c
 new file mode 100644
-index 0000000..aadddcb
+index 0000000..68cc6c2
 --- /dev/null
 +++ b/kernel/cpt/rst_undump.c
-@@ -0,0 +1,1069 @@
+@@ -0,0 +1,1077 @@
 +/*
 + *
 + *  kernel/cpt/rst_undump.c
@@ -58589,6 +62132,8 @@
 +	// // ve->start_cycles -= (s64)i->start_jiffies_delta * cycles_per_jiffy;
 +
 +	ctx->last_vpid = i->last_pid;
++	if (i->rnd_va_space)
++		ve->_randomize_va_space = i->rnd_va_space - 1;
 +
 +	err = 0;
 +out_rel:
@@ -58626,7 +62171,7 @@
 +	param.known_features = (ctx->image_version < CPT_VERSION_18) ?
 +		VE_FEATURES_OLD : ~(__u64)0;
 +
-+	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2,
++	err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK|VE_EXCLUSIVE, 2,
 +			&param, sizeof(param));
 +	if (err < 0)
 +		eprintk_ctx("real_env_create: %d\n", err);
@@ -58769,6 +62314,12 @@
 +			goto out;
 +		}
 +
++		err = rst_files_std(ti, ctx);
++		if (err) {
++			eprintk_ctx("rst_root_stds: %d\n", err);
++			goto out;
++		}
++
 +		err = rst_root_namespace(ctx);
 +		if (err) {
 +			eprintk_ctx("rst_namespace: %d\n", err);
@@ -59558,7 +63109,7 @@
  		    (!cputime_eq(p->utime, cputime_zero) ||
  		     !cputime_eq(p->stime, cputime_zero)))
 diff --git a/kernel/exit.c b/kernel/exit.c
-index f7864ac..7773280 100644
+index f7864ac..38b3e22 100644
 --- a/kernel/exit.c
 +++ b/kernel/exit.c
 @@ -22,6 +22,9 @@
@@ -59621,7 +63172,16 @@
  	call_rcu(&p->rcu, delayed_put_task_struct);
  
  	p = leader;
-@@ -526,6 +540,7 @@ void put_files_struct(struct files_struct *files)
+@@ -422,6 +436,8 @@ void daemonize(const char *name, ...)
+ 	va_list args;
+ 	sigset_t blocked;
+ 
++	(void)virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
++
+ 	va_start(args, name);
+ 	vsnprintf(current->comm, sizeof(current->comm), name, args);
+ 	va_end(args);
+@@ -526,6 +542,7 @@ void put_files_struct(struct files_struct *files)
  		free_fdtable(fdt);
  	}
  }
@@ -59629,7 +63189,7 @@
  
  void reset_files_struct(struct files_struct *files)
  {
-@@ -598,10 +613,10 @@ retry:
+@@ -598,10 +615,10 @@ retry:
  	 * Search through everything else. We should not get
  	 * here often
  	 */
@@ -59642,7 +63202,7 @@
  
  	read_unlock(&tasklist_lock);
  	/*
-@@ -640,7 +655,7 @@ assign_new_owner:
+@@ -640,7 +657,7 @@ assign_new_owner:
   * Turn us into a lazy TLB process if we
   * aren't already..
   */
@@ -59651,7 +63211,7 @@
  {
  	struct mm_struct *mm = tsk->mm;
  	struct core_state *core_state;
-@@ -648,6 +663,10 @@ static void exit_mm(struct task_struct * tsk)
+@@ -648,6 +665,10 @@ static void exit_mm(struct task_struct * tsk)
  	mm_release(tsk, mm);
  	if (!mm)
  		return;
@@ -59662,7 +63222,7 @@
  	/*
  	 * Serialize with any possible pending coredump.
  	 * We must hold mmap_sem around checking core_state
-@@ -692,6 +711,7 @@ static void exit_mm(struct task_struct * tsk)
+@@ -692,6 +713,7 @@ static void exit_mm(struct task_struct * tsk)
  	mm_update_next_owner(mm);
  	mmput(mm);
  }
@@ -59670,7 +63230,7 @@
  
  /*
   * When we die, we re-parent all our children.
-@@ -706,7 +726,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
+@@ -706,7 +728,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
  	struct task_struct *thread;
  
  	thread = father;
@@ -59679,7 +63239,7 @@
  		if (thread->flags & PF_EXITING)
  			continue;
  		if (unlikely(pid_ns->child_reaper == father))
-@@ -839,11 +859,16 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
+@@ -839,11 +861,16 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
  	     tsk->self_exec_id != tsk->parent_exec_id))
  		tsk->exit_signal = SIGCHLD;
  
@@ -59696,7 +63256,7 @@
  
  	/* mt-exec, de_thread() is waiting for us */
  	if (thread_group_leader(tsk) &&
-@@ -900,6 +925,7 @@ NORET_TYPE void do_exit(long code)
+@@ -900,6 +927,7 @@ NORET_TYPE void do_exit(long code)
  		panic("Attempted to kill the idle task!");
  
  	tracehook_report_exit(&code);
@@ -59704,7 +63264,7 @@
  
  	validate_creds_for_do_exit(tsk);
  
-@@ -983,7 +1009,15 @@ NORET_TYPE void do_exit(long code)
+@@ -983,7 +1011,15 @@ NORET_TYPE void do_exit(long code)
  	 */
  	perf_event_exit_task(tsk);
  
@@ -59721,7 +63281,7 @@
  #ifdef CONFIG_NUMA
  	mpol_put(tsk->mempolicy);
  	tsk->mempolicy = NULL;
-@@ -1626,7 +1660,7 @@ repeat:
+@@ -1626,7 +1662,7 @@ repeat:
  
  		if (wo->wo_flags & __WNOTHREAD)
  			break;
@@ -59730,7 +63290,7 @@
  	read_unlock(&tasklist_lock);
  
  notask:
-@@ -1753,6 +1787,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
+@@ -1753,6 +1789,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
  	asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
  	return ret;
  }
@@ -59740,10 +63300,10 @@
  
 diff --git a/kernel/fairsched.c b/kernel/fairsched.c
 new file mode 100644
-index 0000000..bfa5c33
+index 0000000..7cbd309
 --- /dev/null
 +++ b/kernel/fairsched.c
-@@ -0,0 +1,633 @@
+@@ -0,0 +1,683 @@
 +/*
 + * Fair Scheduler
 + *
@@ -59861,7 +63421,7 @@
 +{
 +	int retval;
 +
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +
 +	mutex_lock(&fairsched_mutex);
@@ -59902,7 +63462,7 @@
 +{
 +	int retval;
 +
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +
 +	mutex_lock(&fairsched_mutex);
@@ -59936,7 +63496,7 @@
 +{
 +	int retval;
 +
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +
 +	mutex_lock(&fairsched_mutex);
@@ -59964,7 +63524,7 @@
 +{
 +	int retval;
 +
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +
 +	mutex_lock(&fairsched_mutex);
@@ -60015,7 +63575,7 @@
 +{
 +	int retval;
 +
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +
 +	mutex_lock(&fairsched_mutex);
@@ -60063,7 +63623,7 @@
 +{
 +	int retval;
 +
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +
 +	mutex_lock(&fairsched_mutex);
@@ -60074,6 +63634,56 @@
 +}
 +EXPORT_SYMBOL(sys_fairsched_mvpr);
 +
++int fairsched_new_node(int id, unsigned int vcpus)
++{	/* creates node @id and moves current into it; 0 on success, else the helper's error */
++	int err;
++
++	mutex_lock(&fairsched_mutex);	/* held across every do_fairsched_*() call below */
++	/*
++	 * We refuse to switch to an already existing node since nodes
++	 * keep a pointer to their ve_struct...
++	 */
++	err = do_fairsched_mknod(0, 1, id);
++	if (err < 0) {
++		printk(KERN_WARNING "Can't create fairsched node %d\n", id);
++		goto out;
++	}
++#if 0	/* vcpus setup is compiled out, so @vcpus is unused for now */
++	err = do_fairsched_vcpus(id, vcpus);
++	if (err) {
++		printk(KERN_WARNING "Can't set sched vcpus on node %d\n", id);
++		goto cleanup;
++	}
++#endif
++	err = do_fairsched_mvpr(current->pid, id);
++	if (err) {
++		printk(KERN_WARNING "Can't switch to fairsched node %d\n", id);
++		goto cleanup;
++	}
++	mutex_unlock(&fairsched_mutex);
++	return 0;
++
++cleanup:	/* remove the node created above, then fall through to unlock */
++	if (do_fairsched_rmnod(id))
++		printk(KERN_ERR "Can't clean fairsched node %d\n", id);
++out:
++	mutex_unlock(&fairsched_mutex);
++	return err;
++}
++EXPORT_SYMBOL(fairsched_new_node);
++
++void fairsched_drop_node(int id)
++{	/* moves current back to the init node if it sits on @id, then removes @id */
++	mutex_lock(&fairsched_mutex);
++	if (task_fairsched_node_id(current) == id)
++		if (do_fairsched_mvpr(current->pid, FAIRSCHED_INIT_NODE_ID))
++			printk(KERN_WARNING "Can't leave sched node %d\n", id);
++	if (do_fairsched_rmnod(id))	/* attempted even if the move failed; errors are only logged */
++		printk(KERN_ERR "Can't remove fairsched node %d\n", id);
++	mutex_unlock(&fairsched_mutex);
++}
++EXPORT_SYMBOL(fairsched_drop_node);
++
 +#ifdef CONFIG_PROC_FS
 +
 +/*********************************************************************/
@@ -61016,7 +64626,7 @@
  /**
   * kthread_stop - stop a thread created by kthread_create().
 diff --git a/kernel/lockdep.c b/kernel/lockdep.c
-index 9af5672..99c3c9b 100644
+index f672d51..bc200db 100644
 --- a/kernel/lockdep.c
 +++ b/kernel/lockdep.c
 @@ -3742,7 +3742,7 @@ retry:
@@ -61038,7 +64648,7 @@
  	printk("\n");
  	printk("=============================================\n\n");
 diff --git a/kernel/module.c b/kernel/module.c
-index dfa33e8..48a2edc 100644
+index a4aae35..6d7a625 100644
 --- a/kernel/module.c
 +++ b/kernel/module.c
 @@ -2915,6 +2915,8 @@ static char *module_flags(struct module *mod, char *buf)
@@ -61656,7 +65266,7 @@
  	rcu_read_unlock();
  	/* If we failed to send the signal the timer stops. */
 diff --git a/kernel/power/process.c b/kernel/power/process.c
-index cc2e553..3122fcb 100644
+index e7cd671..732f532 100644
 --- a/kernel/power/process.c
 +++ b/kernel/power/process.c
 @@ -15,6 +15,8 @@
@@ -61739,7 +65349,7 @@
  			continue;
  
 @@ -142,8 +148,10 @@ static void thaw_tasks(bool nosig_only)
- 		if (cgroup_frozen(p))
+ 		if (cgroup_freezing_or_frozen(p))
  			continue;
  
 -		thaw_process(p);
@@ -61752,7 +65362,7 @@
  }
  
 diff --git a/kernel/printk.c b/kernel/printk.c
-index f38b07f..517bd6a 100644
+index f38b07f..1041e53 100644
 --- a/kernel/printk.c
 +++ b/kernel/printk.c
 @@ -31,7 +31,9 @@
@@ -61973,20 +65583,21 @@
  
  	boot_delay_msec();
  	printk_delay();
-@@ -705,6 +754,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)
+@@ -705,6 +754,13 @@ asmlinkage int vprintk(const char *fmt, va_list args)
  	spin_lock(&logbuf_lock);
  	printk_cpu = this_cpu;
  
 +	err = ve_log_init();
 +	if (err) {
-+		spin_unlock_irqrestore(&logbuf_lock, flags);
-+		return err;
++		spin_unlock(&logbuf_lock);
++		printed_len = err;
++		goto out_lockdep;
 +	}
 +
  	if (recursion_bug) {
  		recursion_bug = 0;
  		strcpy(printk_buf, recursion_bug_msg);
-@@ -788,7 +843,13 @@ asmlinkage int vprintk(const char *fmt, va_list args)
+@@ -788,19 +844,67 @@ asmlinkage int vprintk(const char *fmt, va_list args)
  	 * will release 'logbuf_lock' regardless of whether it
  	 * actually gets the semaphore or not.
  	 */
@@ -61994,14 +65605,24 @@
 +	if (!ve_is_super(get_exec_env())) {
 +		need_wake = (ve_log_start != ve_log_end);
 +		printk_cpu = UINT_MAX;
-+		spin_unlock_irqrestore(&logbuf_lock, flags);
++		spin_unlock(&logbuf_lock);
++		lockdep_on();
++		raw_local_irq_restore(flags);
 +		if (!oops_in_progress && need_wake)
 +			wake_up_interruptible(&ve_log_wait);
++		goto out_preempt;
 +	} else if (acquire_console_semaphore_for_printk(this_cpu))
  		release_console_sem();
  
++out_lockdep:
  	lockdep_on();
-@@ -801,6 +862,41 @@ out_restore_irqs:
+ out_restore_irqs:
+ 	raw_local_irq_restore(flags);
+ 
++out_preempt:
+ 	preempt_enable();
+ 	return printed_len;
+ }
  EXPORT_SYMBOL(printk);
  EXPORT_SYMBOL(vprintk);
  
@@ -62019,12 +65640,14 @@
 +asmlinkage int ve_vprintk(int dst, const char *fmt, va_list args)
 +{
 +	int printed_len;
++	va_list args2;
 +
 +	printed_len = 0;
++	va_copy(args2, args);
 +	if (ve_is_super(get_exec_env()) || (dst & VE0_LOG))
 +		printed_len = vprintk(fmt, args);
 +	if (!ve_is_super(get_exec_env()) && (dst & VE_LOG))
-+		printed_len = __vprintk(fmt, args);
++		printed_len = __vprintk(fmt, args2);
 +	return printed_len;
 +}
 +
@@ -62043,7 +65666,7 @@
  #else
  
  static void call_console_drivers(unsigned start, unsigned end)
-@@ -1058,6 +1154,7 @@ void release_console_sem(void)
+@@ -1058,6 +1162,7 @@ void release_console_sem(void)
  		_con_start = con_start;
  		_log_end = log_end;
  		con_start = log_end;		/* Flush */
@@ -62051,7 +65674,7 @@
  		spin_unlock(&logbuf_lock);
  		stop_critical_timings();	/* don't trace print latency */
  		call_console_drivers(_con_start, _log_end);
-@@ -1066,6 +1163,7 @@ void release_console_sem(void)
+@@ -1066,6 +1171,7 @@ void release_console_sem(void)
  	}
  	console_locked = 0;
  	up(&console_sem);
@@ -62059,7 +65682,7 @@
  	spin_unlock_irqrestore(&logbuf_lock, flags);
  	if (wake_klogd)
  		wake_up_klogd();
-@@ -1382,6 +1480,36 @@ int printk_ratelimit(void)
+@@ -1382,6 +1488,36 @@ int printk_ratelimit(void)
  }
  EXPORT_SYMBOL(printk_ratelimit);
  
@@ -62096,6 +65719,72 @@
  /**
   * printk_timed_ratelimit - caller-controlled printk ratelimiting
   * @caller_jiffies: pointer to caller's state
+@@ -1405,3 +1541,65 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ }
+ EXPORT_SYMBOL(printk_timed_ratelimit);
+ #endif
++
++static cpumask_t nmi_show_regs_cpus = CPU_MASK_NONE;
++static unsigned long nmi_show_regs_timeout;
++
++void __attribute__((weak)) send_nmi_ipi_allbutself(void)
++{	/* weak default; presumably overridden where NMI IPIs can really be sent */
++	cpus_clear(nmi_show_regs_cpus);	/* nothing is sent here, so no CPU stays pending */
++}
++
++static void busted_show_regs(struct pt_regs *regs, int in_nmi)
++{	/* prints a banner and @regs between bust_spinlocks(1) and bust_spinlocks(0) */
++	if (!regs || (in_nmi && spin_is_locked(&logbuf_lock)))
++		return;	/* no regs, or in NMI with logbuf_lock held (presumably printk would spin) */
++
++	bust_spinlocks(1);
++	printk("----------- IPI show regs -----------\n");
++	show_regs(regs);
++	bust_spinlocks(0);
++}
++
++void nmi_show_regs(struct pt_regs *regs, int in_nmi)
++{	/* dumps our regs, then calls send_nmi_ipi_allbutself() for the other online CPUs */
++	if (cpus_empty(nmi_show_regs_cpus))
++		goto doit;	/* no earlier request is still pending */
++
++	/* Previous request still in progress */
++	if (time_before(jiffies, nmi_show_regs_timeout))
++		return;
++
++	if (!in_nmi || !spin_is_locked(&logbuf_lock)) {	/* same guard as busted_show_regs() */
++		int cpu;
++
++		bust_spinlocks(1);
++		printk("previous show regs lost IPI to: ");
++		for_each_cpu_mask(cpu, nmi_show_regs_cpus)	/* CPUs that never cleared their bit */
++			printk("%d ", cpu);
++		printk("\n");
++		bust_spinlocks(0);
++	}
++
++doit:	/* new request: HZ/10 jiffies (a tenth of a second) window, all online CPUs but us */
++	nmi_show_regs_timeout = jiffies + HZ/10;
++	nmi_show_regs_cpus = cpu_online_map;
++	cpu_clear(raw_smp_processor_id(), nmi_show_regs_cpus);
++	busted_show_regs(regs, in_nmi);
++	send_nmi_ipi_allbutself();
++}
++
++/* call only from nmi handler; returns 1 if @cpu had a pending request, else 0 */
++int do_nmi_show_regs(struct pt_regs *regs, int cpu)
++{
++	static DEFINE_SPINLOCK(nmi_show_regs_lock);	/* one CPU at a time prints and clears */
++
++	if (!cpu_isset(cpu, nmi_show_regs_cpus))
++		return 0;
++
++	spin_lock(&nmi_show_regs_lock);
++	busted_show_regs(regs, 1);
++	cpu_clear(cpu, nmi_show_regs_cpus);	/* this CPU's part of the request is done */
++	spin_unlock(&nmi_show_regs_lock);
++	return 1;
++}
 diff --git a/kernel/ptrace.c b/kernel/ptrace.c
 index 23bd09c..8967db7 100644
 --- a/kernel/ptrace.c
@@ -62159,7 +65848,7 @@
  	child = find_task_by_vpid(pid);
  	if (child)
 diff --git a/kernel/sched.c b/kernel/sched.c
-index ed61192..e66f256 100644
+index 34d924e..bf1165c 100644
 --- a/kernel/sched.c
 +++ b/kernel/sched.c
 @@ -71,6 +71,8 @@
@@ -62190,7 +65879,7 @@
  	struct task_struct *curr, *idle;
  	unsigned long next_balance;
  	struct mm_struct *prev_mm;
-@@ -647,6 +654,11 @@ static inline int cpu_of(struct rq *rq)
+@@ -647,6 +654,12 @@ static inline int cpu_of(struct rq *rq)
  #endif
  }
  
@@ -62198,11 +65887,12 @@
 +DEFINE_SPINLOCK(kstat_glb_lock);
 +EXPORT_SYMBOL(kstat_glob);
 +EXPORT_SYMBOL(kstat_glb_lock);
++static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, glob_kstat_lat);
 +
  /*
   * The domain tree (rq->sd) is protected by RCU's quiescent state transition.
   * See detach_destroy_domains: synchronize_sched for details.
-@@ -998,6 +1010,220 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+@@ -998,6 +1011,220 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
  	spin_unlock_irqrestore(&rq->lock, *flags);
  }
  
@@ -62423,7 +66113,7 @@
  /*
   * this_rq_lock - lock this runqueue and disable interrupts.
   */
-@@ -1943,11 +2169,21 @@ static int effective_prio(struct task_struct *p)
+@@ -1943,11 +2170,21 @@ static int effective_prio(struct task_struct *p)
   */
  static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
  {
@@ -62446,7 +66136,7 @@
  }
  
  /*
-@@ -1955,11 +2191,31 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+@@ -1955,11 +2192,31 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
   */
  static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
  {
@@ -62479,7 +66169,7 @@
  }
  
  /**
-@@ -2278,6 +2534,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+@@ -2276,6 +2533,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
  
  	return ncsw;
  }
@@ -62487,7 +66177,7 @@
  
  /***
   * kick_process - kick a running thread to enter/exit the kernel
-@@ -2374,8 +2631,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
+@@ -2372,8 +2630,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
  	 *
  	 * First fix up the nr_uninterruptible count:
  	 */
@@ -62500,7 +66190,7 @@
  	p->state = TASK_WAKING;
  	task_rq_unlock(rq, &flags);
  
-@@ -2609,6 +2869,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
+@@ -2607,6 +2868,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
  	/* Want to start with kernel preemption disabled. */
  	task_thread_info(p)->preempt_count = 1;
  #endif
@@ -62511,7 +66201,7 @@
  	plist_node_init(&p->pushable_tasks, MAX_PRIO);
  
  	put_cpu();
-@@ -2639,6 +2903,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+@@ -2637,6 +2902,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
  		 */
  		p->sched_class->task_new(rq, p);
  		inc_nr_running(rq);
@@ -62520,7 +66210,7 @@
  	}
  	trace_sched_wakeup_new(rq, p, 1);
  	check_preempt_curr(rq, p, WF_FORK);
-@@ -2841,6 +3107,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
+@@ -2839,6 +3106,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
  	if (current->set_child_tid)
  		put_user(task_pid_vnr(current), current->set_child_tid);
  }
@@ -62528,7 +66218,7 @@
  
  /*
   * context_switch - switch to the new MM and the new
-@@ -2912,6 +3179,7 @@ unsigned long nr_running(void)
+@@ -2910,6 +3178,7 @@ unsigned long nr_running(void)
  
  	return sum;
  }
@@ -62536,7 +66226,7 @@
  
  unsigned long nr_uninterruptible(void)
  {
-@@ -2929,6 +3197,7 @@ unsigned long nr_uninterruptible(void)
+@@ -2927,6 +3196,7 @@ unsigned long nr_uninterruptible(void)
  
  	return sum;
  }
@@ -62544,7 +66234,7 @@
  
  unsigned long long nr_context_switches(void)
  {
-@@ -2964,6 +3233,72 @@ unsigned long this_cpu_load(void)
+@@ -2962,6 +3232,72 @@ unsigned long this_cpu_load(void)
  }
  
  
@@ -62617,7 +66307,7 @@
  /* Variables and functions for calc_load */
  static atomic_long_t calc_load_tasks;
  static unsigned long calc_load_update;
-@@ -2985,6 +3320,16 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+@@ -2983,6 +3319,16 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
  	loads[2] = (avenrun[2] + offset) << shift;
  }
  
@@ -62634,7 +66324,7 @@
  static unsigned long
  calc_load(unsigned long load, unsigned long exp, unsigned long active)
  {
-@@ -2993,6 +3338,35 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
+@@ -2991,6 +3337,35 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
  	return load >> FSHIFT;
  }
  
@@ -62670,7 +66360,7 @@
  /*
   * calc_load - update the avenrun load estimates 10 ticks after the
   * CPUs have updated calc_load_tasks.
-@@ -3012,6 +3386,8 @@ void calc_global_load(void)
+@@ -3010,6 +3385,8 @@ void calc_global_load(void)
  	avenrun[1] = calc_load(avenrun[1], EXP_5, active);
  	avenrun[2] = calc_load(avenrun[2], EXP_15, active);
  
@@ -62679,7 +66369,7 @@
  	calc_load_update += LOAD_FREQ;
  }
  
-@@ -3076,6 +3452,16 @@ static void update_cpu_load(struct rq *this_rq)
+@@ -3074,6 +3451,16 @@ static void update_cpu_load(struct rq *this_rq)
  	}
  }
  
@@ -62696,7 +66386,7 @@
  #ifdef CONFIG_SMP
  
  /*
-@@ -3176,8 +3562,15 @@ void sched_exec(void)
+@@ -3174,8 +3561,15 @@ void sched_exec(void)
  static void pull_task(struct rq *src_rq, struct task_struct *p,
  		      struct rq *this_rq, int this_cpu)
  {
@@ -62712,7 +66402,7 @@
  	activate_task(this_rq, p, 0);
  	check_preempt_curr(this_rq, p, 0);
  }
-@@ -5054,10 +5447,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
+@@ -5052,10 +5446,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
  
  	/* Add user time to cpustat. */
  	tmp = cputime_to_cputime64(cputime);
@@ -62728,7 +66418,7 @@
  
  	cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
  	/* Account for user time used */
-@@ -5114,6 +5510,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
+@@ -5112,6 +5509,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
  
  	/* Add system time to cpustat. */
  	tmp = cputime_to_cputime64(cputime);
@@ -62736,7 +66426,7 @@
  	if (hardirq_count() - hardirq_offset)
  		cpustat->irq = cputime64_add(cpustat->irq, tmp);
  	else if (softirq_count())
-@@ -5492,6 +5889,8 @@ need_resched_nonpreemptible:
+@@ -5490,6 +5888,8 @@ need_resched_nonpreemptible:
  	next = pick_next_task(rq);
  
  	if (likely(prev != next)) {
@@ -62745,7 +66435,7 @@
  		sched_info_switch(prev, next);
  		perf_event_task_sched_out(prev, next, cpu);
  
-@@ -5499,6 +5898,22 @@ need_resched_nonpreemptible:
+@@ -5497,6 +5897,22 @@ need_resched_nonpreemptible:
  		rq->curr = next;
  		++*switch_count;
  
@@ -62768,7 +66458,7 @@
  		context_switch(rq, prev, next); /* unlocks the rq */
  		/*
  		 * the context switch might have flipped the stack from under
-@@ -5506,8 +5921,10 @@ need_resched_nonpreemptible:
+@@ -5504,8 +5920,10 @@ need_resched_nonpreemptible:
  		 */
  		cpu = smp_processor_id();
  		rq = cpu_rq(cpu);
@@ -62780,7 +66470,7 @@
  
  	post_schedule(rq);
  
-@@ -6291,7 +6708,7 @@ recheck:
+@@ -6289,7 +6707,7 @@ recheck:
  	/*
  	 * Allow unprivileged RT tasks to decrease priority:
  	 */
@@ -62789,7 +66479,7 @@
  		if (rt_policy(policy)) {
  			unsigned long rlim_rtprio;
  
-@@ -6798,11 +7215,16 @@ EXPORT_SYMBOL(yield);
+@@ -6800,11 +7218,16 @@ EXPORT_SYMBOL(yield);
  void __sched io_schedule(void)
  {
  	struct rq *rq = raw_rq();
@@ -62806,7 +66496,7 @@
  	current->in_iowait = 0;
  	atomic_dec(&rq->nr_iowait);
  	delayacct_blkio_end();
-@@ -6813,11 +7235,16 @@ long __sched io_schedule_timeout(long timeout)
+@@ -6815,11 +7238,16 @@ long __sched io_schedule_timeout(long timeout)
  {
  	struct rq *rq = raw_rq();
  	long ret;
@@ -62823,7 +66513,7 @@
  	current->in_iowait = 0;
  	atomic_dec(&rq->nr_iowait);
  	delayacct_blkio_end();
-@@ -6924,17 +7351,7 @@ void sched_show_task(struct task_struct *p)
+@@ -6926,17 +7354,7 @@ void sched_show_task(struct task_struct *p)
  	state = p->state ? __ffs(p->state) + 1 : 0;
  	printk(KERN_INFO "%-13.13s %c", p->comm,
  		state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
@@ -62842,7 +66532,7 @@
  #ifdef CONFIG_DEBUG_STACK_USAGE
  	free = stack_not_used(p);
  #endif
-@@ -6951,13 +7368,13 @@ void show_state_filter(unsigned long state_filter)
+@@ -6953,13 +7371,13 @@ void show_state_filter(unsigned long state_filter)
  
  #if BITS_PER_LONG == 32
  	printk(KERN_INFO
@@ -62859,7 +66549,7 @@
  		/*
  		 * reset the NMI-timeout, listing all files on a slow
  		 * console might take alot of time:
-@@ -6965,7 +7382,7 @@ void show_state_filter(unsigned long state_filter)
+@@ -6967,7 +7385,7 @@ void show_state_filter(unsigned long state_filter)
  		touch_nmi_watchdog();
  		if (!state_filter || (p->state & state_filter))
  			sched_show_task(p);
@@ -62868,7 +66558,7 @@
  
  	touch_all_softlockup_watchdogs();
  
-@@ -7331,13 +7748,13 @@ static void migrate_live_tasks(int src_cpu)
+@@ -7336,13 +7754,13 @@ static void migrate_live_tasks(int src_cpu)
  
  	read_lock(&tasklist_lock);
  
@@ -62884,7 +66574,15 @@
  
  	read_unlock(&tasklist_lock);
  }
-@@ -9498,7 +9915,7 @@ void __init sched_init(void)
+@@ -9490,6 +9908,7 @@ void __init sched_init(void)
+ 	update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
+ 					    __alignof__(unsigned long));
+ #endif
++	kstat_glob.sched_lat.cur = &per_cpu__glob_kstat_lat;
+ 	for_each_possible_cpu(i) {
+ 		struct rq *rq;
+ 
+@@ -9503,7 +9922,7 @@ void __init sched_init(void)
  #ifdef CONFIG_FAIR_GROUP_SCHED
  		init_task_group.shares = init_task_group_load;
  		INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -62893,7 +66591,7 @@
  		/*
  		 * How much cpu bandwidth does init_task_group get?
  		 *
-@@ -9544,7 +9961,7 @@ void __init sched_init(void)
+@@ -9549,7 +9968,7 @@ void __init sched_init(void)
  		rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
  #ifdef CONFIG_RT_GROUP_SCHED
  		INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -62902,7 +66600,7 @@
  		init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
  #elif defined CONFIG_USER_SCHED
  		init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
-@@ -9610,6 +10027,7 @@ void __init sched_init(void)
+@@ -9615,6 +10034,7 @@ void __init sched_init(void)
  	 * During early bootup we pretend to be a normal task:
  	 */
  	current->sched_class = &fair_sched_class;
@@ -62910,7 +66608,7 @@
  
  	/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
  	zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
-@@ -9688,7 +10106,7 @@ void normalize_rt_tasks(void)
+@@ -9693,7 +10113,7 @@ void normalize_rt_tasks(void)
  	struct rq *rq;
  
  	read_lock_irqsave(&tasklist_lock, flags);
@@ -62919,7 +66617,7 @@
  		/*
  		 * Only normalize user tasks:
  		 */
-@@ -9719,7 +10137,7 @@ void normalize_rt_tasks(void)
+@@ -9724,7 +10144,7 @@ void normalize_rt_tasks(void)
  
  		__task_rq_unlock(rq);
  		spin_unlock(&p->pi_lock);
@@ -62928,7 +66626,7 @@
  
  	read_unlock_irqrestore(&tasklist_lock, flags);
  }
-@@ -10165,10 +10583,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
+@@ -10170,10 +10590,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
  {
  	struct task_struct *g, *p;
  
@@ -63457,17 +67155,19 @@
  	if (len < 0 || len > __NEW_UTS_LEN)
  		return -EINVAL;
 diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index e06d0b8..da15284 100644
+index e06d0b8..7216e06 100644
 --- a/kernel/sys_ni.c
 +++ b/kernel/sys_ni.c
-@@ -179,3 +179,15 @@ cond_syscall(sys_eventfd2);
+@@ -179,3 +179,17 @@ cond_syscall(sys_eventfd2);
  
  /* performance counters: */
  cond_syscall(sys_perf_event_open);
 +cond_syscall(sys_getluid);
 +cond_syscall(sys_setluid);
 +cond_syscall(sys_setublimit);
++cond_syscall(compat_sys_setublimit);
 +cond_syscall(sys_ubstat);
++cond_syscall(compat_sys_lutime);
 +
 +/* fairsched compat */
 +cond_syscall(sys_fairsched_mknod);
@@ -63477,10 +67177,18 @@
 +cond_syscall(sys_fairsched_chwt);
 +cond_syscall(sys_fairsched_rate);
 diff --git a/kernel/sysctl.c b/kernel/sysctl.c
-index b8bd058..d2d9eec 100644
+index b8bd058..5ef2188 100644
 --- a/kernel/sysctl.c
 +++ b/kernel/sysctl.c
-@@ -83,6 +83,21 @@ extern int pid_max_min, pid_max_max;
+@@ -50,6 +50,7 @@
+ #include <linux/ftrace.h>
+ #include <linux/slow-work.h>
+ #include <linux/perf_event.h>
++#include <linux/ve_task.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/processor.h>
+@@ -83,6 +84,21 @@ extern int pid_max_min, pid_max_max;
  extern int sysctl_drop_caches;
  extern int percpu_pagelist_fraction;
  extern int compat_log;
@@ -63502,7 +67210,17 @@
  extern int latencytop_enabled;
  extern int sysctl_nr_open_min, sysctl_nr_open_max;
  #ifndef CONFIG_MMU
-@@ -178,9 +193,31 @@ static struct ctl_table_header root_table_header = {
+@@ -169,6 +185,9 @@ static int proc_taint(struct ctl_table *table, int write,
+ 			       void __user *buffer, size_t *lenp, loff_t *ppos);
+ #endif
+ 
++static int proc_dointvec_ve(struct ctl_table *table, int write,
++		void __user *buffer, size_t *lenp, loff_t *ppos);
++
+ static struct ctl_table root_table[];
+ static struct ctl_table_root sysctl_table_root;
+ static struct ctl_table_header root_table_header = {
+@@ -178,9 +197,31 @@ static struct ctl_table_header root_table_header = {
  	.root = &sysctl_table_root,
  	.set = &sysctl_table_root.default_set,
  };
@@ -63535,7 +67253,7 @@
  };
  
  static struct ctl_table kern_table[];
-@@ -504,6 +541,20 @@ static struct ctl_table kern_table[] = {
+@@ -504,6 +545,20 @@ static struct ctl_table kern_table[] = {
  		.proc_handler	= &proc_dointvec,
  	},
  #endif
@@ -63556,7 +67274,7 @@
  #ifdef __hppa__
  	{
  		.ctl_name	= KERN_HPPA_PWRSW,
-@@ -699,6 +750,24 @@ static struct ctl_table kern_table[] = {
+@@ -699,6 +754,24 @@ static struct ctl_table kern_table[] = {
  		.extra1		= &pid_max_min,
  		.extra2		= &pid_max_max,
  	},
@@ -63581,7 +67299,22 @@
  	{
  		.ctl_name	= KERN_PANIC_ON_OOPS,
  		.procname	= "panic_on_oops",
-@@ -1424,6 +1493,21 @@ static struct ctl_table vm_table[] = {
+@@ -824,10 +897,12 @@ static struct ctl_table kern_table[] = {
+ 	{
+ 		.ctl_name	= KERN_RANDOMIZE,
+ 		.procname	= "randomize_va_space",
+-		.data		= &randomize_va_space,
++		.data		= &_randomize_va_space,
++		.extra1		= (void *)offsetof(struct ve_struct,
++							_randomize_va_space),
+ 		.maxlen		= sizeof(int),
+ 		.mode		= 0644,
+-		.proc_handler	= &proc_dointvec,
++		.proc_handler	= &proc_dointvec_ve,
+ 	},
+ #endif
+ #if defined(CONFIG_S390) && defined(CONFIG_SMP)
+@@ -1424,6 +1499,21 @@ static struct ctl_table vm_table[] = {
  		.extra2		= &one,
  	},
  #endif
@@ -63603,7 +67336,7 @@
  
  /*
   * NOTE: do not add new entries to this table unless you have read
-@@ -1600,6 +1684,13 @@ static struct ctl_table fs_table[] = {
+@@ -1600,6 +1690,13 @@ static struct ctl_table fs_table[] = {
  };
  
  static struct ctl_table debug_table[] = {
@@ -63617,7 +67350,7 @@
  #if defined(CONFIG_X86) || defined(CONFIG_PPC)
  	{
  		.ctl_name	= CTL_UNNUMBERED,
-@@ -2150,10 +2241,27 @@ struct ctl_table_header *__register_sysctl_paths(
+@@ -2150,10 +2247,27 @@ struct ctl_table_header *__register_sysctl_paths(
  struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
  						struct ctl_table *table)
  {
@@ -63645,7 +67378,7 @@
  /**
   * register_sysctl_table - register a sysctl table hierarchy
   * @table: the top-level table structure
-@@ -2170,6 +2278,14 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
+@@ -2170,6 +2284,14 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
  	return register_sysctl_paths(null_path, table);
  }
  
@@ -63660,7 +67393,7 @@
  /**
   * unregister_sysctl_table - unregister a sysctl table hierarchy
   * @header: the header returned from register_sysctl_table
-@@ -2231,6 +2347,18 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
+@@ -2231,6 +2353,18 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
  	return NULL;
  }
  
@@ -63679,7 +67412,33 @@
  void unregister_sysctl_table(struct ctl_table_header * table)
  {
  }
-@@ -3236,6 +3364,56 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args)
+@@ -2902,6 +3036,25 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
+ 	return 0;
+ }
+ 
++#ifdef CONFIG_VE	/* handler reads/writes the int at offset ->extra1 from get_exec_env() */
++static int proc_dointvec_ve(struct ctl_table *table, int write,
++		void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++	struct ctl_table tmp_table;
++
++	tmp_table = *table;	/* work on a copy; only the copy's .data is redirected */
++	tmp_table.data = (char *)get_exec_env() + (unsigned long)table->extra1;
++
++	return proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
++}
++#else	/* !CONFIG_VE: plain proc_dointvec() on the table as registered */
++static int proc_dointvec_ve(struct ctl_table *table, int write,
++		void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++	return proc_dointvec(table, write, buffer, lenp, ppos);
++}
++#endif /* CONFIG_VE */
++
+ #else /* CONFIG_PROC_FS */
+ 
+ int proc_dostring(struct ctl_table *table, int write,
+@@ -3236,6 +3389,56 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args)
  	return 0;
  }
  
@@ -63736,7 +67495,7 @@
  /*
   * No sense putting this after each symbol definition, twice,
   * exception granted :-)
-@@ -3249,7 +3427,9 @@ EXPORT_SYMBOL(proc_dostring);
+@@ -3249,7 +3452,9 @@ EXPORT_SYMBOL(proc_dostring);
  EXPORT_SYMBOL(proc_doulongvec_minmax);
  EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
  EXPORT_SYMBOL(register_sysctl_table);
@@ -64165,10 +67924,10 @@
 +
 diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
 new file mode 100644
-index 0000000..8b59ff7
+index 0000000..85c42c3
 --- /dev/null
 +++ b/kernel/ve/ve.c
-@@ -0,0 +1,119 @@
+@@ -0,0 +1,129 @@
 +/*
 + *  linux/kernel/ve/ve.c
 + *
@@ -64252,6 +68011,12 @@
 +#endif
 +	.features		= VE_FEATURE_SIT | VE_FEATURE_IPIP |
 +				VE_FEATURE_PPP,
++	._randomize_va_space	=
++#ifdef CONFIG_COMPAT_BRK
++					1,
++#else
++					2,
++#endif
 +};
 +
 +EXPORT_SYMBOL(ve0);
@@ -64269,12 +68034,16 @@
 +EXPORT_SYMBOL(ve_cleanup_list);
 +EXPORT_SYMBOL(ve_cleanup_thread);
 +
++static DEFINE_PER_CPU(struct ve_cpu_stats, ve0_cpustats);
++static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, ve0_lat_stats);
++
 +void init_ve0(void)
 +{
 +	struct ve_struct *ve;
 +
 +	ve = get_ve0();
-+	ve->cpu_stats = NULL;
++	ve->cpu_stats = &per_cpu__ve0_cpustats;
++	ve->sched_lat_ve.cur = &per_cpu__ve0_lat_stats;
 +	list_add(&ve->ve_list, &ve_list_head);
 +}
 +
@@ -64290,10 +68059,10 @@
 +}
 diff --git a/kernel/ve/vecalls.c b/kernel/ve/vecalls.c
 new file mode 100644
-index 0000000..29b455d
+index 0000000..cc27878
 --- /dev/null
 +++ b/kernel/ve/vecalls.c
-@@ -0,0 +1,2264 @@
+@@ -0,0 +1,2335 @@
 +/*
 + *  linux/kernel/ve/vecalls.c
 + *
@@ -64353,6 +68122,7 @@
 +#include <linux/tty.h>
 +#include <linux/mount.h>
 +#include <linux/kthread.h>
++#include <linux/oom.h>
 +
 +#include <net/route.h>
 +#include <net/ip_fib.h>
@@ -64481,7 +68251,7 @@
 +	struct ve_struct *ve;
 +	int err;
 +
-+	if (!capable(CAP_SETVEID) || veid == 0)
++	if (!capable_setveid() || veid == 0)
 +		return -EPERM;
 +
 +	if ((ve = get_ve_by_id(veid)) == NULL)
@@ -64863,44 +68633,18 @@
 +
 +static int init_ve_sched(struct ve_struct *ve)
 +{
-+#ifdef CONFIG_VZ_FAIRSCHED
 +	int err;
 +
-+	/*
-+	 * We refuse to switch to an already existing node since nodes
-+	 * keep a pointer to their ve_struct...
-+	 */
-+	err = sys_fairsched_mknod(0, 1, ve->veid);
-+	if (err < 0) {
-+		printk(KERN_WARNING "Can't create fairsched node %d\n",
-+				ve->veid);
-+		return err;
-+	}
-+	err = sys_fairsched_mvpr(current->pid, ve->veid);
-+	if (err) {
-+		printk(KERN_WARNING "Can't switch to fairsched node %d\n",
-+				ve->veid);
-+		if (sys_fairsched_rmnod(ve->veid))
-+			printk(KERN_ERR "Can't clean fairsched node %d\n",
-+					ve->veid);
-+		return err;
-+	}
-+#endif
-+	ve_sched_attach(ve);
-+	return 0;
++	err = fairsched_new_node(ve->veid, 0);
++	if (err == 0)
++		ve_sched_attach(ve);
++
++	return err;
 +}
 +
 +static void fini_ve_sched(struct ve_struct *ve)
 +{
-+#ifdef CONFIG_VZ_FAIRSCHED
-+	if (task_fairsched_node_id(current) == ve->veid)
-+		if (sys_fairsched_mvpr(current->pid, FAIRSCHED_INIT_NODE_ID))
-+			printk(KERN_WARNING "Can't leave fairsched node %d\n",
-+					ve->veid);
-+	if (sys_fairsched_rmnod(ve->veid))
-+		printk(KERN_ERR "Can't remove fairsched node %d\n",
-+				ve->veid);
-+#endif
++	fairsched_drop_node(ve->veid);
 +}
 +
 +/*
@@ -65023,6 +68767,8 @@
 +	ve->start_jiffies = get_jiffies_64();
 +	ve->start_cycles = get_cycles();
 +
++	ve->_randomize_va_space = ve0._randomize_va_space;
++ 
 +	return 0;
 +}
 +
@@ -65077,7 +68823,6 @@
 +{
 +	/* required for real_setdevperms from register_ve_<fs> above */
 +	memcpy(&ve->ve_cap_bset, &tsk->cred->cap_effective, sizeof(kernel_cap_t));
-+	cap_lower(ve->ve_cap_bset, CAP_SETVEID);
 +}
 +
 +static int ve_list_add(struct ve_struct *ve)
@@ -65135,6 +68880,10 @@
 +	/* setup capabilities before enter */
 +	set_task_ve_caps(new, new_creds);
 +
++	/* Drop OOM protection. */
++	if (tsk->signal->oom_adj == OOM_DISABLE)
++		tsk->signal->oom_adj = 0;
++
 +	old = tsk->ve_task_info.owner_env;
 +	tsk->ve_task_info.owner_env = new;
 +	tsk->ve_task_info.exec_env = new;
@@ -65193,13 +68942,24 @@
 +static inline int init_ve_cpustats(struct ve_struct *ve)
 +{
 +	ve->cpu_stats = alloc_percpu(struct ve_cpu_stats);
-+	return ve->cpu_stats == NULL ? -ENOMEM : 0;
++	if (ve->cpu_stats == NULL)
++		return -ENOMEM;
++	ve->sched_lat_ve.cur = alloc_percpu(struct kstat_lat_pcpu_snap_struct);
++	if (ve->sched_lat_ve.cur == NULL)	/* second per-cpu allocation failed */
++		goto fail;
++	return 0;	/* both per-cpu areas are allocated */
++
++fail:
++	free_percpu(ve->cpu_stats);	/* undo the first allocation before reporting -ENOMEM */
++	return -ENOMEM;
 +}
 +
 +static inline void free_ve_cpustats(struct ve_struct *ve)
 +{
 +	free_percpu(ve->cpu_stats);
 +	ve->cpu_stats = NULL;
++	free_percpu(ve->sched_lat_ve.cur);
++	ve->sched_lat_ve.cur = NULL;
 +}
 +
 +static int alone_in_pgrp(struct task_struct *tsk)
@@ -65469,7 +69229,7 @@
 +	}
 +
 +	status = -EPERM;
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		goto out;
 +
 +	status = -EINVAL;
@@ -65814,6 +69574,8 @@
 +#ifdef CONFIG_UNIX98_PTYS
 +	free_ve_tty_driver(ve->ptm_driver);
 +	free_ve_tty_driver(ve->pts_driver);
++	if (ve->allocated_ptys)
++		ida_destroy(ve->allocated_ptys);
 +	kfree(ve->allocated_ptys);
 +	ve->ptm_driver = ve->pts_driver = NULL;
 +	ve->allocated_ptys = NULL;
@@ -65998,7 +69760,7 @@
 +
 +int real_ve_dev_map(envid_t veid, int op, char *dev_name)
 +{
-+	if (!capable(CAP_SETVEID))
++	if (!capable_setveid())
 +		return -EPERM;
 +	switch (op) {
 +	case VE_NETDEV_ADD:
@@ -66182,6 +69944,20 @@
 +				 ub->ub_parms[UB_PRIVVMPAGES].held ;
 +}
 +
++static void ve_swapinfo(struct sysinfo *val, struct user_beancounter *ub)
++{
++	unsigned long size, used;
++
++	size = ub->ub_parms[UB_SWAPPAGES].limit;
++	used = ub->ub_parms[UB_SWAPPAGES].held;
++
++	if (size == UB_MAXVALUE)
++		size = 0;
++
++	val->totalswap = size;
++	val->freeswap = size > used ? size - used : 0;
++}
++
 +static inline int ve_mi_replace(struct meminfo *mi, int old_ret)
 +{
 +#ifdef CONFIG_BEANCOUNTERS
@@ -66198,7 +69974,7 @@
 +		return NOTIFY_DONE | NOTIFY_STOP_MASK; /* No virtualization */
 +
 +	nodettram = mi->si.totalram;
-+	ub = current->mm->mm_ub;
++	ub = top_beancounter(current->mm->mm_ub);
 +	usedmem = ve_used_mem(ub);
 +
 +	memset(mi, 0, sizeof(*mi));
@@ -66208,6 +69984,8 @@
 +	mi->si.freeram = (mi->si.totalram > usedmem) ?
 +			(mi->si.totalram - usedmem) : 0;
 +
++	ve_swapinfo(&mi->si, ub);
++
 +	return NOTIFY_OK | NOTIFY_STOP_MASK;
 +#else
 +	return NOTIFY_DONE;
@@ -66228,6 +70006,62 @@
 +	.notifier_call = meminfo_call
 +};
 +
++/* /proc/vz/veinfo */
++
++static ve_seq_print_t veaddr_seq_print_cb;
++
++void vzmon_register_veaddr_print_cb(ve_seq_print_t cb)
++{
++	rcu_assign_pointer(veaddr_seq_print_cb, cb);
++}
++EXPORT_SYMBOL(vzmon_register_veaddr_print_cb);
++
++void vzmon_unregister_veaddr_print_cb(ve_seq_print_t cb)
++{
++	rcu_assign_pointer(veaddr_seq_print_cb, NULL);
++	synchronize_rcu();
++}
++EXPORT_SYMBOL(vzmon_unregister_veaddr_print_cb);
++
++static int veinfo_seq_show(struct seq_file *m, void *v)
++{
++	struct ve_struct *ve;
++	ve_seq_print_t veaddr_seq_print;
++
++	ve = list_entry((struct list_head *)v, struct ve_struct, ve_list);
++
++	seq_printf(m, "%10u %5u %5u", ve->veid,
++			ve->class_id, atomic_read(&ve->pcounter));
++
++	rcu_read_lock();
++	veaddr_seq_print = rcu_dereference(veaddr_seq_print_cb);
++	if (veaddr_seq_print)
++		veaddr_seq_print(m, ve);
++	rcu_read_unlock();
++
++	seq_putc(m, '\n');
++	return 0;
++}
++
++static struct seq_operations veinfo_seq_op = {
++	.start	= ve_seq_start,
++	.next	=  ve_seq_next,
++	.stop	=  ve_seq_stop,
++	.show	=  veinfo_seq_show,
++};
++
++static int veinfo_open(struct inode *inode, struct file *file)
++{
++	return seq_open(file, &veinfo_seq_op);
++}
++
++static struct file_operations proc_veinfo_operations = {
++	.open		= veinfo_open,
++	.read		= seq_read,
++	.llseek		= seq_lseek,
++	.release	= seq_release,
++};
++
 +static int __init init_vecalls_proc(void)
 +{
 +	struct proc_dir_entry *de;
@@ -66247,6 +70081,11 @@
 +	if (!de)
 +		printk(KERN_WARNING "VZMON: can't make version proc entry\n");
 +
++	de = proc_create("veinfo", S_IFREG | S_IRUSR, proc_vz_dir,
++			&proc_veinfo_operations);
++	if (!de)
++		printk(KERN_WARNING "VZMON: can't make veinfo proc entry\n");
++
 +	virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block);
 +	return 0;
 +}
@@ -66256,6 +70095,7 @@
 +	remove_proc_entry("version", proc_vz_dir);
 +	remove_proc_entry("devperms", proc_vz_dir);
 +	remove_proc_entry("vestat", proc_vz_dir);
++	remove_proc_entry("veinfo", proc_vz_dir);
 +	virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block);
 +}
 +#else
@@ -66560,10 +70400,10 @@
 +module_exit(vecalls_exit)
 diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
 new file mode 100644
-index 0000000..3889411
+index 0000000..50f4d9a
 --- /dev/null
 +++ b/kernel/ve/veowner.c
-@@ -0,0 +1,150 @@
+@@ -0,0 +1,160 @@
 +/*
 + *  kernel/ve/veowner.c
 + *
@@ -66590,6 +70430,7 @@
 +#include <linux/list.h>
 +#include <linux/inetdevice.h>
 +#include <linux/pid_namespace.h>
++#include <linux/xattr.h>
 +#include <asm/system.h>
 +#include <asm/io.h>
 +
@@ -66641,6 +70482,7 @@
 + * OpenVZ sysctl
 + * ------------------------------------------------------------------------
 + */
++int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
 +extern int ve_area_access_check;
 +
 +#ifdef CONFIG_INET
@@ -66671,6 +70513,14 @@
 +		.mode		= 0644,
 +		.proc_handler	= proc_dointvec,
 +	},
++	{
++		.ctl_name	= 228,
++		.procname	= "ve-xattr-policy",
++		.data		= &ve_xattr_policy,
++		.maxlen		= sizeof(int),
++		.mode		= 0644,
++		.proc_handler	= &proc_dointvec,
++	},
 +	{ 0 }
 +};
 +
@@ -67550,7 +71400,7 @@
  
  		if (!task_early_kill(tsk))
 diff --git a/mm/memory.c b/mm/memory.c
-index 4e59455..fcdb9fb 100644
+index 4e59455..220dc95 100644
 --- a/mm/memory.c
 +++ b/mm/memory.c
 @@ -42,6 +42,9 @@
@@ -67575,6 +71425,15 @@
  #include <asm/io.h>
  #include <asm/pgalloc.h>
  #include <asm/uaccess.h>
+@@ -94,7 +102,7 @@ EXPORT_SYMBOL(high_memory);
+  * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization,
+  *   as ancient (libc5 based) binaries can segfault. )
+  */
+-int randomize_va_space __read_mostly =
++int _randomize_va_space __read_mostly =
+ #ifdef CONFIG_COMPAT_BRK
+ 					1;
+ #else
 @@ -132,18 +140,21 @@ void pgd_clear_bad(pgd_t *pgd)
  	pgd_ERROR(*pgd);
  	pgd_clear(pgd);
@@ -68216,7 +72075,7 @@
  static int do_mlockall(int flags)
  {
 diff --git a/mm/mmap.c b/mm/mmap.c
-index ae19746..a5dd0bf 100644
+index ae19746..991a1ac 100644
 --- a/mm/mmap.c
 +++ b/mm/mmap.c
 @@ -29,6 +29,7 @@
@@ -68279,6 +72138,15 @@
  		goto out;
  set_brk:
  	mm->brk = brk;
+@@ -927,7 +946,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+ 			prot |= PROT_EXEC;
+ 
+ 	if (!len)
+-		return -EINVAL;
++		return strncmp(current->comm, "rpm", 3) ? -EINVAL : addr;
+ 
+ 	if (!(flags & MAP_FIXED))
+ 		addr = round_hint_to_min(addr);
 @@ -1106,6 +1125,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
  	struct rb_node **rb_link, *rb_parent;
  	unsigned long charged = 0;
@@ -69179,7 +73047,7 @@
  		dec_mm_counter(mm, file_rss);
  		(*mapcount)--;
 diff --git a/mm/shmem.c b/mm/shmem.c
-index 356dd99..141b181 100644
+index 356dd99..bc74e50 100644
 --- a/mm/shmem.c
 +++ b/mm/shmem.c
 @@ -31,7 +31,11 @@
@@ -69203,7 +73071,41 @@
  #include <asm/uaccess.h>
  #include <asm/div64.h>
  #include <asm/pgtable.h>
-@@ -214,7 +220,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
+@@ -107,14 +113,31 @@ enum sgp_type {
+ };
+ 
+ #ifdef CONFIG_TMPFS
++
++#include <linux/virtinfo.h>
++
++static unsigned long tmpfs_ram_pages(void)
++{
++	struct meminfo mi;
++
++	if (ve_is_super(get_exec_env()))
++		return totalram_pages;
++
++	memset(&mi, 0, sizeof(mi));
++	si_meminfo(&mi.si);
++	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi) & NOTIFY_FAIL)
++		return 0;
++	return mi.si.totalram;
++}
++
+ static unsigned long shmem_default_max_blocks(void)
+ {
+-	return totalram_pages / 2;
++	return tmpfs_ram_pages() / 2;
+ }
+ 
+ static unsigned long shmem_default_max_inodes(void)
+ {
+-	return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
++	return min(totalram_pages - totalhigh_pages, tmpfs_ram_pages() / 2);
+ }
+ #endif
+ 
+@@ -214,7 +237,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
  
  static const struct super_operations shmem_ops;
  static const struct address_space_operations shmem_aops;
@@ -69212,7 +73114,7 @@
  static const struct inode_operations shmem_inode_operations;
  static const struct inode_operations shmem_dir_inode_operations;
  static const struct inode_operations shmem_special_inode_operations;
-@@ -277,7 +283,7 @@ static void shmem_free_inode(struct super_block *sb)
+@@ -277,7 +300,7 @@ static void shmem_free_inode(struct super_block *sb)
   *
   * It has to be called with the spinlock held.
   */
@@ -69221,7 +73123,7 @@
  {
  	struct shmem_inode_info *info = SHMEM_I(inode);
  	long freed;
-@@ -287,6 +293,8 @@ static void shmem_recalc_inode(struct inode *inode)
+@@ -287,6 +310,8 @@ static void shmem_recalc_inode(struct inode *inode)
  		info->alloced -= freed;
  		shmem_unacct_blocks(info->flags, freed);
  		shmem_free_blocks(inode, freed);
@@ -69230,7 +73132,7 @@
  	}
  }
  
-@@ -391,6 +399,11 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
+@@ -391,6 +416,11 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
  		struct page *page = kmap_atomic_to_page(entry);
  		set_page_private(page, page_private(page) + incdec);
  	}
@@ -69242,7 +73144,7 @@
  }
  
  /**
-@@ -407,14 +420,24 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
+@@ -407,14 +437,24 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
  	struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
  	struct page *page = NULL;
  	swp_entry_t *entry;
@@ -69269,7 +73171,7 @@
  		/*
  		 * Test free_blocks against 1 not 0, since we have 1 data
  		 * page (and perhaps indirect index pages) yet to allocate:
-@@ -424,7 +447,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
+@@ -424,7 +464,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
  			spin_lock(&sbinfo->stat_lock);
  			if (sbinfo->free_blocks <= 1) {
  				spin_unlock(&sbinfo->stat_lock);
@@ -69279,7 +73181,7 @@
  			}
  			sbinfo->free_blocks--;
  			inode->i_blocks += BLOCKS_PER_PAGE;
-@@ -432,31 +456,43 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
+@@ -432,31 +473,43 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
  		}
  
  		spin_unlock(&info->lock);
@@ -69329,7 +73231,7 @@
  }
  
  /**
-@@ -564,6 +600,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+@@ -564,6 +617,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
  		return;
  
  	spin_lock(&info->lock);
@@ -69337,7 +73239,7 @@
  	info->flags |= SHMEM_TRUNCATE;
  	if (likely(end == (loff_t) -1)) {
  		limit = info->next_index;
-@@ -750,7 +787,7 @@ done2:
+@@ -750,7 +804,7 @@ done2:
  	info->swapped -= nr_swaps_freed;
  	if (nr_pages_to_free)
  		shmem_free_blocks(inode, nr_pages_to_free);
@@ -69346,7 +73248,7 @@
  	spin_unlock(&info->lock);
  
  	/*
-@@ -833,6 +870,7 @@ static void shmem_delete_inode(struct inode *inode)
+@@ -833,6 +887,7 @@ static void shmem_delete_inode(struct inode *inode)
  		}
  	}
  	BUG_ON(inode->i_blocks);
@@ -69354,7 +73256,7 @@
  	shmem_free_inode(inode->i_sb);
  	clear_inode(inode);
  }
-@@ -1020,6 +1058,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
+@@ -1020,6 +1075,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
  out:	return found;	/* 0 or 1 or -ENOMEM */
  }
  
@@ -69367,7 +73269,7 @@
  /*
   * Move the page from the page cache to the swap cache.
   */
-@@ -1051,7 +1095,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+@@ -1051,7 +1112,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
  	 * discarded.
  	 */
  	if (wbc->for_reclaim)
@@ -69376,7 +73278,7 @@
  	else
  		swap.val = 0;
  
-@@ -1069,7 +1113,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+@@ -1069,7 +1130,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
  		free_swap_and_cache(*entry);
  		shmem_swp_set(info, entry, 0);
  	}
@@ -69385,7 +73287,7 @@
  
  	if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
  		remove_from_page_cache(page);
-@@ -1252,7 +1296,7 @@ repeat:
+@@ -1252,7 +1313,7 @@ repeat:
  	}
  
  	spin_lock(&info->lock);
@@ -69394,7 +73296,7 @@
  	entry = shmem_swp_alloc(info, idx, sgp);
  	if (IS_ERR(entry)) {
  		spin_unlock(&info->lock);
-@@ -1455,6 +1499,7 @@ repeat:
+@@ -1455,6 +1516,7 @@ repeat:
  		clear_highpage(filepage);
  		flush_dcache_page(filepage);
  		SetPageUptodate(filepage);
@@ -69402,7 +73304,7 @@
  		if (sgp == SGP_DIRTY)
  			set_page_dirty(filepage);
  	}
-@@ -1512,20 +1557,27 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
+@@ -1512,20 +1574,27 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
  
  	spin_lock(&info->lock);
  	if (lock && !(info->flags & VM_LOCKED)) {
@@ -69431,7 +73333,7 @@
  	spin_unlock(&info->lock);
  	return retval;
  }
-@@ -1559,6 +1611,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
+@@ -1559,6 +1628,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
  		inode->i_generation = get_seconds();
  		info = SHMEM_I(inode);
  		memset(info, 0, (char *)inode - (char *)info);
@@ -69439,7 +73341,16 @@
  		spin_lock_init(&info->lock);
  		info->flags = flags & VM_NORESERVE;
  		INIT_LIST_HEAD(&info->swaplist);
-@@ -2424,7 +2477,7 @@ static const struct address_space_operations shmem_aops = {
+@@ -2182,7 +2252,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
+ 			size = memparse(value,&rest);
+ 			if (*rest == '%') {
+ 				size <<= PAGE_SHIFT;
+-				size *= totalram_pages;
++				size *= tmpfs_ram_pages();
+ 				do_div(size, 100);
+ 				rest++;
+ 			}
+@@ -2424,7 +2494,7 @@ static const struct address_space_operations shmem_aops = {
  	.error_remove_page = generic_error_remove_page,
  };
  
@@ -69448,7 +73359,7 @@
  	.mmap		= shmem_mmap,
  #ifdef CONFIG_TMPFS
  	.llseek		= generic_file_llseek,
-@@ -2437,6 +2490,7 @@ static const struct file_operations shmem_file_operations = {
+@@ -2437,6 +2507,7 @@ static const struct file_operations shmem_file_operations = {
  	.splice_write	= generic_file_splice_write,
  #endif
  };
@@ -69456,7 +73367,7 @@
  
  static const struct inode_operations shmem_inode_operations = {
  	.truncate	= shmem_truncate,
-@@ -2506,6 +2560,10 @@ static const struct vm_operations_struct shmem_vm_ops = {
+@@ -2506,6 +2577,10 @@ static const struct vm_operations_struct shmem_vm_ops = {
  #endif
  };
  
@@ -69467,7 +73378,7 @@
  
  static int shmem_get_sb(struct file_system_type *fs_type,
  	int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-@@ -2513,12 +2571,13 @@ static int shmem_get_sb(struct file_system_type *fs_type,
+@@ -2513,12 +2588,13 @@ static int shmem_get_sb(struct file_system_type *fs_type,
  	return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
  }
  
@@ -69482,7 +73393,7 @@
  
  int __init init_tmpfs(void)
  {
-@@ -2608,6 +2667,36 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
+@@ -2608,6 +2684,36 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
  
  /* common code */
  
@@ -69519,7 +73430,7 @@
  /**
   * shmem_file_setup - get an unlinked file living in tmpfs
   * @name: name for dentry (to be seen in /proc/<pid>/maps
-@@ -2653,6 +2742,9 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
+@@ -2653,6 +2759,9 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
  	d_instantiate(dentry, inode);
  	inode->i_size = size;
  	inode->i_nlink = 0;	/* It is unlinked */
@@ -69529,7 +73440,7 @@
  	init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
  		  &shmem_file_operations);
  
-@@ -2689,6 +2781,8 @@ int shmem_zero_setup(struct vm_area_struct *vma)
+@@ -2689,6 +2798,8 @@ int shmem_zero_setup(struct vm_area_struct *vma)
  
  	if (vma->vm_file)
  		fput(vma->vm_file);
@@ -70738,7 +74649,7 @@
   * swapin_readahead - swap in pages in hope we need them soon
   * @entry: swap entry of this memory
 diff --git a/mm/swapfile.c b/mm/swapfile.c
-index 9c590ee..f5bc813 100644
+index 9c590ee..9ce0143 100644
 --- a/mm/swapfile.c
 +++ b/mm/swapfile.c
 @@ -35,6 +35,8 @@
@@ -71084,7 +74995,56 @@
  #ifdef CONFIG_PROC_FS
  /* iterator */
  static void *swap_start(struct seq_file *swap, loff_t *pos)
-@@ -1743,7 +1817,7 @@ static const struct file_operations proc_swaps_operations = {
+@@ -1729,21 +1803,55 @@ static const struct seq_operations swaps_op = {
+ 	.show =		swap_show
+ };
+ 
++#include <linux/virtinfo.h>
++
++static int swap_show_ve(struct seq_file *swap, void *v)
++{
++	struct meminfo mi;
++
++	memset(&mi, 0, sizeof(mi));
++	si_swapinfo(&mi.si);
++	if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
++			& NOTIFY_FAIL)
++		goto out;
++
++	seq_printf(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
++	if (!mi.si.totalswap)
++		goto out;
++	seq_printf(swap, "%-40s%s\t%lu\t%lu\t%d\n",
++			"/dev/null",
++			"partition",
++			mi.si.totalswap  << (PAGE_SHIFT - 10),
++			(mi.si.totalswap - mi.si.freeswap) << (PAGE_SHIFT - 10),
++			-1);
++out:
++	return 0;
++}
++
+ static int swaps_open(struct inode *inode, struct file *file)
+ {
++	if (!ve_is_super(get_exec_env()))
++		return single_open(file, &swap_show_ve, NULL);
+ 	return seq_open(file, &swaps_op);
+ }
+ 
++static int swaps_release(struct inode *inode, struct file *file)
++{
++	if (!ve_is_super(file->owner_env))
++		return single_release(inode, file);
++	return seq_release(inode, file);
++}
++
+ static const struct file_operations proc_swaps_operations = {
+ 	.open		= swaps_open,
+ 	.read		= seq_read,
+ 	.llseek		= seq_lseek,
+-	.release	= seq_release,
++	.release	= swaps_release,
+ };
  
  static int __init procswaps_init(void)
  {
@@ -71093,7 +75053,7 @@
  	return 0;
  }
  __initcall(procswaps_init);
-@@ -1973,6 +2047,11 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
+@@ -1973,6 +2081,11 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
  		goto bad_swap;
  	}
  
@@ -71105,7 +75065,7 @@
  	if (p->bdev) {
  		if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
  			p->flags |= SWP_SOLIDSTATE;
-@@ -1991,6 +2070,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
+@@ -1991,6 +2104,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
  		p->prio = --least_priority;
  	p->swap_map = swap_map;
  	p->flags |= SWP_WRITEOK;
@@ -71114,7 +75074,7 @@
  	nr_swap_pages += nr_good_pages;
  	total_swap_pages += nr_good_pages;
  
-@@ -2049,6 +2130,8 @@ out:
+@@ -2049,6 +2164,8 @@ out:
  	return error;
  }
  
@@ -71123,7 +75083,7 @@
  void si_swapinfo(struct sysinfo *val)
  {
  	unsigned int i;
-@@ -2146,6 +2229,8 @@ void swap_duplicate(swp_entry_t entry)
+@@ -2146,6 +2263,8 @@ void swap_duplicate(swp_entry_t entry)
  	__swap_duplicate(entry, SWAP_MAP);
  }
  
@@ -72129,7 +76089,7 @@
  	else
  		set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
 diff --git a/net/core/dev.c b/net/core/dev.c
-index 74d0cce..48199c3 100644
+index 74d0cce..ee00d53 100644
 --- a/net/core/dev.c
 +++ b/net/core/dev.c
 @@ -130,6 +130,9 @@
@@ -72256,13 +76216,14 @@
  		       dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
  							       "left");
  		if (audit_enabled) {
-@@ -4547,11 +4576,20 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+@@ -4547,16 +4576,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
  	 *	- require strict serialization.
  	 *	- do not return a value
  	 */
 +	case SIOCSIFMTU:
 +	case SIOCSIFHWADDR:
  	case SIOCSIFFLAGS:
++	case SIOCSIFTXQLEN:
 +		if (!capable(CAP_NET_ADMIN) &&
 +		    !capable(CAP_VE_NET_ADMIN))
 +			return -EPERM;
@@ -72279,6 +76240,11 @@
  	case SIOCSIFSLAVE:
  	case SIOCADDMULTI:
  	case SIOCDELMULTI:
+ 	case SIOCSIFHWBROADCAST:
+-	case SIOCSIFTXQLEN:
+ 	case SIOCSMIIREG:
+ 	case SIOCBONDENSLAVE:
+ 	case SIOCBONDRELEASE:
 @@ -4619,12 +4657,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
   */
  static int dev_new_index(struct net *net)
@@ -75614,7 +79580,7 @@
  	limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
  	max_share = min(4UL*1024*1024, limit);
 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index d86784b..46b61f5 100644
+index 2433bcd..0eb9c17 100644
 --- a/net/ipv4/tcp_input.c
 +++ b/net/ipv4/tcp_input.c
 @@ -72,6 +72,8 @@
@@ -75751,7 +79717,7 @@
  				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
  
 diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
-index 7cda24b..b0f93fd 100644
+index 7cda24b..e141833 100644
 --- a/net/ipv4/tcp_ipv4.c
 +++ b/net/ipv4/tcp_ipv4.c
 @@ -72,6 +72,8 @@
@@ -75847,7 +79813,7 @@
  		__free_page(sk->sk_sndmsg_page);
  		sk->sk_sndmsg_page = NULL;
  	}
-@@ -2478,6 +2493,87 @@ void __init tcp_v4_init(void)
+@@ -2478,6 +2493,93 @@ void __init tcp_v4_init(void)
  		panic("Failed to create the TCP control socket.\n");
  }
  
@@ -75858,16 +79824,13 @@
 +
 +	/* Check the assumed state of the socket. */
 +	if (!sock_flag(sk, SOCK_DEAD)) {
-+		static int printed;
-+invalid:
-+		if (!printed)
-+			printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
-+				"wrseq %u unseq %u, wrqu %d.\n",
-+				sock_flag(sk, SOCK_DEAD), sk->sk_state,
-+				tp->write_seq, tp->snd_una,
-+				!skb_queue_empty(&sk->sk_write_queue));
-+		printed = 1;
-+		return;
++		printk(KERN_WARNING "Killing sk: dead %d, state %d, "
++			"wrseq %u unseq %u, wrqu %d.\n",
++			sock_flag(sk, SOCK_DEAD), sk->sk_state,
++			tp->write_seq, tp->snd_una,
++			!skb_queue_empty(&sk->sk_write_queue));
++		sk->sk_err = ECONNRESET;
++		sk->sk_error_report(sk);
 +	}
 +
 +	tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -75886,22 +79849,21 @@
 +			 */
 +			tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
 +			break;
-+		case TCP_LAST_ACK:
++		default:
 +			/* Just jump into CLOSED state. */
 +			tcp_done(sk);
 +			break;
-+		default:
-+			/* The socket must be already close()d. */
-+			goto invalid;
 +	}
 +}
 +
 +void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
 +{
 +	struct inet_ehash_bucket *head;
-+	int i;
++	int i, retry;
 +
 +	/* alive */
++again:
++	retry = 0;
 +	local_bh_disable();
 +	head = tcp_hashinfo.ehash;
 +	for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
@@ -75916,6 +79878,12 @@
 +				spin_unlock(lock);
 +
 +				bh_lock_sock(sk);
++				if (sock_owned_by_user(sk)) {
++					retry = 1;
++					bh_unlock_sock(sk);
++					sock_put(sk);
++					break;
++				}
 +				/* sk might have disappeared from the hash before
 +				 * we got the lock */
 +				if (sk->sk_state != TCP_CLOSE)
@@ -75928,6 +79896,10 @@
 +		spin_unlock(lock);
 +	}
 +	local_bh_enable();
++	if (retry) {
++		schedule_timeout_interruptible(HZ);
++		goto again;
++	}
 +}
 +EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
 +#endif
@@ -76355,8 +80327,20 @@
  	sock_put(sk);
 +	(void)set_exec_env(ve);
  }
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 0fa9f70..ca1c6bf 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -138,6 +138,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
+ 		    sk2 != sk					&&
+ 		    (bitmap || sk2->sk_hash == num)		&&
+ 		    (!sk2->sk_reuse || !sk->sk_reuse)		&&
++		    sk->sk_reuse != 2 &&
+ 		    (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ 			|| sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ 		    (*saddr_comp)(sk, sk2)) {
 diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
-index d1f77cc..d62bbca 100644
+index d1f77cc..7fc4efd 100644
 --- a/net/ipv6/addrconf.c
 +++ b/net/ipv6/addrconf.c
 @@ -407,9 +407,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
@@ -76397,7 +80381,17 @@
  
  static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
  			  unsigned int plen)
-@@ -2202,7 +2202,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
+@@ -2188,7 +2188,8 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
+ 			   disable IPv6 on this interface.
+ 			 */
+ 			if (idev->addr_list == NULL)
+-				addrconf_ifdown(idev->dev, 1);
++				addrconf_ifdown(idev->dev,
++						!(idev->dev->flags & IFF_LOOPBACK));
+ 			return 0;
+ 		}
+ 	}
+@@ -2202,7 +2203,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
  	struct in6_ifreq ireq;
  	int err;
  
@@ -76406,7 +80400,7 @@
  		return -EPERM;
  
  	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-@@ -2221,7 +2221,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
+@@ -2221,7 +2222,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
  	struct in6_ifreq ireq;
  	int err;
  
@@ -76415,7 +80409,7 @@
  		return -EPERM;
  
  	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-@@ -2731,6 +2731,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
+@@ -2731,6 +2732,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
  static void addrconf_rs_timer(unsigned long data)
  {
  	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
@@ -76425,7 +80419,7 @@
  
  	if (ifp->idev->cnf.forwarding)
  		goto out;
-@@ -2765,6 +2768,7 @@ static void addrconf_rs_timer(unsigned long data)
+@@ -2765,6 +2769,7 @@ static void addrconf_rs_timer(unsigned long data)
  
  out:
  	in6_ifa_put(ifp);
@@ -76433,7 +80427,15 @@
  }
  
  /*
-@@ -2841,7 +2845,9 @@ static void addrconf_dad_timer(unsigned long data)
+@@ -2801,6 +2806,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
+ 	if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+ 	    idev->cnf.accept_dad < 1 ||
+ 	    !(ifp->flags&IFA_F_TENTATIVE) ||
++	    dev->owner_env->disable_net ||
+ 	    ifp->flags & IFA_F_NODAD) {
+ 		ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+ 		spin_unlock_bh(&ifp->lock);
+@@ -2841,7 +2847,9 @@ static void addrconf_dad_timer(unsigned long data)
  	struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
  	struct inet6_dev *idev = ifp->idev;
  	struct in6_addr mcaddr;
@@ -76443,7 +80445,7 @@
  	read_lock_bh(&idev->lock);
  	if (idev->dead) {
  		read_unlock_bh(&idev->lock);
-@@ -2872,6 +2878,7 @@ static void addrconf_dad_timer(unsigned long data)
+@@ -2872,6 +2880,7 @@ static void addrconf_dad_timer(unsigned long data)
  	ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
  out:
  	in6_ifa_put(ifp);
@@ -76451,7 +80453,7 @@
  }
  
  static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
-@@ -3093,6 +3100,7 @@ static void addrconf_verify(unsigned long foo)
+@@ -3093,6 +3102,7 @@ static void addrconf_verify(unsigned long foo)
  	struct inet6_ifaddr *ifp;
  	unsigned long now, next;
  	int i;
@@ -76459,7 +80461,7 @@
  
  	spin_lock_bh(&addrconf_verify_lock);
  	now = jiffies;
-@@ -3113,6 +3121,8 @@ restart:
+@@ -3113,6 +3123,8 @@ restart:
  			if (ifp->flags & IFA_F_PERMANENT)
  				continue;
  
@@ -76468,7 +80470,7 @@
  			spin_lock(&ifp->lock);
  			age = (now - ifp->tstamp) / HZ;
  
-@@ -3128,9 +3138,11 @@ restart:
+@@ -3128,9 +3140,11 @@ restart:
  				in6_ifa_hold(ifp);
  				read_unlock(&addrconf_hash_lock);
  				ipv6_del_addr(ifp);
@@ -76480,7 +80482,7 @@
  				continue;
  			} else if (age >= ifp->prefered_lft) {
  				/* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
-@@ -3152,6 +3164,7 @@ restart:
+@@ -3152,6 +3166,7 @@ restart:
  
  					ipv6_ifa_notify(0, ifp);
  					in6_ifa_put(ifp);
@@ -76488,7 +80490,7 @@
  					goto restart;
  				}
  #ifdef CONFIG_IPV6_PRIVACY
-@@ -3173,6 +3186,7 @@ restart:
+@@ -3173,6 +3188,7 @@ restart:
  						ipv6_create_tempaddr(ifpub, ifp);
  						in6_ifa_put(ifpub);
  						in6_ifa_put(ifp);
@@ -76496,7 +80498,7 @@
  						goto restart;
  					}
  				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
-@@ -3185,6 +3199,7 @@ restart:
+@@ -3185,6 +3201,7 @@ restart:
  					next = ifp->tstamp + ifp->prefered_lft * HZ;
  				spin_unlock(&ifp->lock);
  			}
@@ -77335,7 +81337,7 @@
  	return err;
  }
 diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
-index 21d100b..0ecd5b4 100644
+index 21d100b..1c534b7 100644
 --- a/net/ipv6/tcp_ipv6.c
 +++ b/net/ipv6/tcp_ipv6.c
 @@ -61,6 +61,8 @@
@@ -77356,7 +81358,15 @@
  static const struct inet_connection_sock_af_ops ipv6_specific;
  #ifdef CONFIG_TCP_MD5SIG
  static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
-@@ -1496,6 +1498,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -892,6 +894,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+ 	.destructor	=	tcp_v6_reqsk_destructor,
+ 	.send_reset	=	tcp_v6_send_reset
+ };
++EXPORT_SYMBOL(tcp6_request_sock_ops);
+ 
+ #ifdef CONFIG_TCP_MD5SIG
+ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+@@ -1496,6 +1499,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  	struct ipv6_pinfo *np = inet6_sk(sk);
  	struct tcp_sock *tp;
  	struct sk_buff *opt_skb = NULL;
@@ -77364,7 +81374,7 @@
  
  	/* Imagine: socket is IPv6. IPv4 packet arrives,
  	   goes to IPv4 receive handler and backlogged.
-@@ -1508,6 +1511,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1508,6 +1512,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  	if (skb->protocol == htons(ETH_P_IP))
  		return tcp_v4_do_rcv(sk, skb);
  
@@ -77373,7 +81383,7 @@
  #ifdef CONFIG_TCP_MD5SIG
  	if (tcp_v6_inbound_md5_hash (sk, skb))
  		goto discard;
-@@ -1544,7 +1549,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1544,7 +1550,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  		TCP_CHECK_TIMER(sk);
  		if (opt_skb)
  			goto ipv6_pktoptions;
@@ -77382,7 +81392,7 @@
  	}
  
  	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
-@@ -1565,7 +1570,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1565,7 +1571,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  				goto reset;
  			if (opt_skb)
  				__kfree_skb(opt_skb);
@@ -77391,7 +81401,7 @@
  		}
  	}
  
-@@ -1575,6 +1580,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1575,6 +1581,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
  	TCP_CHECK_TIMER(sk);
  	if (opt_skb)
  		goto ipv6_pktoptions;
@@ -77401,7 +81411,7 @@
  	return 0;
  
  reset:
-@@ -1583,7 +1591,7 @@ discard:
+@@ -1583,7 +1592,7 @@ discard:
  	if (opt_skb)
  		__kfree_skb(opt_skb);
  	kfree_skb(skb);
@@ -77410,7 +81420,7 @@
  csum_err:
  	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
  	goto discard;
-@@ -1614,7 +1622,7 @@ ipv6_pktoptions:
+@@ -1614,7 +1623,7 @@ ipv6_pktoptions:
  	}
  
  	kfree_skb(opt_skb);
@@ -77419,7 +81429,7 @@
  }
  
  static int tcp_v6_rcv(struct sk_buff *skb)
-@@ -1793,7 +1801,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+@@ -1793,7 +1802,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
   *	TCP over IPv4 via INET6 API
   */
  
@@ -77428,7 +81438,7 @@
  	.queue_xmit	   = ip_queue_xmit,
  	.send_check	   = tcp_v4_send_check,
  	.rebuild_header	   = inet_sk_rebuild_header,
-@@ -1812,6 +1820,8 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
+@@ -1812,6 +1821,8 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
  #endif
  };
  
@@ -80219,7 +84229,7 @@
  	cleanup_socket_xprt();
  	svc_cleanup_xprt_sock();
 diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
-index 1c246a4..f969dee 100644
+index 70b0a22..f66b225 100644
 --- a/net/sunrpc/svcsock.c
 +++ b/net/sunrpc/svcsock.c
 @@ -229,6 +229,9 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
@@ -80241,7 +84251,7 @@
  	return len;
  }
  
-@@ -1436,8 +1441,9 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
+@@ -1437,8 +1442,9 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
  
  	error = sock_create_kern(family, type, protocol, &sock);
  	if (error < 0)
@@ -80252,7 +84262,7 @@
  	svc_reclassify_socket(sock);
  
  	/*
-@@ -1488,6 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt)
+@@ -1489,6 +1495,8 @@ static void svc_sock_detach(struct svc_xprt *xprt)
  
  	dprintk("svc: svc_sock_detach(%p)\n", svsk);
  
@@ -80668,7 +84678,7 @@
  	  This allows you to choose different security modules to be
  	  configured into your kernel.
 diff --git a/security/commoncap.c b/security/commoncap.c
-index fe30751..6110691 100644
+index fe30751..3579774 100644
 --- a/security/commoncap.c
 +++ b/security/commoncap.c
 @@ -58,6 +58,10 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
@@ -80700,15 +84710,18 @@
  		return -EPERM;
  	return 0;
  }
-@@ -962,7 +966,7 @@ error:
+@@ -962,8 +966,9 @@ error:
   */
  int cap_syslog(int type)
  {
 -	if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
-+	if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
- 		return -EPERM;
+-		return -EPERM;
++	if ((type != 3 && type != 10) &&
++		!capable(CAP_VE_SYS_ADMIN) && !capable(CAP_SYS_ADMIN))
++			return -EPERM;
  	return 0;
  }
+ 
 diff --git a/security/device_cgroup.c b/security/device_cgroup.c
 index 6cf8fd2..02aeae6 100644
 --- a/security/device_cgroup.c



More information about the Kernel-svn-changes mailing list