[kernel] r15569 - in dists/sid/linux-2.6/debian: . patches/features/all/openvz
Maximilian Attems
maks at alioth.debian.org
Tue Apr 27 14:04:44 UTC 2010
Author: maks
Date: Tue Apr 27 14:04:39 2010
New Revision: 15569
Log:
update openvz patch
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Tue Apr 27 11:26:09 2010 (r15568)
+++ dists/sid/linux-2.6/debian/changelog Tue Apr 27 14:04:39 2010 (r15569)
@@ -27,7 +27,7 @@
[ maximilian attems]
* [ia64] Built in fbcon.
- * Update openvz patch to 6b5607eeec54. (closes: #574598)
+ * Update openvz patch to c05f95fcb04e. (closes: #574598)
* Reenable nouveau autoloading.
* reiserfs: Fix permissions on .reiserfs_priv. CVE-2010-1146
* libata,ata_piix: detect and clear spurious IRQs.
Modified: dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch Tue Apr 27 11:26:09 2010 (r15568)
+++ dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch Tue Apr 27 14:04:39 2010 (r15569)
@@ -1,3 +1,1794 @@
+commit c05f95fcb04e896c898218d12a8f37c43d2f9cc6
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Tue Apr 27 15:10:13 2010 +0400
+
+ OpenVZ kernel 2.6.32-avdeyev released
+
+ Named after Sergei Vasilyevich Avdeyev - a Russian cosmonaut.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b4a419d9abd11e3efd02e9fccd4a14180866cf99
+Merge: 455792e 5bf3475
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Tue Apr 27 14:01:27 2010 +0400
+
+ Merged linux-2.6.32.12
+
+ Conflicts:
+
+ Makefile
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 455792e7712fac15bba7ca187c244f30c9d0e825
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Thu Apr 22 19:08:13 2010 +0400
+
+ ipv6: fix sysctl unregistering order
+
+ call addrconf_ifdown for loopback at the last ipv6 addr delete with how=0
+ to fix sysctl tables unregister ordering: all other interfaces attach their
+ sysctl paths to lo's, so unregister lo sysctl tables only at namespace destroy.
+
+ https://bugzilla.sw.ru/show_bug.cgi?id=473430
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fa86dba2b6213e770f102d1e688f6527d759aecf
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Mon Apr 5 15:43:18 2010 +0400
+
+ ve: fix ve task state percpu counters
+
+ Counters overlap detection for ve tasks in running/uninterruptible/iowait state
+ was broken due to type mismatch:
+ nr_{running/unin..e/iowait}_ve() uses _long_ for summing _int_ percpu counters.
+
+ As a result, it broke ve loadavg calculation after first int overlap.
+
+ This patch expands all these percpu counters to unsigned long.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=1396
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b484e22d951a02bd7ce25aaac396742766142790
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Mon Apr 5 15:41:30 2010 +0400
+
+ check flags on parsed structure
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=1464
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d8a86ef5a6c747ddb2896696269c0feef5d6fe1e
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Mon Apr 5 15:38:29 2010 +0400
+
+ CPT: check signal curr_target at restore
+
+ set signal curr_target to current if right task was not found.
+ fix oops after broken restore.
+
+ "curr_target" controls round robin signal target balance over process
+ threads, there no reasons to care about migration accuracy.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=1467
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 61845b781db7d86180977270c73f6ea3885485f3
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Mon Apr 5 15:35:58 2010 +0400
+
+ cpt: Don't mind the tsk->splice_pipe cache at cpt time
+
+ This field is just a cache for sendfile systemcall. It can be dropped
+ safely during migration - the first sendfile after restore will create
+ it back.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=881
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fcd86ff706b309999e526dc4a37e9de88ec051fb
+Author: Peter Volkov <pva at gentoo.org>
+Date: Sun Mar 28 18:04:44 2010 +0400
+
+ Fix /proc/kmsg permissions with capabilities active
+
+ Whenever application sets cap_sys_admin=ep it is unable to read
+ /proc/kmsg with EPERM. This patch makes /proc/kmsg readable on HN.
+ http://bugzilla.openvz.org/show_bug.cgi?id=1360
+
+ Signed-off-by: Peter Volkov <pva at gentoo.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 8c6af363b89ebf94d3982d786dd21c64fb41528f
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Fri Mar 12 15:58:35 2010 +0300
+
+ quota: fix compilation 32-bit compat quota, remove size checks.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 26aeb82fc7ef70e83a4e0640fcb77c7b6f31d81b
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Fri Mar 12 15:58:34 2010 +0300
+
+ x86: fix compilation for 32-bit kernel
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 92875e3c49a15885ffbf40cbb0f2bd82cf423e43
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Mon Mar 1 13:03:59 2010 +0300
+
+ CPT: update image version to CPT_VERSION_27_3
+
+ sync cpt minor version with rhel5 branch
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f7dd75ba9debbd60b12eec93128a5742d6876d28
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Mon Mar 1 12:56:27 2010 +0300
+
+ CPT: ignore deleted linked chr blk fifo nodes
+
+ Ignore unlinked but referenced pipes, character and block device nodes.
+ Restore process will create it itself.
+
+ Bug #455855
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d7c68b191825cbbf6c7a40a75d38d09330b3abca
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Mon Mar 1 12:55:36 2010 +0300
+
+ CPT: Dump fake hardlinks on inotify watch's inodes
+
+ When a watch is attached to unlinked and closed file it
+ will not be restored, since the inode will not be in image.
+
+ To fix this the proposal is to create a fake link on the
+ inode in a temp dir and dump it.
+
+ Bug #454944
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7cf74bdd35d9559c671362cf8ce7016bb51aedaa
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Mon Mar 1 12:52:42 2010 +0300
+
+ CPT: Open hardlinked files only if is set 'hardlinked_on'
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 52c2eb6da3f09f44d652eb7156a793b5f50e8e08
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Mon Mar 1 12:52:09 2010 +0300
+
+ CPT: Add ioctl CPT_HARDLNK_ON for rst
+
+ vzctl has to call ioctl CPT_HARDLNK_ON to enable opening of hardlinked
+ files by kernel during restore.
+
+ This protection is needed to prevent mix new kernel + old vzctl (which
+ doesn't do cleaning). In other words, prevent creating/open files
+ which will not be removed, and therefore this issue can lead to
+ security problem.
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 72dfa44429c57c924ec4ac4d25d9ef6a343ddade
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Mon Mar 1 12:51:39 2010 +0300
+
+ CPT: Add CPT_DENTRY_HARDLINKED flag to cpt_file_image
+
+ This flag tells that file was hardlinked.
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 80d2ce353aa41820eca28c15abd6c1421d537736
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Mon Mar 1 12:49:48 2010 +0300
+
+ CPT: Create hard links to "deleted but referenced" during checkpoint
+
+ For "deleted but referenced" files, kernel creates hard link in
+ directory (that was set via CPT_LINKDIR_ADD) in format:
+
+ .cpt_hardlink.xxxxxxxx
+
+ x - digit, from 0 to 9
+
+ Note - this policy is used only when no other ways of dumping unlinked
+ file helped.
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c24ab545f53ae07a2bfb3a6df100b56d49b57281
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Mon Mar 1 12:47:30 2010 +0300
+
+ CPT: Add ioctl CPT_LINKDIR_ADD for cpt
+
+ vzctl has to call ioctl CPT_LINKDIR_ADD to tell kernel where to
+ create hardlinked files during checkpoint. Without this ioctl
+ kernel assumes that creating hardlinked files is off.
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d4ef97ff64464126b459ef8d9a0adbb95fb9dc09
+Author: Konstantin Khorenko <khorenko at openvz.org>
+Date: Sat Feb 27 16:58:11 2010 +0300
+
+ CPT: stop the migration if shm restoration failed
+
+ Bug #268163
+
+ Signed-off-by: Konstantin Khorenko <khorenko at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 089c01a6503ec6fc1ce66841d049bb65aa3c212c
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date: Sat Feb 27 16:58:11 2010 +0300
+
+ CPT: restart local_kernel_thread in case of -ERESTARTNOINTR
+
+ This is essential in case of migration to SLM node.
+
+ We can bump into situation when SLM refuses to fork during the
+ undumping process because it thinks that subgroup's resources
+ are to be redistributed. When this happens fork is delayed with
+ the -ERESTARTNOINTR error and the undumping process fails.
+
+ As Den (den@) noticed userspace is not intended to see the
+ -ERESTARTNOINTR error so we should handle this situation in the
+ kernel. According to the logic in the do_signal() function the
+ interrupted system call is immediately restarted in case of the
+ -ERESTARTNOINTR error.
+
+ We borrow this policy and apply it to the local_kernel_thread()
+ cpt helper function.
+
+ [ xemul: this is quite a rare case, so simple cond_resched()
+ is OK here all the more so the redistribution should
+ happen in a timer ]
+
+ Bug #116787
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 8551a850a459df659d7b14a66dfc8cf6da5065d6
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:11 2010 +0300
+
+ CPT: save/restore only classic task flags
+
+ Task flags were restored as they were saved in image. That is not correct as
+ flags differ in 2.6.9, 2.6.16 and 2.6.18 kernels.
+ Actually we just need to save/restore only classic flags (PF_EXITING, PF_DEAD,
+ PF_FORKNOEXEC, PF_SUPERPRIV, PF_DUMPCORE and PF_SIGNALED).
+
+ The problems can occur because during migration from 2.6.9 to 2.6.18 kernel
+ flag PF_USED_MATH was not restored on tsk->flags correctly.
+
+ In 2.6.9 kernel there was field tsk->used_math for this purpose, in 2.6.18
+ kernel it is transformed into one of the tsk->flags.
+
+ And it was a bug, that after restore of fpu state and PF_USED_MATH flag, it
+ was cleared by "tsk->flags = ti->cpt_flags & ~PF_FROZEN", as old cpt_flags do
+ not contain PF_USED_MATH flag.
+
+ Bugs #115977 #115980 #115982
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 75f2abfa9f92fc7ac512a8ed9a34c2df0edd133d
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:11 2010 +0300
+
+ CPT: udp sockets restore fix
+
+ Some applications (like ntpd) set on udp sockets sk_reuse to 1. So any other
+ applications can bind to the same port. During restore we must skip this
+ check and restore and bind all sockets. On IPv6 we must also force DAD
+ (Duplicate Address Detection) procedure to be sure that IFA_F_TENTATIVE flag
+ will be cleared on IPv6 address and socket can be bound to it.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=784
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit ba94d3fa2bb8636a7dceaa01fbf6fecdb8edacd5
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:11 2010 +0300
+
+ CPT: screw up udev bindmounts knot
+
+ Ubuntu's udev on boot does:
+
+ if ! mountpoint -q /dev; then
+ # initramfs didn't mount /dev, so we'll need to do that
+ mount -n --bind /dev /etc/udev
+ mount -n -t tmpfs -o mode=0755 udev /dev
+ mkdir -m 0700 -p /dev/.static/dev
+ mount -n --move /etc/udev /dev/.static/dev
+ fi
+
+ So, workaround is dumping "/dev" as bindmount's source.
+
+ Bug #120852
+ http://bugzilla.openvz.org/show_bug.cgi?id=1198
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit faa9a6dd94c072b38c8f963ce314fc1d6ff69ddf
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:10 2010 +0300
+
+ CPT: restore dead tasks proc files
+
+ If some process opened /proc/<pid><somefile> and process with <pid> will die
+ after some time then checkpoint fails with error:
+
+ Can not dump VE: Invalid argument
+ Error: d_path cannot be looked up /proc/125/cmdline
+
+ The fix is to catch this situation at the dump time, mark the image respectively
+ and restore a fake file on restore.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=1047
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 977418edceabb4705f5012e562d4e5e04a19f138
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:10 2010 +0300
+
+ CPT: adjust vfsmounts restore order
+
+ Idea is: Dump parent before dump his children
+
+ This order is needed during checkpoint/restore:
+
+ mount /A /B -o bind
+ mount none /C -t tmpfs
+ mkdir /C/D
+ mount /B /C/D --move
+
+ After this, checkpoint (w/o this patch) will dump vfsmounts in order:
+
+ - vfsmount, bind to /A, mounted to /C/D
+ - vfsmount, mounted to /C (tmpfs)
+ and will restore in the same order, that causes error.
+
+ Bug #132951
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c42b985195cc8e7c2bbeb644e92d98a066aacc18
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:10 2010 +0300
+
+ CPT: dont cpt requiresdev fs
+
+ Don't allow chkpnt VE with mounted ext2/ext3, etc filesystems.
+
+ Allow checkpoint only for mounted nodev and "external" filesystem.
+
+ This check protects from error on restore:
+ CPT ERR: ffff810007113000,102 :-2 mounting /root/some_dir ext3 40000000
+
+ as do_one_mount() doesn't pass mntdev to mount().
+
+ [xemul: actually, the reason we don't support filesystems other than
+ virtual and tmpfs is because we simply can't (easily) get the
+ mount options for them to cpt and restore ]
+
+ Bug #131737
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a1d028ce2f1e87b5d64fb9fb7ed46740c1d73ed2
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:10 2010 +0300
+
+ CPT: Restore information about tcp listening sockets
+
+ Not all options are important. Only missed ipv6only can cause
+ error if other application want to listen the same port for IPv4 any address.
+
+ tp->XXX are inherited by children (noticed by Alexey Kuznetsov), so we need also
+ to restore these options.
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+
+ Comment from Alexey:
+ It [everything before] was not OK. The feature which are broken are important,
+ but not actually critical except for ipv6only.
+
+ F.e. DEFER_ACCEPT is broken -> but nobody will notice, it just will not
+ be deferred.
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 6364b5498e48bcb600472bb2fafb865206f35068
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:10 2010 +0300
+
+ CPT: put 'expect' after insert to the 'conntrack'
+
+ During restore conntrack, we need to put expect after allocating
+ ip_conntrack_expect and do something with one. Expect will be
+ freed either immediately (if nobody has this expect) or during cleanup/timer
+ hooks. Otherwise expect will never be freed.
+
+ Note: Approaches for kernels 2.6.18 and 2.6.9 are different. For example
+ see help() in "net/ipv4/netfilter/ip_conntrack_netbios_ns.c"
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b3d4348ca6322edad5a0a0d56b15d1eb8db718bd
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:09 2010 +0300
+
+ CPT: Fix ip_conntrack_ftp usage counter leak
+
+ Function ip_conntrack_helper_find_get() gets module counter. So put a
+ conntrack after putting in the hash and handling the conntrack's expect
+ list.
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 74e373eeb5e71b1c8253c04bee92250e5f6640cf
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:58:08 2010 +0300
+
+ CPT: dump and restore global snmp statistics
+
+ Per device exists for ipv6 only and is probably not used now, but
+ anyway - I'll do it later.
+
+ This patch adds new section CPT_SECT_SNMP_STATS that is populated
+ with CPT_OBJ_BITS set of objects - one for each type of statistics.
+ Objects have variable length. Stats are stored as a plain array of
+ __u32 numbers and thus the order in which stats types are stored is
+ implicitly hard-coded.
+
+ In case we do not have an IPV6 turned on all ipv6 stats are dumped
+ as CPT_OBJ_BITS/CPT_CONTENT_VOID and are skipped on restore.
+
+ When we restore from an image with more stats in any type, the not
+ supported ones are dropped with a warning.
+
+ Stats add 28K to image file.
+
+ Bug #113930
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 3b0f4b2e0503c157d596d7426ffcba01e30e930f
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:08 2010 +0300
+
+ CPT: Fix memory corruption if cpt_family is wrong.
+
+ During restore, if parent socket is AF_INET but cpt_family is
+ wrong (non initialized, see bug #95113), then considering the request as
+ related to AF_INET6 is not right and leads to memory corruption.
+
+ As there are a lot of buggy images, we can't check only on values
+ AF_INET and AF_INET6.
+
+ Decision:
+ - Check request on AF_INET6 first, and consider
+ request as AF_INET by default.
+ - Additionally checkup for AF_INET6 request (protect from
+ random value cpt_family == AF_INET6)
+
+ Bug #118912
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Acked-by: Denis V. Lunev <den at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 4a7ddd3db9a8030d514d120341bffd904ef57315
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:58:07 2010 +0300
+
+ CPT: fix restoring of /dev/null opened early by init
+
+ The problem is the following:
+ * init from fc9 starts and opens /dev/null for its stdin, stdout
+ and stderr
+ * udev starts and overmounts /dev with tmpfs
+
+ After this cpt cannot dump this ve, since one process holds a file,
+ that is inaccessible from ve root.
+
+ The proposed solution is the following:
+ 1. allow for /dev/null to be over-mounted
+ 2. restore init's file in two stages:
+ stage1: *before* we restored mounts restore init's 0, 1 and
+ 2 file descriptors, since most likely (in fc9 case - definitely)
+ init opened them before any other manipulations with fs;
+ stage2: restore the rest files later, at usual time to make
+ sure that e.g. sockets etc are restored properly.
+
+ Comment from Alexey:
+
+ ACK.
+
+ Though this is really ugly, it really produces 100% correct result
+ for this particular situation.
+
+ Bug #116261
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 937a5462e54d42a70ca0a66c7d3147d02ff40767
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:58:07 2010 +0300
+
+ CPT: lock sock before restoring its synwait queue
+
+ This new socket already has all the necessary TCP timers armed,
+ so tcp_keepalive_timer can fire during the rst_restore_synwait_queue
+ and (for the latter being lockless) can spoil the queue.
+
+ Bug #118912
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c5d30bd0194b026df7684e08f1b6e8e77d06305c
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:58:07 2010 +0300
+
+ CPT: sysctl randomize_va_space
+
+ implement checkpointing for virtualized sysctl kernel.randomize_va_space.
+
+ reuse existing unused pad1 field in cpt_veinfo_image.
+ 0 -> image without rnd_va_space virtualization (default value is used)
+ 1 -> rnd = 0
+ 2 -> rnd = 1
+ etc...
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit bbdcbaadf794e4a6c579cdac4c92ecc278d7606c
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:07 2010 +0300
+
+ CPT: add check for presence of module slm_dmprst if SLM is enabled
+
+ Add a check in "checks" for presence of module slm_dmprst if SLM is enabled.
+ Check will be performed for both source and destination nodes. Changes in
+ vzmigrate are not needed.
+
+ Bug #114312
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 04c139f6c20e5c80a19db1439f8cd2f7e2715b4e
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:07 2010 +0300
+
+ CPT: add diagnostics in case of iptables-restore fail
+
+ It is not clear right now what is wrong if iptables-restore fails.
+ Add some diagnostics in case of error.
+
+ Bug #95952
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f06677625bf53b6aad0a3742b5f01d1376715e1d
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:58:06 2010 +0300
+
+ CPT: Check that VE is not running on restore.
+
+ Bug #99679
+
+ Signed-off-by: Denis V. Lunev <den at parallels.com>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit dcda94043007a5d005e92c2df31ba63eeb1b8a70
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:06 2010 +0300
+
+ CPT: fix check in decode_tuple()
+
+ Tuple structure can be used as a mask and protonum can be 0xffff in 2.6.9
+ kernel. In 2.6.18 kernel all masks for protonum are 0xff and 0xffff will
+ be shrunken to 0xff.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 5a889e32263292bec6e2d4c2710ee41985f35716
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:06 2010 +0300
+
+ CPT: fix restore of conntrack expect timer
+
+ One more fix of restore conntrack procedure.
+ Following code:
+
+ if (ct->helper->timeout && !del_timer(&exp->timeout)) {
+ ...
+ }
+
+ can lead to oops, as exp->timeout is not initialized at this point.
+
+ Actually this optimization is not needed at all.
+ If expectation is dying, then we will let it die by its own death.
+
+ Also in ip_conntrack_expect_insert() there is an initialization of
+ exp->timeout. And we can't just do add_timer() after that (as in add_timer()
+ we have BUG_ON(timer_pending(timer))), we must do mod_timer() instead.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 19dce010faff8960e80b1778afa9f4ad07dd365f
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:06 2010 +0300
+
+ CPT: restore mark value on conntracks
+
+ Restore mark value in conntracks as it is needed for connmark module.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7ec63fdedf332db285f71d857cf395da8cf674d5
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:06 2010 +0300
+
+ CPT: convert conntrack tuple from 2.6.9 kernel image
+
+ Add conversion for conntrack tuple from 2.6.9 kernel image.
+ Check for correct value is added in decode_tuple().
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c34d6367f6cc5ee7f60fdee828c41de7b633a779
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:06 2010 +0300
+
+ CPT: convert conntrack image from 2.6.9 to 2.6.18
+
+ CPT structure in image file for conntracks is different in 2.6.9 and 2.6.18
+ kernels (array cpt_help_data was enlarged in the middle of the structure), so
+ conntracks from 2.6.9 kernel are restored incorrectly on 2.6.18 kernel and
+ lead to kernel oops.
+
+ A simple conversion from 2.6.9 to 2.6.18 is introduced to restore conntracks
+ correctly on 2.6.18 kernel.
+
+ Bug #113290
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 21644501b4651df2c7f271cae528f1996fc23a8d
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:05 2010 +0300
+
+ CPT: create kernel threads in VE0 context
+
+ In current implementation master process which performs checkpointing has
+ owner_env set to VE0 and exec_env set to VE. All auxiliary kernel threads
+ are created with exec_env set to VE and owner_env set to VE0, so after the
+ do_fork_pid() we have the following:
+
+ * new thread has owner_env == ve0, exec env == ve
+ * its pid belongs to ve (pid->veid != 0)
+
+ That is why if ve_enter() in thread fails, then we hit BUG_ON in
+ release_task -> detach_pid -> free_pid
+ sequence, since task owner env != pid's veid.
+
+ When enter succeeds the task's owner env becomes ve and this BUG_ON
+ is not triggered.
+
+ To solve this problem exec_env is switched to VE before kernel thread
+ creation and switched back after. Veid is passed to kernel via args. All
+ kernel threads are created with CLONE_VFORK to be sure that parent
+ process will not exit before doing exec() in thread.
+
+ Bug #97124
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 686bb3916a1247b46893078f8d87b8df6b1e305a
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:05 2010 +0300
+
+ CPT: restore rlimits correctly during 32bit-64bit migration
+
+ During 32bit to 64bit migration rlimits were restored incorrectly due to
+ different size of long on 32bit and 64bit archs. Now simple conversion is
+ introduced in case of 32bit-64bit migration. Infinity values are restored as
+ infinity values. Error is returned if value greater than RLIM_INFINITY32 is
+ found in dump during restore on 32bit arch.
+
+ Bug #111965
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c3e4a29b420b871a6543955728b1f8a5de75e955
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:05 2010 +0300
+
+ CPT: restore packet control block from kernels with and without IPv6
+
+ More generic mechanism for restoring packet control blocks. Unfortunately we
+ do not save length of control block in dump and we can only try to calculate
+ it during restore. This method is based on knowledge that the flags value in
+ TCP control block is not zero for all packets in queue.
+ Since this image version TCP control block will be saved in IPv6 form
+ regardless of IPv6 config option.
+ Restore of control block is split in 4 ways for any IPv6 and non-IPv6
+ kernel combinations.
+ Check is added to be sure that all control block were restored in the same
+ way. If it will be found that some control blocks were restored incorrectly,
+ then undump process will be terminated.
+
+ Bug #111370.
+
+ Merged 4 patches sent earlier:
+ 1. Increase image version.
+ 2. Save TCP control block regardless to IPv6 config option.
+ 3. Restore of control block is split in 4 ways...
+ 4. Add appropriate comment on TCP control block restore procedure.
+
+ [xemul:
+ Added do { } while (0) around macro body
+ Mention Alexey in comment about skb_cb->flags being non-zero
+ ]
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1f218bb8d606af3b95cd089b68b44800f91ac7d1
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:05 2010 +0300
+
+ CPT: add binfmt_misc fs in supported list
+
+ Just add binfmt_misc in list of supported file systems. With this small
+ quick fix migration will be allowed, but all binfmt_misc entries will
+ be dropped during migration.
+
+ This fix is only for the first time. Later will be implemented generic
+ mechanism for checkpointing/restore of external modules. And this quick
+ fix will be replaced with full support for binfmt_misc in CPT.
+
+ Bugs #100709, #101061
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 85da0ddab187bb9e6000ba6c98b7454095055799
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:05 2010 +0300
+
+ CPT: relax check for several bind mounts on the same mount point
+
+ Relax check for special bind mounts which mounted several times on the same
+ mount point. We need to check only dentry, mount check can be skipped in this
+ case.
+ We can't remove completely mount check as there exist cases when we need
+ to check mnt too. E.g. /dev is mounted with NODEV over /dev and some file is
+ opened from underlying mount. If mount check is removed, then we will be able
+ to checkpoint such state, but we will not be able to restore it.
+
+ Correct solution will be to dump/restore whole mount tree with overmounts.
+ But we can't implement this right now for number of reasons.
+
+ Bug #84310
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit bc4769bb4acc7547f4e537b23a093019e78652d7
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:04 2010 +0300
+
+ CPT: fix reopen dentries procedure
+
+ Dentries were not reopened correctly during checkpointing and restore.
+ Two bugs fixed:
+ 1. In case of huge files (more than 2Gb) dentry_open() returns -EFBIG if
+ O_LARGEFILE flag is not set. This flag should be used for temporary files
+ used during checkpointing and restore process.
+ Bug #99544
+ https://bugzilla.sw.ru/show_bug.cgi?id=99544
+
+ 2. In dump_content_regular() we have following code:
+ file = dentry_open(dget(file->f_dentry),
+ mntget(file->f_vfsmnt), O_RDONLY);
+ if (IS_ERR(file)) {
+ cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
+ eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(file));
+ return PTR_ERR(file);
+ }
+
+ Which results in kernel oops if dentry_open() returns error
+ (e.g. -EFBIG because of bug #99544)
+
+ Bug #99542
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 08b8f8ba476ec8e67b2eac74028fa5f4a3586c2f
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:04 2010 +0300
+
+ CPT: fix save/restore of open requests
+
+ Open requests were saved and restored sometimes incorrectly:
+
+ 1. Family of open request was not saved (commented out)
+ 2. Restore was broken, would crash because rsk_ops was cleared by memset.
+ 3. And finally, all the code restoring open requests was skipped.
+
+ Tested with http_load.
+
+ Bug #95113
+ http://bugzilla.openvz.org/show_bug.cgi?id=784
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 0a6789976c6ff602e11a4f00123ae70b62738f21
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:04 2010 +0300
+
+ cpt: add lost dcache_lock protection around __d_path()
+
+ Protect __d_path() call with dcache_lock spinlock.
+ Protect other checks with env->op_sem semaphore.
+
+ Bug #98833
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 22c792c3605e5d0f916308678319e25eb18cf4a6
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:58:04 2010 +0300
+
+ cpt: fix restore of inotify on symlink
+
+ Inside VE file /etc/mtab is a symlink to /proc/mounts.
+ FreeNX server with KDE creates inotify on /etc/mtab file.
+ To restore such inotify we need to obtain dentry with path_lookup() and
+ restore inotify on it.
+
+ Bug #96464
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 66a6c3e51c35096b204b8866ee50afe0b1d13d59
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:58:04 2010 +0300
+
+ quota: compat layer for compat quota
+
+ This patch implements compatibility quotactls for old quota tools.
+
+ replace:
+ diff-fs-quotcompat-ia32emul-fix-20050921
+ diff-fs-quotcompat-comp-fix-20080710
+ diff-fs-quotcompat-xencomp-fix-20080806
+ diff-fs-quota-compat-proper-split-20081027
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1b04f79cb59f8cd8fb1ca26e19a6a4e8295a088f
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:58:03 2010 +0300
+
+ ve: Don't check for CAP_SETVEID - use more ... imagination
+
+ This patch:
+ The proposed check correctly detects the root in ve0.
+ However, we lose the ability to create containers with
+ some fancy tool, that has the CAP_SETVEID capability
+ *only*, but we don't have such.
+
+ The cap itself is declared to be obsoleted, but there's
+ no need in rewriting vzctl in a rush - things will still
+ work. If we'll want to manipulate audit caps from the
+ vzctl we'll make it via features.
+
+ Overall history:
+
+ Don't ban CAP_AUDIT_XXX capabilities in container to make the
+ dbus-daemon work.
+
+ After two (maybe three) days of brain storm me and Den finally
+ gave birth to this solution. So...
+
+ First of all AUDIT will be banned in container. Since dbus refused
+ not to set audit caps we don't want it to mess with it in any case.
+
+ Next step is to note, that CAP_AUDIT_CONTROL coincides with the
+ CAP_VE_ADMIN, which is not that bad (besides, dbus doesn't try to
+ set this one up) and we leave one alone.
+
+ And finally - the CAP_AUDIT_WRITE, which coincides with the most
+ delicate one - CAP_SETVEID. The latter one is explicitly dropped
+ on container start and there's no way to set one (dbus tries this
+ and fails) back. Simple "don't clear it" solution is too dangerous.
+
+ To handle *this* case we
+ 1. replace all checks to capable(CAP_SETVEID) to more complicated,
+ but still matching ve0's root only;
+ 2. don't ban the CAP_SETVEID (== CAP_AUDIT_WRITE == the_one_dbus_needs);
+ 3. remember, that this capability is present on ve startup and thus
+ we automatically have the CAP_AUDIT_WRITE required by dbus;
+ 4. carefully handle the case, when we enter container in do_env_create
+ and try to call fairsched system calls.
+
+ That's it. No fraud, just manual dexterity ;)
+
+ Bug #117448
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 153eca7d4bf56bd34e7c5957b1ff8ec331713a0b
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:58:03 2010 +0300
+
+ fairsched: Sanitize fairsched manipulations on ve startup
+
+ First of all we won't be able to call them after we fix
+ capability checks. Second of it is that taking the fairsched
+ mutex 4 times on startup is an overkill.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit e2fb9c79fd348a0603c4b881c4e1f179945b55b5
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:58:03 2010 +0300
+
+ ms: lutime lchmod syscalls
+
+ Add possibility to change owner/permissions on symbolic links
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 0b7042d24abe59baba84a78e37b95a88624f9308
+Author: Konstantin Khorenko <khorenko at openvz.org>
+Date: Sat Feb 27 16:58:02 2010 +0300
+
+ ve-net: permit changing of netdev's tx_queue_len from inside a CT
+
+ In particular it makes OpenVPN happy.
+
+ Bug #457318
+
+ Signed-off-by: Konstantin Khorenko <khorenko at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit eb3139203f525babc452556dd5071c73382050dd
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:58:02 2010 +0300
+
+ venet: Core support for external ip filtering
+
+ Allow VE emit packets with configured source IP address.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 98ec6de33c046e4f053c6b21152d3e07bead7804
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date: Sat Feb 27 16:58:01 2010 +0300
+
+ vzethdev: stat tx dropped acount
+
+ Veth get_stats() should return the number of tx_dropped packets
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 57a5848f98e677abefa203f9ad5f1b4bf3d28ace
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:58:01 2010 +0300
+
+ venet: add TSO support in venet and vzethdev
+
+ venet and veth support checksumming and scatter-gather features, but TSO
+ feature still wasn't added.
+
+ TSO increases bandwidth up to 50% or appreciably decreases CPU usage.
+
+ Approach is the same as for checksumming:
+ 1. TSO is off by default
+ 2. For veth: tso can be enabled/disabled in VE or VE0 for
+ pair {veth in VE, veth in VE0}
+ 3. For venet: tso can be enabled/disabled only in VE0 (for
+ all venet devices at once)
+
+ To use this feature just enable:
+ 1. Tx checksumming: ethtool -K DEVNAME tx on
+ 2. Scatter-gather: ethtool -K DEVNAME sg on
+ 3. TSO: ethtool -K DEVNAME tso on
+
+ Some performance info (tested via netperf):
+
+ 1. Traffic VE->VE0 (via venet), TCP STREAM test, message size 32K, socket size 256K:
+
+ TSO off 2300 10^6 bits/s
+ TSO on 5600 10^6 bits/s
+
+ Notes:
+ Admins need to set TSO on {venet,veth} only if physical ethernet device supports TSO.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f0fe2ba7ff9d91a2bfef1ec95fddbeada5be14d3
+Author: Vasily Averin <vvs at openvz.org>
+Date: Sat Feb 27 16:58:01 2010 +0300
+
+ ve: Kill not-yet-closed TCP sockets on VE stop herder
+
+ Idea proposed by Alexey Kuznetsov <alexey at openvz.org>
+ tcp_v4_kill_ve_sockets() can hang in a loop because NFS can hold some sockets in
+ host node rpciod/nfsdiod queues.
+ This patch resets such sockets if it's possible or delays their cleanup.
+
+ changes in 20090429: fixed wrong locking and another xemul@ notices
+ Bug #429296
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 5ad4c74a16b2f9812a1d79287bba724243454ecc
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:58:00 2010 +0300
+
+ bc: compat system calls for bc and fairsched
+
+ correct UB_MAXVALUE conversion and wire compat syscalls
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 20fd4dd54736b40a815ad07d34c4339d5c627f7e
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:58:00 2010 +0300
+
+ ub-dcache: sleep in dput
+
+ ub: dentry->dentry_bc.d_ub is unreliable after the sleep
+
+ d_kill can sleep inside. In this case dentry->dentry_bc.d_ub saved before
+ is unreliable as we can have dcache accounting on event during sleep. In this
+ case we'll have saved ub == NULL and OOPS/leak inside dcache_uncharge.
+
+ Another problem here is that we should decrement inuse count on the
+ dentry appropriately.
+
+ Bug #116095
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 76038f85b0523d4d2a48b20b5443a81dee3531e4
+Author: Cyrill Gorcunov <gorcunov at openvz.org>
+Date: Sat Feb 27 16:58:00 2010 +0300
+
+ ve-fs: implement "ve-xattr-policy" sysctl entry
+
+ "ve-xattr-policy" sysctl entry allows to control how to react on xattr
+ change from inside of a container.
+
+ There are three options allowed:
+
+ 0 - accept any xattr modifications (VE0 always and VE by default)
+ 1 - ignore
+ 2 - reject
+
+ Note that any other value assigned to "ve-xattr-policy"
+ leads to "accept" policy being applied without any warning.
+
+ The sysctl is placed at /proc/sys/fs/ve-xattr-policy on HW node.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=1050
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 5cab8bf42b5da73a02d5288951aeeec8fd8b4716
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date: Sat Feb 27 16:57:59 2010 +0300
+
+ ve-kmsg: printk va copy add
+
+ Copy args variable in ve_printk() function
+
+ x64 can corrupt va_list after return from the called function.
+
+ Bug #440939
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b55fc66f70948758037a4639e8a63663792ec1f5
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:57:59 2010 +0300
+
+ ve-kmsg: printk lockdep fixup
+
+ printk: fix lockdep warnings if kernel compiled with CONFIG_LOCKDEP
+
+ vprintk() to VE causes:
+
+ =====================================
+ [ BUG: lock held at task exit time! ]
+ -------------------------------------
+ iptables/8203 is exiting with locks still held!
+ 1 lock held by iptables/8203:
+ #0: (sk_lock-AF_INET){--..}, at: [<ffffffff81213341>] ip_setsockopt+0x61/0xa0
+
+ stack backtrace:
+
+ Call Trace:
+ [<ffffffff8100b78a>] show_trace+0xca/0x3b0
+ [<ffffffff8100ba85>] dump_stack+0x15/0x20
+ [<ffffffff8105e469>] debug_check_no_locks_held+0x89/0xa0
+ [<ffffffff8103aa7e>] do_exit+0xe2e/0xe80
+ [<ffffffff8103aba0>] sys_exit_group+0x0/0x20
+ [<0000000000000001>]
+
+ Note: to reproduce this you can type in VE:
+ iptables -A INPUT -m tcp --dport 22 -j DROP
+
+ Signed-off-by: Vitaliy Gusev <vgusev at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 84ac295d2315ecf649e3910735d81e8d217396c3
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:58 2010 +0300
+
+ ve-proc: mangle mounts devname harder
+
+ mounts: show /dev/xxx devices near ve root mounts, rather than just xxx
+ Required for fixing autofs in rhel5 container:
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 454ad87b41380655cb31a85f682ddb8289e8e1f9
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:58 2010 +0300
+
+ ve-sysctl: randomize_va_space
+
+ virtualize sysctl kernel.randomize_va_space
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a44c3498bcf70065a85236b7daa77fe0320313f2
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:58 2010 +0300
+
+ ve-sysctl: add proc_dointvec_ve helper
+
+ add generic method for proc access to per ve int values.
+
+ extra1 field of ctl_table contains data field offset from ve_struct begin.
+ without CONFIG_VE use address from .data field.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 34e6684b531637ad4fd34502d32f6e3c74e2dac6
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:57 2010 +0300
+
+ ve: drop oom immunity at enter
+
+ At CT enter switch to default OOM adjustment level if task is OOM-immune.
+
+ This is a very bad idea to have OOM-unkillable tasks inside container,
+ because all forked tasks inherit this setting.
+
+ Proc interface for changing OOM adjustment (/proc/<pid>/oom_adj)
+ already restricted in CT by diff-ve-oom-adjust-20070604.
+
+ On some systems sshd got OOM protection at start and not drop it after fork.
+ (example: ssh root at HN -> vzctl enter -> restart apache -- apache now OOM immune)
+ (example from xemul@: ssh root at HN vzctl start - VE is now OOM immune)
+
+ http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=480020
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit c7cf5c388378abf4d6e8e2e18c6c815eccab4fd7
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:57:57 2010 +0300
+
+ ms: ext4 use get host
+
+ Force ext4 page fault handlers use ->get_host callbacks
+ This is required not to use vzfs file in ->page_mkwrite callback.
+ Bug #454968
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a7de88181858ae8f9ec51cee11ae7f955e76430d
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:57:57 2010 +0300
+
+ nfs: disable nfs-v2
+
+ nfs: disable NFSv2 as it is broken
+ According to Alexey: "who is going to turn v2 on, having
+ a v3, which works better, nearby?"
+
+ Bug #114720
+
+ Signed-off-by: Denis V. Lunev <den at parallels.com>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7805f36534f20e530fb84e83a360993ec78f3bb6
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:57:56 2010 +0300
+
+ ve: vfs sillyrename
+
+ i_nlink count on private inodes after silly rename is 1. So, virtual inodes
+ gain i_nlink == 1 and remains in unused_list instead of to be cleaned.
+
+ Bug #114672 #112999
+
+ Signed-off-by: Denis V. Lunev <den at parallels.com>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d252a93b32d6d251fcc73863b75b91edaa801b95
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:57:56 2010 +0300
+
+ mm mmap zero length kludge
+
+ Return -EINVAL in case of zero length file to all applications except
+ rpm. For (legacy) rpm address will be returned.
+
+ Such hack is introduced just not to break compatibility with old
+ tools, sorry :(
+
+ Bug #74964
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 437d113149802cb91254246f29134e3ade55e411
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date: Sat Feb 27 16:57:56 2010 +0300
+
+ nfs: use file private macro
+
+ Minor fix to nfs, which allows to use vzfs over nfs mounts.
+
+ It survives fsstress test. I think normal vzfs tests can be started
+ asap to catch the points of possible misbehaviour.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 3c07eb700d9bbe7fd6b7dcf52103faf58ef4a035
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:55 2010 +0300
+
+ vzdq: cleanup fake qmblk destroy
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 8d622018ad2a3d025576578c0838c18ebfd3fdab
+Author: Konstantin Ozerkov <kozerkov at openvz.org>
+Date: Sat Feb 27 16:57:55 2010 +0300
+
+ vzdq: qmblk dq_sem to mutex
+
+ vzquota: replace quota master block semaphore with mutex
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 769b3bbe8d7859d168b42daa35720f12372e10db
+Author: Konstantin Ozerkov <kozerkov at openvz.org>
+Date: Sat Feb 27 16:57:54 2010 +0300
+
+ vzdq: vz_quota sem to mutex
+
+ vzquota: replace master lock semaphore with mutex
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 085883fb2366ae47c84fb18aa50f832e93ab56aa
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:54 2010 +0300
+
+ vzdq: vzaquota proc nlink
+
+ Produce correct nlink count for /proc/vz/vzaquota
+
+ Use count of mountpoints accessible from VE as upper estimate for
+ count of subdirectories inside /proc/vz/vzaquota.
+ Concept stolen from vzdq_aquotd_readdir.
+
+ Disable enumeration in VE0 for performance reason (like in _readdir and _lookup)
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b9a8ce596cba9f5161769ca0408c71f8e6a059c7
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date: Sat Feb 27 16:57:54 2010 +0300
+
+ vzdq: swap noquota
+
+ swap_inode did not do anything for inodes not covered by vzquota,
+ which was wrong. F.e. mkdir, which creates inode with i_blocks!=0,
+ triggered message "detached inode not in creation".
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 20d11fba2ae882456b343ae78f466e27cc19d000
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date: Sat Feb 27 16:57:54 2010 +0300
+
+ vzdq: nfs support
+
+ It works differently and requires different interface.
+ Block accounting and quota check are separate now, we account
+ without checks and check for space in places, where an operation
+ could allocate more space.
+
+ Chunk-by-chunk:
+
+ 1. Added new operation - swap_inode. Normally, virtual inode
+ is created/accounted/checked simultaneously. It is impossible for NFS.
+ So, each operation creating a new inode starts from allocating
+ space in quota using a dummy inode. If the operation succeeds and real
+ inode is created, we swap quota accounting information.
+ TODO: optimize out dummy inode. All that we need is qlnk.
+
+ 2. DQUOT_CHECK_SPACE() to check that quota is not full.
+
+ 3. DQUOT_SYNC_BLOCKS() to resync i_blocks obtained from NFS server
+ with our accounting.
+
+ 4. is_nfs_root(). NFS does not have root inode. Instead each mount
+ has pointer to a disconnected inode. vzquota has to understand this.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fd4f6b28860495f939f10abfaec8f255797a4fe8
+Author: Alexey Kuznetsov <alexey at openvz.org>
+Date: Sat Feb 27 16:57:53 2010 +0300
+
+ vzdq: fix oops is inode_drop_call
+
+ I suppose this happens when vzcache moves to template a file,
+ which was not under vzquota.
+ Bug #97782
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 71208971e69657168517194564e045781b054526
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:57:53 2010 +0300
+
+ simfs: statfs on root
+
+ Do not use s_root dentry of underlying for statfs
+ The real problem is that s_root on the NFS super block is a crap.
+ Unfortunately, the original dentry (which is asked to be statfs-ed)
+ is not available at this point. The only visible solution for this
+ is to use the dentry to which simfs is point to.
+
+ Signed-off-by: Denis V. Lunev <den at parallels.com>
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 11d902b2933c3292b8e1305e38e37c6419cb9cf2
+Author: Konstant Khorenko <khorenko at openvz.org>
+Date: Sat Feb 27 16:57:52 2010 +0300
+
+ virtinfo hook in daemonize
+
+ #427726
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 95a5273372efb164d0b3a4ab6eefca8b671d13e4
+Author: Andrey Mirkin <major at openvz.org>
+Date: Sat Feb 27 16:57:52 2010 +0300
+
+ virtinfo add cpttest
+
+ Add VIRTINFO_SCP_TEST event to virtinfo calls
+
+ This will be responsible for checking CPT features
+ during checkpoint/restore process.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit e2e5984d43c91b3aa674123af73849e9643bffb3
+Author: Konstantin Khorenko <khorenko at openvz.org>
+Date: Sat Feb 27 16:57:52 2010 +0300
+
+ ve-proc: fake sysrq trigger
+
+ Add dummy /proc/sysrq-trigger file inside a Container
+
+ Oracle 11g Release 1 RAC tries to open one and refuses to start on fail.
+ Writing to the file inside a CT leads to nothing, first 10 writes are logged.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit fc17c7e942ccbcf6909ef9fdb7c4f170acaf1d72
+Author: Vitaliy Gusev <vgusev at openvz.org>
+Date: Sat Feb 27 16:57:51 2010 +0300
+
+ ve-proc: add devices
+
+ Proc: add empty /proc/devices to CT
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 3cfd7ac2a553a88af0053a59ac9870f1ce82760f
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:57:51 2010 +0300
+
+ ve: decrease ve_struct size in case of huge nr_cpus
+
+ kstat_lat_pcpu_struct contains array of NR_CPUS elements.
+ Replace it with alloc_percpu data which helps to keep ve_struct
+ relatively small and prevents allocation fails of huge order.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 010370ec6b62618648c8b8882d3887e5e4073fc8
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Mon Apr 26 17:22:10 2010 +0400
+
+ percpu: Return ve0/ub0 percpu-s back
+
+ With the DEFINE_PER_CPU and init-s made in proper place we can
+ use them as alloc_percpu-ed ones.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 541c4b4da4f9c522593f3fd622e5d20fa6a6b294
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:51 2010 +0300
+
+ ve: fix fs umount at ct stop
+
+ Don't umount some mount multiple times on ct stop
+
+ umount_tree kill argument must be empty list,
+ otherwise it can detach each vfsmount multiple times and
+ produce negative d_mounted count on mountpoint dentry.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 543578c2947332cda5aea3b195c4d6a80a3d317b
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:50 2010 +0300
+
+ ve: ptys idr mem leak
+
+ Plug minor memory leak in idr_layer_cache slab on ve start-stop
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 965adae71aaa774796aeac8087806b77bbb0709f
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:50 2010 +0300
+
+ ve: tmpfs virtualize default size
+
+ set default size to half of physpages from meminfo
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 79c0a2ab51af39b665f7e8162c26c5573eca1872
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:57:50 2010 +0300
+
+ ve: meminfo dont use subub
+
+ Get parent UB instead of sub-group one to calculate usage
+
+ Signed-off-by: Denis V. Lunev <den at openvz.org>
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 223f044cc32146df3a5f6dc61aab2bd053277de8
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:50 2010 +0300
+
+ ve: move veinfo to vzmon
+
+ Since some people wish to run openvz w/o venet device, but
+ vzlist tool relies on /proc/vz/veinfo file presence, vzmon
+ module is a better place for this file.
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=394
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f267ef18a62f50bd5293a876e43b89467c8253f4
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:57:49 2010 +0300
+
+ ve: virtualize binfmt-misc
+
+ Nothing special. SUN jdk complains since can't use binfmt.
+ Not serious and java surely works fine w/o it, but just to
+ make it and its users happy let's virtualize binfmt_misc.
+
+ Signed-off-by: Pavel Emelianov <xemul at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1ff4faada1dabfdc4592e2824ce53a357373c83e
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:49 2010 +0300
+
+ bc: pb hash cookie
+
+ add random hash cookie to ub to use in pb_hash instead of non-random ub_uid
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 31f588463c8294df47ff6357829b286abd580782
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date: Sat Feb 27 16:57:49 2010 +0300
+
+ bc: uncharge files harder
+
+ There is a chance when we do not start uncharging because
+ ub_barrier_farnr() is not hit for UB_NUMFILE and ub_barrier_farsz()
+ is not hit for UB_KMEMSIZE (SLM for example set ubc barrier to a
+ huge value).
+
+ This fact can lead us to the situation when two tasks are able
+ to consume all of UB_NUMFILE and UB_KMEMSIZE despite they close
+ opened files.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 76cd7c1686940c2eeef94926e978b8893f9bb9e2
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:48 2010 +0300
+
+ ve: show proc swaps in ct
+
+ Fill the size/used values with the ones from the meminfo virtinfo notifier.
+
+ Show one fake swap partition (/dev/null) with the same size/used as in
+ /proc/meminfo. If --meminfo == none show overall swap statistics from HN.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit bf8c54dbd1c7b09abdab952da58e1f2c8f439ea4
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:48 2010 +0300
+
+ ve: mangle swapinfo
+
+ Fill swap size/usage with data from UB_SWAPPAGES in meminfo notifier.
+ Don't show swap if the limit is unlimited (default state).
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 1c2b5b4b1cbaafa707cb56da94dd5099dbdcc73d
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:48 2010 +0300
+
+ cpt: bc resources array
+
+ restore only bc resources really presented in cpt image.
+
+ store UB_RESOURCES in cpt_beancounter_image while checkpointing.
+ (leave all new added resources with default limits filled at bc alloc)
+
+ change cpt_content of cpt_beancounter_image to CPT_CONTENT_ARRAY to detect
+ structure version without bumping cpt image version, because in old images
+ __cpt_pad field (reused for cpt_ub_resources) is uninitialized.
+
+ add missed error handling inside rst_undump_ubc -- toss errors
+ from restore_one_bc to higher level.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 7b8bbb51527e58abadcd0eeb3e7103ba4048a57f
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:47 2010 +0300
+
+ bc-swap: add swappages bc resource
+
+ The limit value will be used as configured CT swap size to show
+ in /proc/swaps and /proc/meminfo. Default is UB_MAXVALUE
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit e7416bee163fb262076d9b7dfa93c0dbf304891d
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:57:47 2010 +0300
+
+ bc-rss: show how much page beancounters each bc has
+
+ Essentially, this is the per-UB rss value calculated
+ (unlike physpages and privvmpages) w/o taking sharing
+ into account.
+
+ With this statistics (shown via /proc/bc/XXX/vmaux:rss)
+ we can evaluate the portion of pages, that are shared
+ across beancounters (i.e. CTs) like this:
+
+ (\sum (bc.rss + bc.tmpfs_respages) - \sum (bc.physpages)) /
+ (\sum (bc.rss + bc.tmpfs_respages))
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b03577fcbea66508aca033f9c9c78bc060c02c24
+Author: Denis Lunev <den at openvz.org>
+Date: Sat Feb 27 16:57:47 2010 +0300
+
+ bc-ioacct: define page_io_mark in right place
+
+ fix compilation without CONFIG_BC_IO_ACCOUNTING
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 35fe6d0b31e36227f572550dff53154491760fb1
+Author: Marat Stanichenko <mstanichenko at openvz.org>
+Date: Sat Feb 27 16:57:47 2010 +0300
+
+ bc-ioprio: sys_ioprio_set lost unlock
+
+ sys_ioprio_set() may exit without releasing tasklist_lock. Fix it.
+
+ Acked-by: Pavel Emelyanov <xemul at openvz.org>
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 2cba7730c015206352563731d9f25cd027bd88f5
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:45 2010 +0300
+
+ ve-proc: fix root entry nlink
+
+ * Add entries from local tree, similar as in proc_getattr;
+ * Use per-ve process count for VE's root, rather than the
+ total number of processes in the system.
+
+ All of the above is an upper estimation, that is perfectly
+ fine with 'find' utility.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit a2a22de6b8939570239c99973d3be7fb2eb4e70a
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:45 2010 +0300
+
+ ve-proc: fix nlink in getattr
+
+ Fix nlink correction in proc_getattr
+ and change it right in the stat buffer instead of inode nlink
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit f665309226859e081bcae5c0c7fd3a3bdd9ecfbc
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:45 2010 +0300
+
+ bc-proc: bc nlink count
+
+ Override getattr callback on /proc/bc and ubc entries to get correct nlink.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 85051b1c71ad37949ef448ff8ddb342b75d706b0
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:45 2010 +0300
+
+ bc-proc: add bc and sub-bc counters
+
+ Add counter of ubc, protected with ub_hash_lock.
+ Needed for correct proc n_link calculation.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d5ee7014d3f4995249cdadf3d00d1be778a3b10a
+Author: Pavel Emelianov <xemul at openvz.org>
+Date: Sat Feb 27 16:57:44 2010 +0300
+
+ bc-proc: fix sub-bc inode number
+
+ fix subbeancounter inode number calculations in /proc/bc
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit b93ef081a586e08e226273599bcf7800907c731b
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:44 2010 +0300
+
+ simfs: compilation without quota
+
+ fix simfs compilation if CONFIG_QUOTA=n
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 4fa1e482478bcde0552e9a97db1ddca620ebbe05
+Author: Konstantin Khlebnikov <khlebnikov at openvz.org>
+Date: Sat Feb 27 16:57:43 2010 +0300
+
+ sysrq: smp nmi show regs v2
+
+ Rework nmi show regs, make it clean and tolerant of NMI IPI losses.
+
+ Signed-off-by: Konstantin Khlebnikov <khlebnikov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit cab0d970b18692b61e62e2095392e63c5097bf29
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Mon Apr 26 15:09:43 2010 +0400
+
+ sysrq: revert nmi ipi callback
+
+ next patch will implement this in less intrusive manner,
+ and without deadlocks at nmi ipi loss
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
commit 6b5607eeec54fcef60c25fa7a72bc30f69446933
Author: Pavel Emelyanov <xemul at openvz.org>
Date: Fri Apr 16 12:34:01 2010 +0400
@@ -2799,14 +4590,14 @@
+library. If this is what you want to do, use the GNU Library General
+Public License instead of this License.
diff --git a/Makefile b/Makefile
-index 78611d9..6c58263 100644
+index 573578f..12ba193 100644
--- a/Makefile
+++ b/Makefile
@@ -2,6 +2,7 @@ VERSION = 2
PATCHLEVEL = 6
SUBLEVEL = 32
EXTRAVERSION =
-+VZVERSION = atkov
++VZVERSION = avdeyev
NAME = Man-Eating Seals of Antiquity
# *DOCUMENTATION*
@@ -2849,7 +4640,7 @@
+
+source "kernel/bc/Kconfig"
diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S
-index 5294d84..cd218a8 100644
+index 5294d84..a920d42 100644
--- a/arch/x86/ia32/ia32entry.S
+++ b/arch/x86/ia32/ia32entry.S
@@ -617,7 +617,7 @@ ia32_sys_call_table:
@@ -2870,6 +4661,32 @@
.quad quiet_ni_syscall /* query_module */
.quad sys_poll
.quad compat_sys_nfsservctl
+@@ -841,4 +841,25 @@ ia32_sys_call_table:
+ .quad compat_sys_pwritev
+ .quad compat_sys_rt_tgsigqueueinfo /* 335 */
+ .quad sys_perf_event_open
++ .rept 500-(.-ia32_sys_call_table)/8
++ .quad sys_ni_syscall
++ .endr
++ .quad sys_fairsched_mknod /* 500 */
++ .quad sys_fairsched_rmnod
++ .quad sys_fairsched_chwt
++ .quad sys_fairsched_mvpr
++ .quad sys_fairsched_rate
++ .quad sys_fairsched_vcpus /* 505 */
++ .quad sys_ni_syscall
++ .quad sys_ni_syscall
++ .quad sys_ni_syscall
++ .quad sys_ni_syscall
++ .quad sys_getluid /* 510 */
++ .quad sys_setluid
++ .quad compat_sys_setublimit
++ .quad compat_sys_ubstat
++ .quad sys_ni_syscall
++ .quad sys_ni_syscall /* 515 */
++ .quad sys_lchmod
++ .quad compat_sys_lutime
+ ia32_syscall_end:
diff --git a/arch/x86/ia32/sys_ia32.c b/arch/x86/ia32/sys_ia32.c
index 016218c..f368a9a 100644
--- a/arch/x86/ia32/sys_ia32.c
@@ -2922,21 +4739,6 @@
#define compat_arch_setup_additional_pages syscall32_setup_pages
extern unsigned long arch_randomize_brk(struct mm_struct *mm);
-diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h
-index 139d4c1..5fd7d01 100644
---- a/arch/x86/include/asm/nmi.h
-+++ b/arch/x86/include/asm/nmi.h
-@@ -25,6 +25,10 @@ extern void release_perfctr_nmi(unsigned int);
- extern int reserve_evntsel_nmi(unsigned int);
- extern void release_evntsel_nmi(unsigned int);
-
-+typedef int (*nmi_callback_t)(struct pt_regs *regs, int cpu);
-+void set_nmi_ipi_callback(nmi_callback_t callback);
-+void unset_nmi_ipi_callback(void);
-+
- extern void setup_apic_nmi_watchdog(void *);
- extern void stop_apic_nmi_watchdog(void *);
- extern void disable_timer_nmi_watchdog(void);
diff --git a/arch/x86/include/asm/pgalloc.h b/arch/x86/include/asm/pgalloc.h
index 271de94..e255a04 100644
--- a/arch/x86/include/asm/pgalloc.h
@@ -3017,10 +4819,10 @@
#endif
rdtscll(ret);
diff --git a/arch/x86/include/asm/unistd_32.h b/arch/x86/include/asm/unistd_32.h
-index 6fb3c20..e7a2442 100644
+index 6fb3c20..c870519 100644
--- a/arch/x86/include/asm/unistd_32.h
+++ b/arch/x86/include/asm/unistd_32.h
-@@ -342,10 +342,20 @@
+@@ -342,10 +342,22 @@
#define __NR_pwritev 334
#define __NR_rt_tgsigqueueinfo 335
#define __NR_perf_event_open 336
@@ -3034,6 +4836,8 @@
+#define __NR_setluid 511
+#define __NR_setublimit 512
+#define __NR_ubstat 513
++#define __NR_lchmod 516
++#define __NR_lutime 517
#ifdef __KERNEL__
@@ -3043,10 +4847,10 @@
#define __ARCH_WANT_IPC_PARSE_VERSION
#define __ARCH_WANT_OLD_READDIR
diff --git a/arch/x86/include/asm/unistd_64.h b/arch/x86/include/asm/unistd_64.h
-index 8d3ad0a..dc19a9c 100644
+index 8d3ad0a..15bc00e 100644
--- a/arch/x86/include/asm/unistd_64.h
+++ b/arch/x86/include/asm/unistd_64.h
-@@ -661,6 +661,26 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
+@@ -661,6 +661,30 @@ __SYSCALL(__NR_pwritev, sys_pwritev)
__SYSCALL(__NR_rt_tgsigqueueinfo, sys_rt_tgsigqueueinfo)
#define __NR_perf_event_open 298
__SYSCALL(__NR_perf_event_open, sys_perf_event_open)
@@ -3070,10 +4874,14 @@
+__SYSCALL(__NR_fairsched_mvpr, sys_fairsched_mvpr)
+#define __NR_fairsched_rate 508
+__SYSCALL(__NR_fairsched_rate, sys_fairsched_rate)
++#define __NR_lchmod 509
++__SYSCALL(__NR_lchmod, sys_lchmod)
++#define __NR_lutime 510
++__SYSCALL(__NR_lutime, sys_lutime)
#ifndef __NO_STUBS
#define __ARCH_WANT_OLD_READDIR
-@@ -685,6 +705,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
+@@ -685,6 +709,7 @@ __SYSCALL(__NR_perf_event_open, sys_perf_event_open)
#define __ARCH_WANT_SYS_RT_SIGSUSPEND
#define __ARCH_WANT_SYS_TIME
#define __ARCH_WANT_COMPAT_SYS_TIME
@@ -3094,32 +4902,10 @@
* Given a pointer to the vDSO image, find the pointer to VDSO32_name
* as that symbol is defined in the vDSO sources or linker script.
diff --git a/arch/x86/kernel/apic/nmi.c b/arch/x86/kernel/apic/nmi.c
-index 7ff61d6..e5c7f78 100644
+index 7ff61d6..ee58297 100644
--- a/arch/x86/kernel/apic/nmi.c
+++ b/arch/x86/kernel/apic/nmi.c
-@@ -386,6 +386,21 @@ void touch_nmi_watchdog(void)
- }
- EXPORT_SYMBOL(touch_nmi_watchdog);
-
-+void smp_show_regs(struct pt_regs *regs, void *info)
-+{
-+ static DEFINE_SPINLOCK(show_regs_lock);
-+
-+ if (regs == NULL)
-+ return;
-+
-+ spin_lock(&show_regs_lock);
-+ bust_spinlocks(1);
-+ printk("----------- IPI show regs -----------");
-+ show_regs(regs);
-+ bust_spinlocks(0);
-+ spin_unlock(&show_regs_lock);
-+}
-+
- notrace __kprobes int
- nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
- {
-@@ -435,10 +450,10 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
+@@ -435,10 +435,10 @@ nmi_watchdog_tick(struct pt_regs *regs, unsigned reason)
if (!touched && __get_cpu_var(last_irq_sum) == sum) {
/*
* Ayiee, looks like this CPU is stuck ...
@@ -3132,40 +4918,30 @@
/*
* die_nmi will return ONLY if NOTIFY_STOP happens..
*/
+diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c
+index bb62b3e..ce8a3f5 100644
+--- a/arch/x86/kernel/cpu/transmeta.c
++++ b/arch/x86/kernel/cpu/transmeta.c
+@@ -1,6 +1,7 @@
+ #include <linux/kernel.h>
+ #include <linux/mm.h>
+ #include <linux/init.h>
++#include <linux/sched.h>
+ #include <asm/processor.h>
+ #include <asm/msr.h>
+ #include "cpu.h"
diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c
-index 2d8a371..155d6c6 100644
+index 2d8a371..0d1ce00 100644
--- a/arch/x86/kernel/dumpstack.c
+++ b/arch/x86/kernel/dumpstack.c
-@@ -303,6 +303,21 @@ void die(const char *str, struct pt_regs *regs, long err)
- oops_end(flags, regs, sig);
- }
-
-+/*
-+ * Voyager doesn't implement these
-+ */
-+void __attribute__((weak)) smp_show_regs(struct pt_regs *regs, void *info)
-+{
-+}
-+
-+#ifdef CONFIG_SMP
-+int __attribute__((weak))
-+smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
-+{
-+ return 0;
-+}
-+#endif
-+
- void notrace __kprobes
- die_nmi(char *str, struct pt_regs *regs, int do_panic)
- {
-@@ -319,6 +334,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
- printk(KERN_EMERG "%s", str);
+@@ -320,6 +320,7 @@ die_nmi(char *str, struct pt_regs *regs, int do_panic)
printk(" on CPU%d, ip %08lx, registers:\n",
smp_processor_id(), regs->ip);
-+ smp_nmi_call_function(smp_show_regs, NULL, 1);
show_registers(regs);
++ nmi_show_regs(regs, 1);
oops_end(flags, regs, 0);
if (do_panic || panic_on_oops)
+ panic("Non maskable interrupt");
diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c
index f7dd2a7..24c02de 100644
--- a/arch/x86/kernel/dumpstack_32.c
@@ -3506,107 +5282,21 @@
if (syscall_get_nr(current, regs) >= 0) {
/* Restart the system call - no handlers present */
diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c
-index ec1de97..b74f73d 100644
+index ec1de97..29df6fd 100644
--- a/arch/x86/kernel/smp.c
+++ b/arch/x86/kernel/smp.c
-@@ -22,6 +22,7 @@
- #include <linux/interrupt.h>
- #include <linux/cpu.h>
-
-+#include <linux/nmi.h>
- #include <asm/mtrr.h>
- #include <asm/tlbflush.h>
- #include <asm/mmu_context.h>
-@@ -146,6 +147,89 @@ void native_send_call_func_ipi(const struct cpumask *mask)
- free_cpumask_var(allbutself);
+@@ -221,6 +221,11 @@ void smp_call_function_single_interrupt(struct pt_regs *regs)
+ irq_exit();
}
-+static DEFINE_SPINLOCK(nmi_call_lock);
-+static struct nmi_call_data_struct {
-+ smp_nmi_function func;
-+ void *info;
-+ atomic_t started;
-+ atomic_t finished;
-+ cpumask_t cpus_called;
-+ int wait;
-+} *nmi_call_data;
-+
-+static int smp_nmi_callback(struct pt_regs *regs, int cpu)
-+{
-+ smp_nmi_function func;
-+ void *info;
-+ int wait;
-+
-+ func = nmi_call_data->func;
-+ info = nmi_call_data->info;
-+ wait = nmi_call_data->wait;
-+ ack_APIC_irq();
-+ /* prevent from calling func() multiple times */
-+ if (cpu_test_and_set(cpu, nmi_call_data->cpus_called))
-+ return 0;
-+ /*
-+ * notify initiating CPU that I've grabbed the data and am
-+ * about to execute the function
-+ */
-+ mb();
-+ atomic_inc(&nmi_call_data->started);
-+ /* at this point the nmi_call_data structure is out of scope */
-+ irq_enter();
-+ func(regs, info);
-+ irq_exit();
-+ if (wait)
-+ atomic_inc(&nmi_call_data->finished);
-+
-+ return 1;
-+}
-+
-+/*
-+ * This function tries to call func(regs, info) on each cpu.
-+ * Func must be fast and non-blocking.
-+ * May be called with disabled interrupts and from any context.
-+ */
-+int smp_nmi_call_function(smp_nmi_function func, void *info, int wait)
++void send_nmi_ipi_allbutself(void)
+{
-+ struct nmi_call_data_struct data;
-+ int cpus;
-+
-+ cpus = num_online_cpus() - 1;
-+ if (!cpus)
-+ return 0;
-+
-+ data.func = func;
-+ data.info = info;
-+ data.wait = wait;
-+ atomic_set(&data.started, 0);
-+ atomic_set(&data.finished, 0);
-+ cpus_clear(data.cpus_called);
-+ /* prevent this cpu from calling func if NMI happens */
-+ cpu_set(smp_processor_id(), data.cpus_called);
-+
-+ if (!spin_trylock(&nmi_call_lock))
-+ return -1;
-+
-+ nmi_call_data = &data;
-+ set_nmi_ipi_callback(smp_nmi_callback);
-+ mb();
-+
-+ /* Send a message to all other CPUs and wait for them to respond */
-+ apic->send_IPI_allbutself(APIC_DM_NMI);
-+ while (atomic_read(&data.started) != cpus)
-+ barrier();
-+
-+ unset_nmi_ipi_callback();
-+ if (wait)
-+ while (atomic_read(&data.finished) != cpus)
-+ barrier();
-+ spin_unlock(&nmi_call_lock);
-+
-+ return 0;
++ apic->send_IPI_allbutself(NMI_VECTOR);
+}
+
- /*
- * this function calls the 'stop' function on all other CPUs in the system.
- */
+ struct smp_ops smp_ops = {
+ .smp_prepare_boot_cpu = native_smp_prepare_boot_cpu,
+ .smp_prepare_cpus = native_smp_prepare_cpus,
diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c
index 28e963d..54a0ecf 100644
--- a/arch/x86/kernel/smpboot.c
@@ -3625,10 +5315,10 @@
start_ip = setup_trampoline();
diff --git a/arch/x86/kernel/syscall_table_32.S b/arch/x86/kernel/syscall_table_32.S
-index 76d70a4..0defa11 100644
+index 76d70a4..477e261 100644
--- a/arch/x86/kernel/syscall_table_32.S
+++ b/arch/x86/kernel/syscall_table_32.S
-@@ -336,3 +336,22 @@ ENTRY(sys_call_table)
+@@ -336,3 +336,24 @@ ENTRY(sys_call_table)
.long sys_pwritev
.long sys_rt_tgsigqueueinfo /* 335 */
.long sys_perf_event_open
@@ -3650,52 +5340,23 @@
+ .long sys_setublimit
+ .long sys_ubstat
+ .long sys_ni_syscall
-+ .long sys_ni_syscall
++ .long sys_ni_syscall /* 515 */
++ .long sys_lchmod
++ .long sys_lutime
diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c
-index 7e37dce..e1ceccb 100644
+index 7e37dce..d1fd061 100644
--- a/arch/x86/kernel/traps.c
+++ b/arch/x86/kernel/traps.c
-@@ -385,6 +385,13 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs)
- printk(KERN_EMERG "Dazed and confused, but trying to continue\n");
- }
-
-+static int dummy_nmi_callback(struct pt_regs *regs, int cpu)
-+{
-+ return 0;
-+}
-+
-+static nmi_callback_t nmi_ipi_callback = dummy_nmi_callback;
-+
- static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
- {
- unsigned char reason = 0;
-@@ -439,12 +446,24 @@ do_nmi(struct pt_regs *regs, long error_code)
-
- inc_irq_stat(__nmi_count);
-
-- if (!ignore_nmis)
-- default_do_nmi(regs);
-+ if (!ignore_nmis) {
-+ if (!nmi_ipi_callback(regs, smp_processor_id()))
-+ default_do_nmi(regs);
-+ }
-
- nmi_exit();
- }
-
-+void set_nmi_ipi_callback(nmi_callback_t callback)
-+{
-+ nmi_ipi_callback = callback;
-+}
-+
-+void unset_nmi_ipi_callback(void)
-+{
-+ nmi_ipi_callback = dummy_nmi_callback;
-+}
-+
- void stop_nmi(void)
- {
- acpi_nmi_disable();
+@@ -405,7 +405,8 @@ static notrace __kprobes void default_do_nmi(struct pt_regs *regs)
+ * Ok, so this is none of the documented NMI sources,
+ * so it must be the NMI watchdog.
+ */
+- if (nmi_watchdog_tick(regs, reason))
++ if (nmi_watchdog_tick(regs, reason) +
++ do_nmi_show_regs(regs, cpu))
+ return;
+ if (!do_nmi_callback(regs, cpu))
+ unknown_nmi_error(reason, regs);
diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c
index f379309..6c44e77 100644
--- a/arch/x86/kernel/tsc_sync.c
@@ -4762,34 +6423,32 @@
#else
diff --git a/drivers/char/sysrq.c b/drivers/char/sysrq.c
-index 44203ff..2f26e57 100644
+index 44203ff..4288c77 100644
--- a/drivers/char/sysrq.c
+++ b/drivers/char/sysrq.c
-@@ -37,6 +37,8 @@
+@@ -37,7 +37,10 @@
#include <linux/vt_kern.h>
#include <linux/workqueue.h>
#include <linux/hrtimer.h>
+#include <linux/kallsyms.h>
+#include <linux/slab.h>
#include <linux/oom.h>
++#include <linux/nmi.h>
#include <asm/ptrace.h>
-@@ -250,8 +252,14 @@ static struct sysrq_key_op sysrq_showallcpus_op = {
+ #include <asm/irq_regs.h>
+@@ -250,8 +253,8 @@ static struct sysrq_key_op sysrq_showallcpus_op = {
static void sysrq_handle_showregs(int key, struct tty_struct *tty)
{
struct pt_regs *regs = get_irq_regs();
+- if (regs)
+- show_regs(regs);
+
-+ bust_spinlocks(1);
- if (regs)
- show_regs(regs);
-+ bust_spinlocks(0);
-+#if defined(__i386__) || defined(__x86_64__)
-+ smp_nmi_call_function(smp_show_regs, NULL, 1);
-+#endif
++ nmi_show_regs(regs, 0);
perf_event_print_debug();
}
static struct sysrq_key_op sysrq_showregs_op = {
-@@ -303,6 +311,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
+@@ -303,6 +306,7 @@ static struct sysrq_key_op sysrq_ftrace_dump_op = {
static void sysrq_handle_showmem(int key, struct tty_struct *tty)
{
show_mem();
@@ -4797,7 +6456,7 @@
}
static struct sysrq_key_op sysrq_showmem_op = {
.handler = sysrq_handle_showmem,
-@@ -318,7 +327,7 @@ static void send_sig_all(int sig)
+@@ -318,7 +322,7 @@ static void send_sig_all(int sig)
{
struct task_struct *p;
@@ -4806,7 +6465,7 @@
if (p->mm && !is_global_init(p))
/* Not swapper, init nor kernel thread */
force_sig(sig, p);
-@@ -394,7 +403,267 @@ static struct sysrq_key_op sysrq_unrt_op = {
+@@ -394,7 +398,267 @@ static struct sysrq_key_op sysrq_unrt_op = {
/* Key Operations table and lock */
static DEFINE_SPINLOCK(sysrq_key_table_lock);
@@ -5075,7 +6734,7 @@
&sysrq_loglevel_op, /* 0 */
&sysrq_loglevel_op, /* 1 */
&sysrq_loglevel_op, /* 2 */
-@@ -417,7 +686,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
+@@ -417,7 +681,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
&sysrq_term_op, /* e */
&sysrq_moom_op, /* f */
/* g: May be registered for the kernel debugger */
@@ -5087,7 +6746,7 @@
NULL, /* h - reserved for help */
&sysrq_kill_op, /* i */
#ifdef CONFIG_BLOCK
-@@ -449,8 +722,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
+@@ -449,8 +717,11 @@ static struct sysrq_key_op *sysrq_key_table[36] = {
/* y: May be registered on sparc64 for global register dump */
NULL, /* y */
&sysrq_ftrace_dump_op, /* z */
@@ -5099,7 +6758,7 @@
/* key2index calculation, -1 on invalid index */
static int sysrq_key_table_key2index(int key)
{
-@@ -460,6 +736,10 @@ static int sysrq_key_table_key2index(int key)
+@@ -460,6 +731,10 @@ static int sysrq_key_table_key2index(int key)
retval = key - '0';
else if ((key >= 'a') && (key <= 'z'))
retval = key + 10 - 'a';
@@ -5110,7 +6769,7 @@
else
retval = -1;
return retval;
-@@ -470,21 +750,21 @@ static int sysrq_key_table_key2index(int key)
+@@ -470,21 +745,21 @@ static int sysrq_key_table_key2index(int key)
*/
struct sysrq_key_op *__sysrq_get_key_op(int key)
{
@@ -5139,7 +6798,7 @@
}
/*
-@@ -507,25 +787,25 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
+@@ -507,25 +782,25 @@ void __handle_sysrq(int key, struct tty_struct *tty, int check_mask)
*/
orig_log_level = console_loglevel;
console_loglevel = 7;
@@ -5171,7 +6830,7 @@
if (sysrq_key_table[i]) {
int j;
-@@ -555,7 +835,7 @@ void handle_sysrq(int key, struct tty_struct *tty)
+@@ -555,7 +830,7 @@ void handle_sysrq(int key, struct tty_struct *tty)
EXPORT_SYMBOL(handle_sysrq);
static int __sysrq_swap_key_ops(int key, struct sysrq_key_op *insert_op_p,
@@ -5180,9 +6839,13 @@
{
int retval;
-@@ -592,11 +872,16 @@ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
+@@ -591,12 +866,29 @@ EXPORT_SYMBOL(unregister_sysrq_key);
+ static ssize_t write_sysrq_trigger(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
++ struct ve_struct *cur = get_exec_env();
++ static int pnum = 10;
++
if (count) {
- char c;
+ int i, cnt;
@@ -5195,13 +6858,31 @@
- __handle_sysrq(c, NULL, 0);
+
+
-+ for (i = 0; i < cnt && c[i] != '\n'; i++)
++ for (i = 0; i < cnt && c[i] != '\n'; i++) {
++ if (!ve_is_super(cur)) {
++ if (!pnum)
++ continue;
++ printk("SysRq: CT#%u sent '%c' magic key.\n",
++ cur->veid, c[i]);
++ pnum--;
++ continue;
++ }
+ __handle_sysrq(c[i], NULL, 0);
++ }
}
return count;
}
+@@ -607,7 +899,7 @@ static const struct file_operations proc_sysrq_trigger_operations = {
+
+ static int __init sysrq_init(void)
+ {
+- proc_create("sysrq-trigger", S_IWUSR, NULL, &proc_sysrq_trigger_operations);
++ proc_create("sysrq-trigger", S_IWUSR, &glob_proc_root, &proc_sysrq_trigger_operations);
+ return 0;
+ }
+ module_init(sysrq_init);
diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c
-index 05cab2c..f973a9f 100644
+index 53ffcfc..2571f59 100644
--- a/drivers/char/tty_io.c
+++ b/drivers/char/tty_io.c
@@ -96,6 +96,8 @@
@@ -5287,7 +6968,7 @@
{
struct tty_struct *tty;
int retval;
-@@ -1705,7 +1729,7 @@ void tty_release_dev(struct file *filp)
+@@ -1707,7 +1731,7 @@ void tty_release_dev(struct file *filp)
static int __tty_open(struct inode *inode, struct file *filp)
{
@@ -5296,7 +6977,7 @@
int noctty, retval;
struct tty_driver *driver;
int index;
-@@ -1729,6 +1753,7 @@ retry_open:
+@@ -1731,6 +1755,7 @@ retry_open:
}
driver = tty_driver_kref_get(tty->driver);
index = tty->index;
@@ -5304,7 +6985,7 @@
filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */
/* noctty = 1; */
/* FIXME: Should we take a driver reference ? */
-@@ -1738,6 +1763,12 @@ retry_open:
+@@ -1740,6 +1765,12 @@ retry_open:
#ifdef CONFIG_VT
if (device == MKDEV(TTY_MAJOR, 0)) {
extern struct tty_driver *console_driver;
@@ -5317,7 +6998,7 @@
driver = tty_driver_kref_get(console_driver);
index = fg_console;
noctty = 1;
-@@ -1746,6 +1777,12 @@ retry_open:
+@@ -1748,6 +1779,12 @@ retry_open:
#endif
if (device == MKDEV(TTYAUX_MAJOR, 1)) {
struct tty_driver *console_driver = console_device(&index);
@@ -5330,7 +7011,7 @@
if (console_driver) {
driver = tty_driver_kref_get(console_driver);
if (driver) {
-@@ -1780,7 +1817,7 @@ got_driver:
+@@ -1782,7 +1819,7 @@ got_driver:
if (retval)
tty = ERR_PTR(retval);
} else
@@ -5339,7 +7020,7 @@
mutex_unlock(&tty_mutex);
tty_driver_kref_put(driver);
-@@ -2076,6 +2113,8 @@ static int tioccons(struct file *file)
+@@ -2078,6 +2115,8 @@ static int tioccons(struct file *file)
{
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -5348,7 +7029,7 @@
if (file->f_op->write == redirected_tty_write) {
struct file *f;
spin_lock(&redirect_lock);
-@@ -2656,7 +2695,7 @@ void __do_SAK(struct tty_struct *tty)
+@@ -2658,7 +2697,7 @@ void __do_SAK(struct tty_struct *tty)
/* Now kill any processes that happen to have the
* tty open.
*/
@@ -5357,7 +7038,7 @@
if (p->signal->tty == tty) {
printk(KERN_NOTICE "SAK: killed process %d"
" (%s): task_session(p)==tty->session\n",
-@@ -2688,7 +2727,7 @@ void __do_SAK(struct tty_struct *tty)
+@@ -2690,7 +2729,7 @@ void __do_SAK(struct tty_struct *tty)
spin_unlock(&p->files->file_lock);
}
task_unlock(p);
@@ -5366,7 +7047,7 @@
read_unlock(&tasklist_lock);
#endif
}
-@@ -2755,6 +2794,7 @@ void initialize_tty_struct(struct tty_struct *tty,
+@@ -2757,6 +2796,7 @@ void initialize_tty_struct(struct tty_struct *tty,
tty->ops = driver->ops;
tty->index = idx;
tty_line_name(driver, idx, tty->name);
@@ -5374,7 +7055,7 @@
}
/**
-@@ -2847,6 +2887,7 @@ struct tty_driver *alloc_tty_driver(int lines)
+@@ -2849,6 +2889,7 @@ struct tty_driver *alloc_tty_driver(int lines)
driver->magic = TTY_DRIVER_MAGIC;
driver->num = lines;
/* later we'll move allocation of tables here */
@@ -5382,7 +7063,7 @@
}
return driver;
}
-@@ -2881,6 +2922,7 @@ static void destruct_tty_driver(struct kref *kref)
+@@ -2883,6 +2924,7 @@ static void destruct_tty_driver(struct kref *kref)
kfree(p);
cdev_del(&driver->cdev);
}
@@ -5390,7 +7071,7 @@
kfree(driver);
}
-@@ -2955,6 +2997,7 @@ int tty_register_driver(struct tty_driver *driver)
+@@ -2957,6 +2999,7 @@ int tty_register_driver(struct tty_driver *driver)
}
mutex_lock(&tty_mutex);
@@ -5398,7 +7079,7 @@
list_add(&driver->tty_drivers, &tty_drivers);
mutex_unlock(&tty_mutex);
-@@ -3128,3 +3171,43 @@ static int __init tty_init(void)
+@@ -3130,3 +3173,43 @@ static int __init tty_init(void)
return 0;
}
module_init(tty_init);
@@ -6228,10 +7909,10 @@
}
diff --git a/drivers/net/venet_core.c b/drivers/net/venet_core.c
new file mode 100644
-index 0000000..5aeb82b
+index 0000000..317fbb0
--- /dev/null
+++ b/drivers/net/venet_core.c
-@@ -0,0 +1,775 @@
+@@ -0,0 +1,864 @@
+/*
+ * venet_core.c
+ *
@@ -6321,6 +8002,86 @@
+ return NULL;
+}
+
++struct ext_entry_struct *venet_ext_lookup(struct ve_struct *ve,
++ struct ve_addr_struct *addr)
++{
++ struct ext_entry_struct *entry;
++
++ if (ve->veip == NULL)
++ return NULL;
++
++ list_for_each_entry (entry, &ve->veip->ext_lh, list)
++ if (memcmp(&entry->addr, addr, sizeof(*addr)) == 0)
++ return entry;
++ return NULL;
++}
++
++int venet_ext_add(struct ve_struct *ve, struct ve_addr_struct *addr)
++{
++ struct ext_entry_struct *entry, *found;
++ int err;
++
++ if (ve->veip == NULL)
++ return -ENONET;
++
++ entry = kzalloc(sizeof(struct ext_entry_struct), GFP_KERNEL);
++ if (entry == NULL)
++ return -ENOMEM;
++
++ write_lock_irq(&veip_hash_lock);
++ err = -EADDRINUSE;
++ found = venet_ext_lookup(ve, addr);
++ if (found != NULL)
++ goto out_unlock;
++
++ entry->addr = *addr;
++ list_add(&entry->list, &ve->veip->ext_lh);
++ err = 0;
++ entry = NULL;
++out_unlock:
++ write_unlock_irq(&veip_hash_lock);
++ if (entry != NULL)
++ kfree(entry);
++ return err;
++}
++
++int venet_ext_del(struct ve_struct *ve, struct ve_addr_struct *addr)
++{
++ struct ext_entry_struct *found;
++ int err;
++
++ if (ve->veip == NULL)
++ return -ENONET;
++
++ err = -EADDRNOTAVAIL;
++ write_lock_irq(&veip_hash_lock);
++ found = venet_ext_lookup(ve, addr);
++ if (found == NULL)
++ goto out;
++
++ list_del(&found->list);
++ kfree(found);
++ err = 0;
++out:
++ write_unlock_irq(&veip_hash_lock);
++ return err;
++}
++
++void venet_ext_clean(struct ve_struct *ve)
++{
++ struct ext_entry_struct *entry, *tmp;
++
++ if (ve->veip == NULL)
++ return;
++
++ write_lock_irq(&veip_hash_lock);
++ list_for_each_entry_safe (entry, tmp, &ve->veip->ext_lh, list) {
++ list_del(&entry->list);
++ kfree(entry);
++ }
++ write_unlock_irq(&veip_hash_lock);
++}
++
+struct veip_struct *veip_find(envid_t veid)
+{
+ struct veip_struct *ptr;
@@ -6348,6 +8109,7 @@
+ INIT_LIST_HEAD(&ptr->ip_lh);
+ INIT_LIST_HEAD(&ptr->src_lh);
+ INIT_LIST_HEAD(&ptr->dst_lh);
++ INIT_LIST_HEAD(&ptr->ext_lh);
+ ptr->veid = veid;
+ list_add(&ptr->list, &veip_lh);
+ return ptr;
@@ -6641,6 +8403,20 @@
+ return venet_set_op(dev, data, ethtool_op_set_tx_csum);
+}
+
++static int
++venet_op_set_tso(struct net_device *dev, u32 data)
++{
++ if (!ve_is_super(get_exec_env()))
++ return -EPERM;
++
++ if (data)
++ common_features |= NETIF_F_TSO;
++ else
++ common_features &= ~NETIF_F_TSO;
++
++ return venet_set_op(dev, data, ethtool_op_set_tso);
++}
++
+#define venet_op_set_rx_csum venet_op_set_tx_csum
+
+static struct ethtool_ops venet_ethtool_ops = {
@@ -6651,6 +8427,7 @@
+ .get_rx_csum = ethtool_op_get_tx_csum,
+ .set_rx_csum = venet_op_set_rx_csum,
+ .get_tso = ethtool_op_get_tso,
++ .set_tso = venet_op_set_tso,
+};
+
+static void venet_cpt(struct net_device *dev,
@@ -6685,15 +8462,10 @@
+}
+
+#ifdef CONFIG_PROC_FS
-+static int veinfo_seq_show(struct seq_file *m, void *v)
++static void veaddr_seq_print(struct seq_file *m, struct ve_struct *ve)
+{
-+ struct ve_struct *ve;
+ struct ip_entry_struct *entry;
+
-+ ve = list_entry((struct list_head *)v, struct ve_struct, ve_list);
-+
-+ seq_printf(m, "%10u %5u %5u", ve->veid,
-+ ve->class_id, atomic_read(&ve->pcounter));
+ read_lock(&veip_hash_lock);
+ if (ve->veip == NULL)
+ goto unlock;
@@ -6711,29 +8483,8 @@
+ }
+unlock:
+ read_unlock(&veip_hash_lock);
-+ seq_putc(m, '\n');
-+ return 0;
-+}
-+
-+static struct seq_operations veinfo_seq_op = {
-+ .start = ve_seq_start,
-+ .next = ve_seq_next,
-+ .stop = ve_seq_stop,
-+ .show = veinfo_seq_show,
-+};
-+
-+static int veinfo_open(struct inode *inode, struct file *file)
-+{
-+ return seq_open(file, &veinfo_seq_op);
+}
+
-+static struct file_operations proc_veinfo_operations = {
-+ .open = veinfo_open,
-+ .read = seq_read,
-+ .llseek = seq_lseek,
-+ .release = seq_release,
-+};
-+
+static void *veip_seq_start(struct seq_file *m, loff_t *pos)
+{
+ loff_t l;
@@ -6804,7 +8555,7 @@
+ struct ve_addr_struct addr;
+
+ err = -EPERM;
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ goto out;
+
+ err = sockaddr_to_veaddr(uaddr, addrlen, &addr);
@@ -6829,6 +8580,28 @@
+ case VE_IP_DEL:
+ err = veip_entry_del(veid, &addr);
+ break;
++ case VE_IP_EXT_ADD:
++ ve = get_ve_by_id(veid);
++ err = -ESRCH;
++ if (!ve)
++ goto out;
++
++ down_read(&ve->op_sem);
++ err = venet_ext_add(ve, &addr);
++ up_read(&ve->op_sem);
++ put_ve(ve);
++ break;
++ case VE_IP_EXT_DEL:
++ ve = get_ve_by_id(veid);
++ err = -ESRCH;
++ if (!ve)
++ goto out;
++
++ down_read(&ve->op_sem);
++ err = venet_ext_del(ve, &addr);
++ up_read(&ve->op_sem);
++ put_ve(ve);
++ break;
+ default:
+ err = -EINVAL;
+ }
@@ -6940,6 +8713,7 @@
+ struct net_device *dev;
+
+ env = (struct ve_struct *)data;
++ venet_ext_clean(env);
+ veip_stop(env);
+
+ dev = env->_venet_dev;
@@ -6976,11 +8750,6 @@
+ return err;
+
+#ifdef CONFIG_PROC_FS
-+ de = proc_create("veinfo", S_IFREG | S_IRUSR, glob_proc_vz_dir,
-+ &proc_veinfo_operations);
-+ if (de == NULL)
-+ printk(KERN_WARNING "venet: can't make veinfo proc entry\n");
-+
+ de = proc_create("veip", S_IFREG | S_IRUSR, proc_vz_dir,
+ &proc_veip_operations);
+ if (de == NULL)
@@ -6989,17 +8758,18 @@
+
+ ve_hook_register(VE_SS_CHAIN, &venet_ve_hook);
+ vzioctl_register(&venetcalls);
++ vzmon_register_veaddr_print_cb(veaddr_seq_print);
+ return 0;
+}
+
+__exit void venet_exit(void)
+{
++ vzmon_unregister_veaddr_print_cb(veaddr_seq_print);
+ vzioctl_unregister(&venetcalls);
+ ve_hook_unregister(&venet_ve_hook);
+
+#ifdef CONFIG_PROC_FS
+ remove_proc_entry("veip", proc_vz_dir);
-+ remove_proc_entry("veinfo", glob_proc_vz_dir);
+#endif
+ venet_stop(get_ve0());
+ veip_cleanup();
@@ -7021,10 +8791,10 @@
diff --git a/drivers/net/vzethdev.c b/drivers/net/vzethdev.c
new file mode 100644
-index 0000000..e073e3e
+index 0000000..ed8ed97
--- /dev/null
+++ b/drivers/net/vzethdev.c
-@@ -0,0 +1,741 @@
+@@ -0,0 +1,749 @@
+/*
+ * veth.c
+ *
@@ -7278,6 +9048,7 @@
+ stats->tx_bytes += dev_stats->tx_bytes;
+ stats->rx_packets += dev_stats->rx_packets;
+ stats->tx_packets += dev_stats->tx_packets;
++ stats->tx_dropped += dev_stats->tx_dropped;
+ }
+
+ return stats;
@@ -7418,6 +9189,12 @@
+ return veth_set_op(dev, data, ethtool_op_set_tx_csum);
+}
+
++static int
++veth_op_set_tso(struct net_device *dev, u32 data)
++{
++ return veth_set_op(dev, data, ethtool_op_set_tso);
++}
++
+#define veth_op_set_rx_csum veth_op_set_tx_csum
+
+static struct ethtool_ops veth_ethtool_ops = {
@@ -7428,6 +9205,7 @@
+ .get_rx_csum = ethtool_op_get_tx_csum,
+ .set_rx_csum = veth_op_set_rx_csum,
+ .get_tso = ethtool_op_get_tso,
++ .set_tso = veth_op_set_tso,
+};
+
+static void veth_cpt(struct net_device *dev,
@@ -7799,16 +9577,15 @@
starget->id = id;
starget->channel = channel;
diff --git a/fs/Kconfig b/fs/Kconfig
-index 64d44ef..998c68e 100644
+index 64d44ef..f48e240 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
-@@ -63,6 +63,15 @@ source "fs/autofs/Kconfig"
+@@ -63,6 +63,14 @@ source "fs/autofs/Kconfig"
source "fs/autofs4/Kconfig"
source "fs/fuse/Kconfig"
+config SIM_FS
+ tristate "VPS filesystem"
-+ depends on VZ_QUOTA
+ default m
+ help
+ This file system is a part of Virtuozzo. It intoduces a fake
@@ -8202,11 +9979,257 @@
if (retval < 0) {
send_sig(SIGKILL, current, 0);
goto out;
+diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c
+index c4e8353..8180165 100644
+--- a/fs/binfmt_misc.c
++++ b/fs/binfmt_misc.c
+@@ -28,6 +28,7 @@
+ #include <linux/mount.h>
+ #include <linux/syscalls.h>
+ #include <linux/fs.h>
++#include <linux/ve_proto.h>
+
+ #include <asm/uaccess.h>
+
+@@ -35,8 +36,15 @@ enum {
+ VERBOSE_STATUS = 1 /* make it zero to save 400 bytes kernel memory */
+ };
+
++#ifdef CONFIG_VE
++#define bm_entries(ve) ((ve)->bm_entries)
++#define bm_enabled(ve) ((ve)->bm_enabled)
++#else
+ static LIST_HEAD(entries);
+ static int enabled = 1;
++#define bm_entries(ve) (entries)
++#define bm_enabled(ve) (enabled)
++#endif
+
+ enum {Enabled, Magic};
+ #define MISC_FMT_PRESERVE_ARGV0 (1<<31)
+@@ -56,21 +64,30 @@ typedef struct {
+ } Node;
+
+ static DEFINE_RWLOCK(entries_lock);
++#ifdef CONFIG_VE
++#define bm_fs_type(ve) (*(ve)->bm_fs_type)
++#define bm_mnt(ve) ((ve)->bm_mnt)
++#define bm_entry_count(ve) ((ve)->bm_entry_count)
++#else
+ static struct file_system_type bm_fs_type;
+ static struct vfsmount *bm_mnt;
+ static int entry_count;
++#define bm_fs_type(ve) (bm_fs_type)
++#define bm_mnt(ve) (bm_mnt)
++#define bm_entry_count(ve) (bm_entry_count)
++#endif
+
+ /*
+ * Check if we support the binfmt
+ * if we do, return the node, else NULL
+ * locking is done in load_misc_binary
+ */
+-static Node *check_file(struct linux_binprm *bprm)
++static Node *check_file(struct ve_struct *ve, struct linux_binprm *bprm)
+ {
+ char *p = strrchr(bprm->interp, '.');
+ struct list_head *l;
+
+- list_for_each(l, &entries) {
++ list_for_each(l, &bm_entries(ve)) {
+ Node *e = list_entry(l, Node, list);
+ char *s;
+ int j;
+@@ -111,9 +128,10 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+ char *iname_addr = iname;
+ int retval;
+ int fd_binary = -1;
++ struct ve_struct *ve = get_exec_env();
+
+ retval = -ENOEXEC;
+- if (!enabled)
++ if (!bm_enabled(ve))
+ goto _ret;
+
+ retval = -ENOEXEC;
+@@ -122,7 +140,7 @@ static int load_misc_binary(struct linux_binprm *bprm, struct pt_regs *regs)
+
+ /* to keep locking time low, we copy the interpreter string */
+ read_lock(&entries_lock);
+- fmt = check_file(bprm);
++ fmt = check_file(ve, bprm);
+ if (fmt)
+ strlcpy(iname, fmt->interpreter, BINPRM_BUF_SIZE);
+ read_unlock(&entries_lock);
+@@ -507,7 +525,7 @@ static void bm_clear_inode(struct inode *inode)
+ kfree(inode->i_private);
+ }
+
+-static void kill_node(Node *e)
++static void kill_node(struct ve_struct *ve, Node *e)
+ {
+ struct dentry *dentry;
+
+@@ -523,7 +541,7 @@ static void kill_node(Node *e)
+ dentry->d_inode->i_nlink--;
+ d_drop(dentry);
+ dput(dentry);
+- simple_release_fs(&bm_mnt, &entry_count);
++ simple_release_fs(&bm_mnt(ve), &bm_entry_count(ve));
+ }
+ }
+
+@@ -562,7 +580,7 @@ static ssize_t bm_entry_write(struct file *file, const char __user *buffer,
+ case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+ mutex_lock(&root->d_inode->i_mutex);
+
+- kill_node(e);
++ kill_node(get_exec_env(), e);
+
+ mutex_unlock(&root->d_inode->i_mutex);
+ dput(root);
+@@ -587,6 +605,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
+ struct dentry *root, *dentry;
+ struct super_block *sb = file->f_path.mnt->mnt_sb;
+ int err = 0;
++ struct ve_struct *ve = get_exec_env();
+
+ e = create_entry(buffer, count);
+
+@@ -610,7 +629,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
+ if (!inode)
+ goto out2;
+
+- err = simple_pin_fs(&bm_fs_type, &bm_mnt, &entry_count);
++ err = simple_pin_fs(&bm_fs_type(ve), &bm_mnt(ve), &bm_entry_count(ve));
+ if (err) {
+ iput(inode);
+ inode = NULL;
+@@ -623,7 +642,7 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
+
+ d_instantiate(dentry, inode);
+ write_lock(&entries_lock);
+- list_add(&e->list, &entries);
++ list_add(&e->list, &bm_entries(ve));
+ write_unlock(&entries_lock);
+
+ err = 0;
+@@ -649,26 +668,31 @@ static const struct file_operations bm_register_operations = {
+ static ssize_t
+ bm_status_read(struct file *file, char __user *buf, size_t nbytes, loff_t *ppos)
+ {
+- char *s = enabled ? "enabled\n" : "disabled\n";
++ struct ve_struct *ve = get_exec_env();
++ char *s = bm_enabled(ve) ? "enabled\n" : "disabled\n";
+
+ return simple_read_from_buffer(buf, nbytes, ppos, s, strlen(s));
+ }
+
++static void dm_genocide(struct ve_struct *ve)
++{
++ while (!list_empty(&bm_entries(ve)))
++ kill_node(ve, list_entry(bm_entries(ve).next, Node, list));
++}
++
+ static ssize_t bm_status_write(struct file * file, const char __user * buffer,
+ size_t count, loff_t *ppos)
+ {
++ struct ve_struct *ve = get_exec_env();
+ int res = parse_command(buffer, count);
+ struct dentry *root;
+
+ switch (res) {
+- case 1: enabled = 0; break;
+- case 2: enabled = 1; break;
++ case 1: bm_enabled(ve) = 0; break;
++ case 2: bm_enabled(ve) = 1; break;
+ case 3: root = dget(file->f_path.mnt->mnt_sb->s_root);
+ mutex_lock(&root->d_inode->i_mutex);
+-
+- while (!list_empty(&entries))
+- kill_node(list_entry(entries.next, Node, list));
+-
++ dm_genocide(ve);
+ mutex_unlock(&root->d_inode->i_mutex);
+ dput(root);
+ default: return res;
+@@ -719,6 +743,53 @@ static struct file_system_type bm_fs_type = {
+ .kill_sb = kill_litter_super,
+ };
+
++#ifdef CONFIG_VE
++static void __ve_binfmt_init(struct ve_struct *ve, struct file_system_type *fs)
++{
++ ve->bm_fs_type = fs;
++ INIT_LIST_HEAD(&ve->bm_entries);
++ ve->bm_enabled = 1;
++ ve->bm_mnt = NULL;
++ ve->bm_entry_count = 0;
++}
++
++static int ve_binfmt_init(void *x)
++{
++ struct ve_struct *ve = x;
++ struct file_system_type *fs_type;
++ int err;
++
++ err = register_ve_fs_type(ve, &bm_fs_type, &fs_type, NULL);
++ if (err == 0)
++ __ve_binfmt_init(ve, fs_type);
++
++ return err;
++}
++
++static void ve_binfmt_fini(void *x)
++{
++ struct ve_struct *ve = x;
++
++ /*
++ * no locks since exec_ve is dead and no one will
++ * mess with bm_xxx fields any longer
++ */
++ if (!ve->bm_fs_type)
++ return;
++ dm_genocide(ve);
++ unregister_ve_fs_type(ve->bm_fs_type, NULL);
++ kfree(ve->bm_fs_type);
++ ve->bm_fs_type = NULL;
++}
++
++static struct ve_hook ve_binfmt_hook = {
++ .init = ve_binfmt_init,
++ .fini = ve_binfmt_fini,
++ .priority = HOOK_PRIO_FS,
++ .owner = THIS_MODULE,
++};
++#endif
++
+ static int __init init_misc_binfmt(void)
+ {
+ int err = register_filesystem(&bm_fs_type);
+@@ -727,11 +798,17 @@ static int __init init_misc_binfmt(void)
+ if (err)
+ unregister_filesystem(&bm_fs_type);
+ }
++
++ if (!err) {
++ __ve_binfmt_init(get_ve0(), &bm_fs_type);
++ ve_hook_register(VE_SS_CHAIN, &ve_binfmt_hook);
++ }
+ return err;
+ }
+
+ static void __exit exit_misc_binfmt(void)
+ {
++ ve_hook_unregister(&ve_binfmt_hook);
+ unregister_binfmt(&misc_format);
+ unregister_filesystem(&bm_fs_type);
+ }
diff --git a/fs/block_dev.c b/fs/block_dev.c
-index 34e2d20..b170595 100644
+index 9b9e3dc..fe0cca1 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
-@@ -1601,7 +1601,7 @@ int __invalidate_device(struct block_device *bdev)
+@@ -1602,7 +1602,7 @@ int __invalidate_device(struct block_device *bdev)
* hold).
*/
shrink_dcache_sb(sb);
@@ -8240,7 +10263,7 @@
}
diff --git a/fs/compat.c b/fs/compat.c
-index 6c19040..204915d 100644
+index 6c19040..5141257 100644
--- a/fs/compat.c
+++ b/fs/compat.c
@@ -26,6 +26,7 @@
@@ -8270,7 +10293,29 @@
/*
* Not all architectures have sys_utime, so implement this in terms
* of sys_utimes.
-@@ -269,6 +282,8 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
+@@ -91,6 +104,21 @@ asmlinkage long compat_sys_utime(char __user *filename, struct compat_utimbuf __
+ return do_utimes(AT_FDCWD, filename, t ? tv : NULL, 0);
+ }
+
++asmlinkage long compat_sys_lutime(char __user * filename,
++ struct compat_utimbuf __user *t)
++{
++ struct timespec tv[2];
++
++ if (t) {
++ if (get_user(tv[0].tv_sec, &t->actime) ||
++ get_user(tv[1].tv_sec, &t->modtime))
++ return -EFAULT;
++ tv[0].tv_nsec = 0;
++ tv[1].tv_nsec = 0;
++ }
++ return do_utimes(AT_FDCWD, filename, t ? tv : NULL, AT_SYMLINK_NOFOLLOW);
++}
++
+ asmlinkage long compat_sys_utimensat(unsigned int dfd, char __user *filename, struct compat_timespec __user *t, int flags)
+ {
+ struct timespec tv[2];
+@@ -269,6 +297,8 @@ asmlinkage long compat_sys_statfs(const char __user *pathname, struct compat_sta
struct kstatfs tmp;
error = vfs_statfs(path.dentry, &tmp);
if (!error)
@@ -8279,7 +10324,7 @@
error = put_compat_statfs(buf, &tmp);
path_put(&path);
}
-@@ -287,6 +302,8 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
+@@ -287,6 +317,8 @@ asmlinkage long compat_sys_fstatfs(unsigned int fd, struct compat_statfs __user
goto out;
error = vfs_statfs(file->f_path.dentry, &tmp);
if (!error)
@@ -8288,7 +10333,7 @@
error = put_compat_statfs(buf, &tmp);
fput(file);
out:
-@@ -337,6 +354,8 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
+@@ -337,6 +369,8 @@ asmlinkage long compat_sys_statfs64(const char __user *pathname, compat_size_t s
struct kstatfs tmp;
error = vfs_statfs(path.dentry, &tmp);
if (!error)
@@ -8297,7 +10342,7 @@
error = put_compat_statfs64(buf, &tmp);
path_put(&path);
}
-@@ -358,6 +377,8 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
+@@ -358,6 +392,8 @@ asmlinkage long compat_sys_fstatfs64(unsigned int fd, compat_size_t sz, struct c
goto out;
error = vfs_statfs(file->f_path.dentry, &tmp);
if (!error)
@@ -8306,7 +10351,7 @@
error = put_compat_statfs64(buf, &tmp);
fput(file);
out:
-@@ -1469,6 +1490,10 @@ int compat_do_execve(char * filename,
+@@ -1469,6 +1505,10 @@ int compat_do_execve(char * filename,
bool clear_in_exec;
int retval;
@@ -8331,7 +10376,7 @@
current->comm, current->pid,
(int)fd, (unsigned int)cmd, buf,
diff --git a/fs/dcache.c b/fs/dcache.c
-index a100fa3..48c4d04 100644
+index a100fa3..7fce87d 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -26,6 +26,7 @@
@@ -8375,16 +10420,7 @@
/*drops the locks, at that point nobody can reach this dentry */
dentry_iput(dentry);
if (IS_ROOT(dentry))
-@@ -214,21 +223,31 @@ static struct dentry *d_kill(struct dentry *dentry)
-
- void dput(struct dentry *dentry)
- {
-+ struct user_beancounter *ub;
-+ unsigned long d_ubsize;
-+
- if (!dentry)
- return;
-
+@@ -220,15 +229,22 @@ void dput(struct dentry *dentry)
repeat:
if (atomic_read(&dentry->d_count) == 1)
might_sleep();
@@ -8414,7 +10450,7 @@
/*
* AV: ->d_delete() is _NOT_ allowed to block now.
-@@ -244,8 +263,12 @@ repeat:
+@@ -244,8 +260,12 @@ repeat:
dentry->d_flags |= DCACHE_REFERENCED;
dentry_lru_add(dentry);
}
@@ -8427,20 +10463,23 @@
return;
unhash_it:
-@@ -253,9 +276,18 @@ unhash_it:
+@@ -253,9 +273,21 @@ unhash_it:
kill_it:
/* if dentry was on the d_lru list delete it from there */
dentry_lru_del(dentry);
+
-+ ub = dentry->dentry_bc.d_ub;
-+ d_ubsize = dentry->dentry_bc.d_ubsize;
- dentry = d_kill(dentry);
-- if (dentry)
-+ preempt_disable();
+ if (unlikely(ub_dentry_on)) {
-+ uncharge_dcache(ub, d_ubsize);
++ struct user_beancounter *ub;
++
++ ub = dentry->dentry_bc.d_ub;
++ BUG_ON(!ub_dput_testzero(dentry));
++ uncharge_dcache(ub, dentry->dentry_bc.d_ubsize);
+ put_beancounter(ub);
+ }
++
+ dentry = d_kill(dentry);
+- if (dentry)
++ preempt_disable();
+ if (dentry)
goto repeat;
+ preempt_enable();
@@ -9482,10 +11521,10 @@
/*
diff --git a/fs/ext3/super.c b/fs/ext3/super.c
-index 427496c..a7a6210 100644
+index ca3068f..0c4978f 100644
--- a/fs/ext3/super.c
+++ b/fs/ext3/super.c
-@@ -2988,7 +2988,7 @@ static struct file_system_type ext3_fs_type = {
+@@ -2986,7 +2986,7 @@ static struct file_system_type ext3_fs_type = {
.name = "ext3",
.get_sb = ext3_get_sb,
.kill_sb = kill_block_super,
@@ -9494,6 +11533,27 @@
};
static int __init init_ext3_fs(void)
+diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
+index 16efcee..3833fe9 100644
+--- a/fs/ext4/inode.c
++++ b/fs/ext4/inode.c
+@@ -5770,9 +5770,14 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
+ int ret = -EINVAL;
+ void *fsdata;
+ struct file *file = vma->vm_file;
+- struct inode *inode = file->f_path.dentry->d_inode;
+- struct address_space *mapping = inode->i_mapping;
++ struct inode *inode;
++ struct address_space *mapping;
++
++ if (file->f_op->get_host)
++ file = file->f_op->get_host(file);
+
++ inode = file->f_path.dentry->d_inode;
++ mapping = inode->i_mapping;
+ /*
+ * Get i_alloc_sem to stop truncates messing with the inode. We cannot
+ * get i_mutex because we are already holding mmap_sem.
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index b63d193..0ae6e52 100644
--- a/fs/ext4/ioctl.c
@@ -10135,7 +12195,7 @@
fuse_sysfs_cleanup();
fuse_fs_cleanup();
diff --git a/fs/inode.c b/fs/inode.c
-index 4d8e3be..5460538 100644
+index 4d8e3be..ab63b5f 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -8,10 +8,13 @@
@@ -10364,8 +12424,17 @@
}
}
#endif
+@@ -1258,7 +1339,7 @@ int generic_detach_inode(struct inode *inode)
+ if (!(inode->i_state & (I_DIRTY|I_SYNC)))
+ list_move(&inode->i_list, &inode_unused);
+ inodes_stat.nr_unused++;
+- if (sb->s_flags & MS_ACTIVE) {
++ if (sb->s_flags & MS_ACTIVE && !(inode->i_flags & S_NOUNUSE)) {
+ spin_unlock(&inode_lock);
+ return 0;
+ }
diff --git a/fs/ioprio.c b/fs/ioprio.c
-index c7c0b28..c14af3f 100644
+index c7c0b28..2a7e8ae 100644
--- a/fs/ioprio.c
+++ b/fs/ioprio.c
@@ -26,6 +26,7 @@
@@ -10389,7 +12458,7 @@
switch (class) {
case IOPRIO_CLASS_RT:
-@@ -137,17 +141,23 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
+@@ -137,17 +141,25 @@ SYSCALL_DEFINE3(ioprio_set, int, which, int, who, int, ioprio)
if (!user)
break;
@@ -10407,15 +12476,17 @@
free_uid(user);
break;
+ case IOPRIO_WHO_UBC:
-+ if (class != IOPRIO_CLASS_BE)
-+ return -ERANGE;
++ if (class != IOPRIO_CLASS_BE) {
++ ret = -ERANGE;
++ break;
++ }
+
+ ret = 0; /* bc_set_ioprio(who, data); */
+ break;
default:
ret = -EINVAL;
}
-@@ -192,9 +202,9 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+@@ -192,9 +204,9 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
{
struct task_struct *g, *p;
struct user_struct *user;
@@ -10426,7 +12497,7 @@
read_lock(&tasklist_lock);
switch (which) {
-@@ -230,7 +240,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+@@ -230,7 +242,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
if (!user)
break;
@@ -10435,7 +12506,7 @@
if (__task_cred(p)->uid != user->uid)
continue;
tmpio = get_task_ioprio(p);
-@@ -240,7 +250,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
+@@ -240,7 +252,7 @@ SYSCALL_DEFINE2(ioprio_get, int, which, int, who)
ret = tmpio;
else
ret = ioprio_best(ret, tmpio);
@@ -11147,7 +13218,7 @@
return 0;
diff --git a/fs/namespace.c b/fs/namespace.c
-index bdc3cb4..2536eff 100644
+index bdc3cb4..d811360 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -29,6 +29,7 @@
@@ -11182,7 +13253,24 @@
atomic_set(&mnt->mnt_count, 1);
INIT_LIST_HEAD(&mnt->mnt_hash);
INIT_LIST_HEAD(&mnt->mnt_child);
-@@ -629,6 +633,7 @@ repeat:
+@@ -517,7 +521,7 @@ static void commit_tree(struct vfsmount *mnt)
+ touch_mnt_namespace(n);
+ }
+
+-static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
++struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
+ {
+ struct list_head *next = p->mnt_mounts.next;
+ if (next == &p->mnt_mounts) {
+@@ -532,6 +536,7 @@ static struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root)
+ }
+ return list_entry(next, struct vfsmount, mnt_child);
+ }
++EXPORT_SYMBOL(next_mnt);
+
+ static struct vfsmount *skip_mnt_tree(struct vfsmount *p)
+ {
+@@ -629,6 +634,7 @@ repeat:
spin_unlock(&vfsmount_lock);
acct_auto_close_mnt(mnt);
security_sb_umount_close(mnt);
@@ -11190,7 +13278,7 @@
goto repeat;
}
}
-@@ -789,15 +794,48 @@ static void show_type(struct seq_file *m, struct super_block *sb)
+@@ -789,15 +795,50 @@ static void show_type(struct seq_file *m, struct super_block *sb)
}
}
@@ -11223,9 +13311,10 @@
- int err = 0;
+ int err;
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
-+ char *path_buf, *path;
-
+-
- mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
++ char *path_buf, *path;
++
+ err = prepare_mnt_root_mangle(&mnt_path, &path_buf, &path);
+ if (err < 0)
+ return (err == -EACCES ? 0 : err);
@@ -11233,8 +13322,10 @@
+ if (ve_is_super(get_exec_env()) ||
+ !(mnt->mnt_sb->s_type->fs_flags & FS_MANGLE_PROC))
+ mangle(m, mnt->mnt_devname ? mnt->mnt_devname : "none");
-+ else
++ else {
++ seq_puts(m, "/dev/");
+ mangle(m, mnt->mnt_sb->s_type->name);
++ }
seq_putc(m, ' ');
- seq_path(m, &mnt_path, " \t\n\\");
+ mangle(m, path);
@@ -11242,7 +13333,7 @@
seq_putc(m, ' ');
show_type(m, mnt->mnt_sb);
seq_puts(m, __mnt_is_readonly(mnt) ? " ro" : " rw");
-@@ -884,18 +922,27 @@ static int show_vfsstat(struct seq_file *m, void *v)
+@@ -884,18 +925,27 @@ static int show_vfsstat(struct seq_file *m, void *v)
{
struct vfsmount *mnt = list_entry(v, struct vfsmount, mnt_list);
struct path mnt_path = { .dentry = mnt->mnt_root, .mnt = mnt };
@@ -11273,7 +13364,7 @@
seq_putc(m, ' ');
/* file system type */
-@@ -1107,6 +1154,34 @@ static int do_umount(struct vfsmount *mnt, int flags)
+@@ -1107,6 +1157,36 @@ static int do_umount(struct vfsmount *mnt, int flags)
return retval;
}
@@ -11296,8 +13387,10 @@
+ }
+
+ while (!list_empty(&kill)) {
++ LIST_HEAD(kill2);
+ mnt = list_entry(kill.next, struct vfsmount, mnt_list);
-+ umount_tree(mnt, 1, &umount_list);
++ umount_tree(mnt, 1, &kill2);
++ list_splice(&kill2, &umount_list);
+ }
+ spin_unlock(&vfsmount_lock);
+ up_write(&namespace_sem);
@@ -11308,7 +13401,7 @@
/*
* Now umount can handle mount points as well as block devices.
* This is important for filesystems which use unnamed block devices.
-@@ -1130,7 +1205,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
+@@ -1130,7 +1210,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags)
goto dput_and_out;
retval = -EPERM;
@@ -11317,7 +13410,7 @@
goto dput_and_out;
retval = do_umount(path.mnt, flags);
-@@ -1156,7 +1231,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
+@@ -1156,7 +1236,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name)
static int mount_is_safe(struct path *path)
{
@@ -11326,7 +13419,7 @@
return 0;
return -EPERM;
#ifdef notyet
-@@ -1425,6 +1500,8 @@ static int do_change_type(struct path *path, int flag)
+@@ -1425,6 +1505,8 @@ static int do_change_type(struct path *path, int flag)
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
@@ -11335,7 +13428,7 @@
down_write(&namespace_sem);
if (type == MS_SHARED) {
-@@ -1447,7 +1524,7 @@ static int do_change_type(struct path *path, int flag)
+@@ -1447,7 +1529,7 @@ static int do_change_type(struct path *path, int flag)
* do loopback mount.
*/
static int do_loopback(struct path *path, char *old_name,
@@ -11344,7 +13437,7 @@
{
struct path old_path;
struct vfsmount *mnt = NULL;
-@@ -1477,6 +1554,7 @@ static int do_loopback(struct path *path, char *old_name,
+@@ -1477,6 +1559,7 @@ static int do_loopback(struct path *path, char *old_name,
if (!mnt)
goto out;
@@ -11352,7 +13445,7 @@
err = graft_tree(mnt, path);
if (err) {
LIST_HEAD(umount_list);
-@@ -1520,7 +1598,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
+@@ -1520,7 +1603,7 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
int err;
struct super_block *sb = path->mnt->mnt_sb;
@@ -11361,7 +13454,7 @@
return -EPERM;
if (!check_mnt(path->mnt))
-@@ -1529,6 +1607,9 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
+@@ -1529,6 +1612,9 @@ static int do_remount(struct path *path, int flags, int mnt_flags,
if (path->dentry != path->mnt->mnt_root)
return -EINVAL;
@@ -11371,7 +13464,7 @@
down_write(&sb->s_umount);
if (flags & MS_BIND)
err = change_mount_flags(path->mnt, flags);
-@@ -1562,7 +1643,7 @@ static int do_move_mount(struct path *path, char *old_name)
+@@ -1562,7 +1648,7 @@ static int do_move_mount(struct path *path, char *old_name)
struct path old_path, parent_path;
struct vfsmount *p;
int err = 0;
@@ -11380,7 +13473,7 @@
return -EPERM;
if (!old_name || !*old_name)
return -EINVAL;
-@@ -1570,6 +1651,10 @@ static int do_move_mount(struct path *path, char *old_name)
+@@ -1570,6 +1656,10 @@ static int do_move_mount(struct path *path, char *old_name)
if (err)
return err;
@@ -11391,7 +13484,7 @@
down_write(&namespace_sem);
while (d_mountpoint(path->dentry) &&
follow_down(path))
-@@ -1627,6 +1712,7 @@ out:
+@@ -1627,6 +1717,7 @@ out:
up_write(&namespace_sem);
if (!err)
path_put(&parent_path);
@@ -11399,7 +13492,7 @@
path_put(&old_path);
return err;
}
-@@ -1644,7 +1730,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
+@@ -1644,7 +1735,7 @@ static int do_new_mount(struct path *path, char *type, int flags,
return -EINVAL;
/* we need capabilities... */
@@ -11408,7 +13501,7 @@
return -EPERM;
lock_kernel();
-@@ -1685,6 +1771,11 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
+@@ -1685,6 +1776,11 @@ int do_add_mount(struct vfsmount *newmnt, struct path *path,
goto unlock;
newmnt->mnt_flags = mnt_flags;
@@ -11420,7 +13513,7 @@
if ((err = graft_tree(newmnt, path)))
goto unlock;
-@@ -1959,7 +2050,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
+@@ -1959,7 +2055,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page,
retval = do_remount(&path, flags & ~MS_REMOUNT, mnt_flags,
data_page);
else if (flags & MS_BIND)
@@ -11429,7 +13522,7 @@
else if (flags & (MS_SHARED | MS_PRIVATE | MS_SLAVE | MS_UNBINDABLE))
retval = do_change_type(&path, flags);
else if (flags & MS_MOVE)
-@@ -2122,6 +2213,7 @@ out_dir:
+@@ -2122,6 +2218,7 @@ out_dir:
out_type:
return ret;
}
@@ -11437,7 +13530,7 @@
/*
* pivot_root Semantics:
-@@ -2281,7 +2373,7 @@ void __init mnt_init(void)
+@@ -2281,7 +2378,7 @@ void __init mnt_init(void)
init_rwsem(&namespace_sem);
mnt_cache = kmem_cache_create("mnt_cache", sizeof(struct vfsmount),
@@ -11447,7 +13540,7 @@
mount_hashtable = (struct list_head *)__get_free_page(GFP_ATOMIC);
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
-index 99ea196..986fe94 100644
+index 69d6a46..b9a8f89 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -125,6 +125,7 @@ static struct nfs_client *nfs_alloc_client(const struct nfs_client_initdata *cl_
@@ -11514,7 +13607,7 @@
if (clp->rpc_ops != data->rpc_ops)
continue;
diff --git a/fs/nfs/super.c b/fs/nfs/super.c
-index 4bf23f6..79e65e4 100644
+index 4bf23f6..253438f 100644
--- a/fs/nfs/super.c
+++ b/fs/nfs/super.c
@@ -53,6 +53,9 @@
@@ -11619,7 +13712,19 @@
#ifdef CONFIG_NFS_V4
unregister_filesystem(&nfs4_fs_type);
#endif
-@@ -2079,6 +2135,10 @@ static int nfs_compare_super(struct super_block *sb, void *data)
+@@ -1794,6 +1850,11 @@ static int nfs_validate_mount_data(void *options,
+ goto out_v3_not_compiled;
+ #endif /* !CONFIG_NFS_V3 */
+
++ if (!(args->flags & NFS_MOUNT_VER3)) {
++ printk("NFSv2 is broken and not supported\n");
++ return -EPROTONOSUPPORT;
++ }
++
+ return 0;
+
+ out_no_data:
+@@ -2079,6 +2140,10 @@ static int nfs_compare_super(struct super_block *sb, void *data)
struct nfs_server *server = sb_mntdata->server, *old = NFS_SB(sb);
int mntflags = sb_mntdata->mntflags;
@@ -11630,7 +13735,7 @@
if (!nfs_compare_super_address(old, server))
return 0;
/* Note: NFS_MOUNT_UNSHARED == NFS4_MOUNT_UNSHARED */
-@@ -2107,6 +2167,11 @@ static int nfs_get_sb(struct file_system_type *fs_type,
+@@ -2107,6 +2172,11 @@ static int nfs_get_sb(struct file_system_type *fs_type,
.mntflags = flags,
};
int error = -ENOMEM;
@@ -11642,7 +13747,7 @@
data = nfs_alloc_parsed_mount_data(3);
mntfh = kzalloc(sizeof(*mntfh), GFP_KERNEL);
-@@ -2237,6 +2302,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
+@@ -2237,6 +2307,11 @@ static int nfs_xdev_get_sb(struct file_system_type *fs_type, int flags,
.mntflags = flags,
};
int error;
@@ -11968,7 +14073,7 @@
goto path_put_and_out;
diff --git a/fs/open.c b/fs/open.c
-index 4f01e06..23011b6 100644
+index 4f01e06..77f73fc 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -25,6 +25,7 @@
@@ -12070,7 +14175,51 @@
if (!error && copy_to_user(buf, &tmp, sizeof(tmp)))
error = -EFAULT;
fput(file);
-@@ -707,6 +731,7 @@ out_release:
+@@ -630,14 +654,20 @@ out:
+ return err;
+ }
+
+-SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
++static int do_fchmodat(int dfd, const char __user *filename, mode_t mode, int flag)
+ {
+ struct path path;
+ struct inode *inode;
+ int error;
+ struct iattr newattrs;
++ int follow;
+
+- error = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path);
++ error = -EINVAL;
++ if ((flag & ~AT_SYMLINK_NOFOLLOW) != 0)
++ goto out;
++
++ follow = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
++ error = user_path_at(dfd, filename, follow, &path);
+ if (error)
+ goto out;
+ inode = path.dentry->d_inode;
+@@ -659,9 +689,19 @@ out:
+ return error;
+ }
+
++SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, mode_t, mode)
++{
++ return do_fchmodat(dfd, filename, mode, 0);
++}
++
+ SYSCALL_DEFINE2(chmod, const char __user *, filename, mode_t, mode)
+ {
+- return sys_fchmodat(AT_FDCWD, filename, mode);
++ return do_fchmodat(AT_FDCWD, filename, mode, 0);
++}
++
++SYSCALL_DEFINE2(lchmod, const char __user *, filename, mode_t, mode)
++{
++ return do_fchmodat(AT_FDCWD, filename, mode, AT_SYMLINK_NOFOLLOW);
+ }
+
+ static int chown_common(struct dentry * dentry, uid_t user, gid_t group)
+@@ -707,6 +747,7 @@ out_release:
out:
return error;
}
@@ -12078,7 +14227,7 @@
SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
gid_t, group, int, flag)
-@@ -948,6 +973,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
+@@ -948,6 +989,7 @@ struct file *nameidata_to_filp(struct nameidata *nd, int flags)
return filp;
}
@@ -12086,7 +14235,7 @@
/*
* dentry_open() will have done dput(dentry) and mntput(mnt) if it returns an
* error.
-@@ -972,6 +998,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
+@@ -972,6 +1014,9 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags,
return ERR_PTR(-EINVAL);
}
@@ -12096,7 +14245,7 @@
error = -ENFILE;
f = get_empty_filp();
if (f == NULL) {
-@@ -1062,6 +1091,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
+@@ -1062,6 +1107,7 @@ SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, int, mode)
asmlinkage_protect(3, ret, filename, flags, mode);
return ret;
}
@@ -12405,10 +14554,18 @@
mmput(mm);
return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
-index 6d71c67..de26c5c 100644
+index 13b0378..eb8a70f 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
-@@ -156,10 +156,14 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root)
+@@ -49,6 +49,7 @@
+
+ #include <asm/uaccess.h>
+
++#include <linux/module.h>
+ #include <linux/errno.h>
+ #include <linux/time.h>
+ #include <linux/proc_fs.h>
+@@ -156,10 +157,14 @@ static int get_fs_path(struct task_struct *task, struct path *path, bool root)
fs = task->fs;
if (fs) {
read_lock(&fs->lock);
@@ -12426,7 +14583,7 @@
}
task_unlock(task);
return result;
-@@ -549,17 +553,31 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
+@@ -550,17 +555,31 @@ static int proc_pid_syscall(struct task_struct *task, char *buffer)
static int proc_fd_access_allowed(struct inode *inode)
{
struct task_struct *task;
@@ -12461,7 +14618,7 @@
}
static int proc_setattr(struct dentry *dentry, struct iattr *attr)
-@@ -1038,6 +1056,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
+@@ -1039,6 +1058,8 @@ static ssize_t oom_adjust_write(struct file *file, const char __user *buf,
if ((oom_adjust < OOM_ADJUST_MIN || oom_adjust > OOM_ADJUST_MAX) &&
oom_adjust != OOM_DISABLE)
return -EINVAL;
@@ -12470,7 +14627,7 @@
task = get_proc_task(file->f_path.dentry->d_inode);
if (!task)
-@@ -1294,6 +1314,7 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
+@@ -1295,6 +1316,7 @@ void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
mm->exe_file = new_exe_file;
mm->num_exe_file_vmas = 0;
}
@@ -12478,7 +14635,7 @@
struct file *get_mm_exe_file(struct mm_struct *mm)
{
-@@ -1332,10 +1353,15 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
+@@ -1333,10 +1355,15 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
exe_file = get_mm_exe_file(mm);
mmput(mm);
if (exe_file) {
@@ -12497,7 +14654,7 @@
} else
return -ENOENT;
}
-@@ -1343,13 +1369,14 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
+@@ -1344,13 +1371,14 @@ static int proc_exe_link(struct inode *inode, struct path *exe_path)
static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
@@ -12514,7 +14671,7 @@
goto out;
error = PROC_I(inode)->op.proc_get_link(inode, &nd->path);
-@@ -1384,12 +1411,13 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
+@@ -1385,12 +1413,13 @@ static int do_proc_readlink(struct path *path, char __user *buffer, int buflen)
static int proc_pid_readlink(struct dentry * dentry, char __user * buffer, int buflen)
{
@@ -12530,7 +14687,7 @@
goto out;
error = PROC_I(inode)->op.proc_get_link(inode, &path);
-@@ -1640,6 +1668,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+@@ -1641,6 +1670,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
struct files_struct *files = NULL;
struct file *file;
int fd = proc_fd(inode);
@@ -12538,7 +14695,7 @@
if (task) {
files = get_files_struct(task);
-@@ -1652,7 +1681,8 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+@@ -1653,7 +1683,8 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
*/
spin_lock(&files->file_lock);
file = fcheck_files(files, fd);
@@ -12548,7 +14705,7 @@
if (path) {
*path = file->f_path;
path_get(&file->f_path);
-@@ -1670,7 +1700,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
+@@ -1671,7 +1702,7 @@ static int proc_fd_info(struct inode *inode, struct path *path, char *info)
spin_unlock(&files->file_lock);
put_files_struct(files);
}
@@ -12557,7 +14714,7 @@
}
static int proc_fd_link(struct inode *inode, struct path *path)
-@@ -2457,7 +2487,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
+@@ -2458,7 +2489,7 @@ static int do_io_accounting(struct task_struct *task, char *buffer, int whole)
struct task_struct *t = task;
task_io_accounting_add(&acct, &task->signal->ioac);
@@ -12566,6 +14723,42 @@
task_io_accounting_add(&acct, &t->ioac);
unlock_task_sighand(task, &flags);
+@@ -3161,3 +3192,35 @@ static const struct file_operations proc_task_operations = {
+ .read = generic_read_dir,
+ .readdir = proc_task_readdir,
+ };
++
++/* Check whether dentry belongs to a task that already died */
++int proc_dentry_of_dead_task(struct dentry *dentry)
++{
++ if (dentry->d_inode->i_fop == &dummy_proc_pid_file_operations)
++ return 1;
++
++ return (dentry->d_op == &pid_dentry_operations &&
++ proc_pid(dentry->d_inode)->tasks[PIDTYPE_PID].first == NULL);
++}
++EXPORT_SYMBOL(proc_dentry_of_dead_task);
++
++/* Placed here to avoid using the vzrst module count */
++static ssize_t dummy_proc_pid_read(struct file * file, char __user * buf,
++ size_t count, loff_t *ppos)
++{
++ return -ESRCH;
++}
++
++static ssize_t dummy_proc_pid_write(struct file * file, const char * buf,
++ size_t count, loff_t *ppos)
++{
++ return -ESRCH;
++}
++
++struct file_operations dummy_proc_pid_file_operations = {
++ .read = dummy_proc_pid_read,
++ .write = dummy_proc_pid_write,
++};
++
++EXPORT_SYMBOL(dummy_proc_pid_file_operations);
++
diff --git a/fs/proc/cmdline.c b/fs/proc/cmdline.c
index 82676e3..2ad657d 100644
--- a/fs/proc/cmdline.c
@@ -12606,8 +14799,39 @@
return 0;
}
module_init(proc_cpuinfo_init);
+diff --git a/fs/proc/devices.c b/fs/proc/devices.c
+index 59ee7da..d485f24 100644
+--- a/fs/proc/devices.c
++++ b/fs/proc/devices.c
+@@ -2,6 +2,7 @@
+ #include <linux/init.h>
+ #include <linux/proc_fs.h>
+ #include <linux/seq_file.h>
++#include <linux/sched.h>
+
+ static int devinfo_show(struct seq_file *f, void *v)
+ {
+@@ -25,6 +26,9 @@ static int devinfo_show(struct seq_file *f, void *v)
+
+ static void *devinfo_start(struct seq_file *f, loff_t *pos)
+ {
++ if (!ve_is_super(get_exec_env()))
++ return NULL;
++
+ if (*pos < (BLKDEV_MAJOR_HASH_SIZE + CHRDEV_MAJOR_HASH_SIZE))
+ return pos;
+ return NULL;
+@@ -64,7 +68,7 @@ static const struct file_operations proc_devinfo_operations = {
+
+ static int __init proc_devices_init(void)
+ {
+- proc_create("devices", 0, NULL, &proc_devinfo_operations);
++ proc_create("devices", 0, &glob_proc_root, &proc_devinfo_operations);
+ return 0;
+ }
+ module_init(proc_devices_init);
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
-index fa678ab..56f268b 100644
+index fa678ab..a66517d 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -255,6 +255,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
@@ -12637,7 +14861,33 @@
out:
return error;
}
-@@ -411,28 +418,60 @@ static const struct dentry_operations proc_dentry_operations =
+@@ -274,11 +281,22 @@ static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry,
+ struct kstat *stat)
+ {
+ struct inode *inode = dentry->d_inode;
+- struct proc_dir_entry *de = PROC_I(inode)->pde;
+- if (de && de->nlink)
+- inode->i_nlink = de->nlink;
++ struct proc_dir_entry *de = PDE(inode);
++ struct proc_dir_entry *lde = LPDE(inode);
+
+ generic_fillattr(inode, stat);
++
++ if (de && de->nlink)
++ stat->nlink = de->nlink;
++ /* if dentry is found in both trees and it is a directory
++ * then inode's nlink count must be altered, because local
++ * and global subtrees may differ.
++ * on the other hand, they may intersect, so actual nlink
++ * value is difficult to calculate - upper estimate is used
++ * instead of it.
++ */
++ if (lde && lde != de && lde->nlink > 1)
++ stat->nlink += lde->nlink - 2;
+ return 0;
+ }
+
+@@ -411,28 +429,60 @@ static const struct dentry_operations proc_dentry_operations =
.d_delete = proc_delete_dentry,
};
@@ -12705,7 +14955,7 @@
goto out_unlock;
}
}
-@@ -446,13 +485,15 @@ out_unlock:
+@@ -446,13 +496,15 @@ out_unlock:
}
if (de)
de_put(de);
@@ -12722,7 +14972,7 @@
}
/*
-@@ -464,13 +505,14 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
+@@ -464,13 +516,14 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
* value of the readdir() call, as long as it's non-negative
* for success..
*/
@@ -12739,7 +14989,7 @@
ino = inode->i_ino;
i = filp->f_pos;
-@@ -491,25 +533,19 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+@@ -491,25 +544,19 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
/* fall through */
default:
spin_lock(&proc_subdir_lock);
@@ -12774,7 +15024,7 @@
spin_unlock(&proc_subdir_lock);
if (filldir(dirent, de->name, de->namelen, filp->f_pos,
de->low_ino, de->mode >> 12) < 0) {
-@@ -518,10 +554,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
+@@ -518,10 +565,17 @@ int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
}
spin_lock(&proc_subdir_lock);
filp->f_pos++;
@@ -12793,7 +15043,7 @@
spin_unlock(&proc_subdir_lock);
}
ret = 1;
-@@ -533,7 +576,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
+@@ -533,7 +587,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
{
struct inode *inode = filp->f_path.dentry->d_inode;
@@ -13111,7 +15361,7 @@
}
diff --git a/fs/proc/root.c b/fs/proc/root.c
-index b080b79..39e1923 100644
+index b080b79..36f59af 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -42,6 +42,9 @@ static int proc_get_sb(struct file_system_type *fs_type,
@@ -13176,7 +15426,28 @@
#if defined(CONFIG_SUN_OPENPROMFS) || defined(CONFIG_SUN_OPENPROMFS_MODULE)
/* just give it a mountpoint */
proc_mkdir("openprom", NULL);
-@@ -205,6 +219,22 @@ struct proc_dir_entry proc_root = {
+@@ -141,8 +155,19 @@ void __init proc_root_init(void)
+ static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat
+ )
+ {
++ struct ve_struct *ve = get_exec_env();
++
+ generic_fillattr(dentry->d_inode, stat);
+- stat->nlink = proc_root.nlink + nr_processes();
++ stat->nlink = glob_proc_root.nlink;
++ if (ve_is_super(ve))
++ stat->nlink += nr_processes();
++#ifdef CONFIG_VE
++ else
++ /* thread count, not really a process count */
++ stat->nlink += atomic_read(&ve->pcounter);
++ /* the same logic as in the proc_getattr */
++ stat->nlink += ve->proc_root->nlink - 2;
++#endif
+ return 0;
+ }
+
+@@ -205,6 +230,22 @@ struct proc_dir_entry proc_root = {
.parent = &proc_root,
};
@@ -13407,7 +15678,7 @@
+
+obj-y += vzdquota/
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
-index 2ed79a9..acfde60 100644
+index 4fdb0eb..e7aff07 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -170,8 +170,9 @@ static struct quota_format_type *find_quota_format(int id)
@@ -13423,7 +15694,7 @@
if (!actqf || !try_module_get(actqf->qf_owner)) {
int qm;
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
-index 95c5b42..41a6f18 100644
+index 95c5b42..7d9d4b4 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -18,6 +18,7 @@
@@ -13501,7 +15772,7 @@
sb = get_super(bdev);
bdput(bdev);
if (!sb)
-@@ -379,6 +390,215 @@ static struct super_block *quotactl_block(const char __user *special)
+@@ -379,6 +390,231 @@ static struct super_block *quotactl_block(const char __user *special)
#endif
}
@@ -13534,6 +15805,21 @@
+ __kernel_time_t dqb_itime;
+};
+
++#ifdef CONFIG_COMPAT
++
++struct compat_compat_dqblk {
++ compat_uint_t dqb_ihardlimit;
++ compat_uint_t dqb_isoftlimit;
++ compat_uint_t dqb_curinodes;
++ compat_uint_t dqb_bhardlimit;
++ compat_uint_t dqb_bsoftlimit;
++ compat_u64 dqb_curspace;
++ compat_time_t dqb_btime;
++ compat_time_t dqb_itime;
++};
++
++#endif
++
+struct compat_dqinfo {
+ unsigned int dqi_bgrace;
+ unsigned int dqi_igrace;
@@ -13556,6 +15842,7 @@
+};
+
+asmlinkage long sys_quotactl(unsigned int cmd, const char __user *special, qid_t id, void __user *addr);
++
+static long compat_quotactl(unsigned int cmds, unsigned int type,
+ const char __user *special, qid_t id,
+ void __user *addr)
@@ -13717,7 +16004,7 @@
/*
* This is the system call interface. This communicates with
* the user-level programs. Currently this only supports diskquota
-@@ -395,6 +615,11 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
+@@ -395,6 +631,11 @@ SYSCALL_DEFINE4(quotactl, unsigned int, cmd, const char __user *, special,
cmds = cmd >> SUBCMDSHIFT;
type = cmd & SUBCMDMASK;
@@ -13729,6 +16016,62 @@
if (cmds != Q_SYNC || special) {
sb = quotactl_block(special);
if (IS_ERR(sb))
+@@ -459,6 +700,11 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+ compat_uint_t data;
+ u16 xdata;
+ long ret;
++#ifdef CONFIG_QUOTA_COMPAT
++ struct compat_dqblk __user *cdq;
++ struct compat_compat_dqblk __user *compat_cdq;
++ compat_time_t time;
++#endif
+
+ cmds = cmd >> SUBCMDSHIFT;
+
+@@ -519,6 +765,43 @@ asmlinkage long sys32_quotactl(unsigned int cmd, const char __user *special,
+ break;
+ ret = 0;
+ break;
++#ifdef CONFIG_QUOTA_COMPAT
++ case QC_GETQUOTA:
++ cdq = compat_alloc_user_space(sizeof(struct compat_dqblk));
++ compat_cdq = addr;
++ ret = sys_quotactl(cmd, special, id, cdq);
++ if (ret)
++ break;
++ ret = -EFAULT;
++ if (copy_in_user(compat_cdq, cdq, sizeof(struct compat_compat_dqblk) -
++ offsetof(struct compat_compat_dqblk, dqb_curspace)) ||
++ copy_in_user(&compat_cdq->dqb_curspace, &cdq->dqb_curspace,
++ sizeof(cdq->dqb_curspace)) ||
++ get_user(time, &cdq->dqb_btime) ||
++ put_user(time, &compat_cdq->dqb_btime) ||
++ get_user(time, &cdq->dqb_itime) ||
++ put_user(time, &compat_cdq->dqb_itime))
++ break;
++ ret = 0;
++ break;
++ case QC_SETQUOTA:
++ case QC_SETUSE:
++ case QC_SETQLIM:
++ cdq = compat_alloc_user_space(sizeof(struct compat_dqblk));
++ compat_cdq = addr;
++ ret = -EFAULT;
++ if (copy_in_user(cdq, compat_cdq, sizeof(struct compat_compat_dqblk) -
++ offsetof(struct compat_compat_dqblk, dqb_curspace)) ||
++ copy_in_user(&cdq->dqb_curspace, &compat_cdq->dqb_curspace,
++ sizeof(cdq->dqb_curspace)) ||
++ get_user(time, &compat_cdq->dqb_btime) ||
++ put_user(time, &cdq->dqb_btime) ||
++ get_user(time, &compat_cdq->dqb_itime) ||
++ put_user(time, &cdq->dqb_itime))
++ break;
++ ret = sys_quotactl(cmd, special, id, cdq);
++ break;
++#endif
+ default:
+ ret = sys_quotactl(cmd, special, id, addr);
+ }
diff --git a/fs/quota/vzdquota/Makefile b/fs/quota/vzdquota/Makefile
new file mode 100644
index 0000000..03fdee3
@@ -13741,10 +16084,10 @@
+vzdquota-$(CONFIG_VZ_QUOTA_UGID) += vzdq_file.o
diff --git a/fs/quota/vzdquota/vzdq_file.c b/fs/quota/vzdquota/vzdq_file.c
new file mode 100644
-index 0000000..0355917
+index 0000000..3ac9f05
--- /dev/null
+++ b/fs/quota/vzdquota/vzdq_file.c
-@@ -0,0 +1,928 @@
+@@ -0,0 +1,956 @@
+/*
+ *
+ * Copyright (C) 2005 SWsoft
@@ -13783,12 +16126,12 @@
+ * File read operation
+ *
+ * FIXME: functions in this section (as well as many functions in vzdq_ugid.c,
-+ * perhaps) abuse vz_quota_sem.
-+ * Taking a global semaphore for lengthy and user-controlled operations inside
++ * perhaps) abuse vz_quota_mutex.
++ * Taking a global mutex for lengthy and user-controlled operations inside
+ * VPSs is not a good idea in general.
-+ * In this case, the reasons for taking this semaphore are completely unclear,
++ * In this case, the reasons for taking this mutex are completely unclear,
+ * especially taking into account that the only function that has comments
-+ * about the necessity to be called under this semaphore
++ * about the necessity to be called under this mutex
+ * (create_proc_quotafile) is actually called OUTSIDE it.
+ *
+ * --------------------------------------------------------------------- */
@@ -13817,7 +16160,7 @@
+ int type; /* type of the tree */
+};
+
-+/* serialized by vz_quota_sem */
++/* serialized by vz_quota_mutex */
+static LIST_HEAD(qf_data_head);
+
+static const u_int32_t vzquota_magics[] = V2_INITQMAGICS;
@@ -14054,8 +16397,8 @@
+ return -ENOMEM;
+
+ qtd = data;
-+ down(&vz_quota_sem);
-+ down(&qtd->qmblk->dq_sem);
++ mutex_lock(&vz_quota_mutex);
++ mutex_lock(&qtd->qmblk->dq_mutex);
+
+ res = 0;
+ tree = QUGID_TREE(qtd->qmblk, qtd->type);
@@ -14094,8 +16437,8 @@
+out_err:
+ *start += count;
+out_dq:
-+ up(&qtd->qmblk->dq_sem);
-+ up(&vz_quota_sem);
++ mutex_unlock(&qtd->qmblk->dq_mutex);
++ mutex_unlock(&vz_quota_mutex);
+ kfree(tmp);
+
+ return res;
@@ -14617,6 +16960,33 @@
+ return ERR_PTR(-ENOENT);
+}
+
++static int vzdq_aquotd_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ struct ve_struct *ve, *old_ve;
++ struct list_head mntlist, *pos;
++
++ generic_fillattr(dentry->d_inode, stat);
++ ve = dentry->d_sb->s_type->owner_env;
++#ifdef CONFIG_VE
++ /*
++ * The only reason of disabling getattr for the host system is that
++ * this getattr can be slow and CPU consuming with large number of VPSs
++ * (or just mount points).
++ */
++ if (ve_is_super(ve))
++ return 0;
++#endif
++ INIT_LIST_HEAD(&mntlist);
++ old_ve = set_exec_env(ve);
++ if (!vzdq_aquot_buildmntlist(ve, &mntlist))
++ list_for_each(pos, &mntlist)
++ stat->nlink++;
++ vzdq_aquot_releasemntlist(ve, &mntlist);
++ (void)set_exec_env(old_ve);
++ return 0;
++}
++
+static struct file_operations vzdq_aquotd_file_operations = {
+ .read = &generic_read_dir,
+ .readdir = &vzdq_aquotd_readdir,
@@ -14624,6 +16994,7 @@
+
+static struct inode_operations vzdq_aquotd_inode_operations = {
+ .lookup = &vzdq_aquotd_lookup,
++ .getattr = &vzdq_aquotd_getattr,
+};
+
+
@@ -14675,7 +17046,7 @@
+}
diff --git a/fs/quota/vzdquota/vzdq_mgmt.c b/fs/quota/vzdquota/vzdq_mgmt.c
new file mode 100644
-index 0000000..5e078ed
+index 0000000..bd066de
--- /dev/null
+++ b/fs/quota/vzdquota/vzdq_mgmt.c
@@ -0,0 +1,754 @@
@@ -14764,7 +17135,7 @@
+ struct vz_quota_stat qstat;
+ struct vz_quota_master *qmblk;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -EFAULT;
+ if (!compat) {
@@ -14792,7 +17163,7 @@
+ if (IS_ERR(qmblk)) /* ENOMEM or EEXIST */
+ err = PTR_ERR(qmblk);
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -14816,7 +17187,7 @@
+ struct super_block *dqsb;
+
+ dqsb = NULL;
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -14856,7 +17227,7 @@
+ goto out_init;
+ qmblk->dq_state = VZDQ_WORKING;
+
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ return 0;
+
+out_init:
@@ -14871,7 +17242,7 @@
+out:
+ if (dqsb)
+ vzquota_put_super(dqsb);
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ return err;
+}
+
@@ -14889,7 +17260,7 @@
+ struct vz_quota_master *qmblk;
+ struct path root;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -14907,14 +17278,14 @@
+
+ if (qmblk->dq_sb)
+ vzquota_put_super(qmblk->dq_sb);
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ qmblk_put(qmblk);
+ path_put(&root);
+ return 0;
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ return err;
+}
+
@@ -14992,7 +17363,7 @@
+ int err, ret;
+ struct vz_quota_master *qmblk;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -15015,7 +17386,7 @@
+ /* vzquota_destroy will free resources */
+ qmblk->dq_state = VZDQ_STOPING;
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -15090,7 +17461,7 @@
+ struct vz_quota_stat qstat;
+ struct vz_quota_master *qmblk;
+
-+ down(&vz_quota_sem); /* for hash list protection */
++ mutex_lock(&vz_quota_mutex); /* for hash list protection */
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -15118,7 +17489,7 @@
+ qmblk_data_write_unlock(qmblk);
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ return err;
+}
+
@@ -15133,7 +17504,7 @@
+ struct vz_quota_stat qstat;
+ struct vz_quota_master *qmblk;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -15160,7 +17531,7 @@
+ err = -EFAULT;
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ return err;
+}
+
@@ -15354,7 +17725,7 @@
+ p += len;
+ }
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ /* traverse master hash table for all records */
+ for (i = 0; i < vzquota_hash_size; i++) {
@@ -15395,7 +17766,7 @@
+
+ *eof = 1; /* checked all hash */
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ len = 0;
+ if (*start != NULL) {
@@ -15435,10 +17806,10 @@
+#endif
diff --git a/fs/quota/vzdquota/vzdq_ops.c b/fs/quota/vzdquota/vzdq_ops.c
new file mode 100644
-index 0000000..e22d573
+index 0000000..904ff5e
--- /dev/null
+++ b/fs/quota/vzdquota/vzdq_ops.c
-@@ -0,0 +1,632 @@
+@@ -0,0 +1,644 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
+ * All rights reserved.
@@ -15942,11 +18313,13 @@
+ * of vzquota.
+ *
+ * To be safe, we reacquire vzquota lock.
++ * The assumption is that it would not hurt to call
++ * vzquota_inode_drop() more than once, but it must
++ * be called at least once after S_NOQUOTA is set.
+ */
+ inode_qmblk_lock(inode->i_sb);
+ inode->i_flags |= S_NOQUOTA;
+ inode_qmblk_unlock(inode->i_sb);
-+ return;
+ } else {
+ loff_t bytes = inode_get_bytes(inode);
+#ifdef CONFIG_VZ_QUOTA_UGID
@@ -15969,9 +18342,8 @@
+#endif
+
+ vzquota_data_unlock(inode, &data);
-+
-+ vzquota_inode_drop_call(inode);
+ }
++ vzquota_inode_drop_call(inode);
+}
+
+
@@ -16035,6 +18407,12 @@
+ NO_QUOTA : QUOTA_OK;
+}
+
++static void vzquota_swap_inode(struct inode *inode, struct inode *tmpl)
++{
++ vzquota_inode_swap_call(inode, tmpl);
++}
++
++
+#else /* CONFIG_VZ_QUOTA_UGID */
+
+static int vzquota_transfer(struct inode *inode, struct iattr *iattr)
@@ -16058,6 +18436,8 @@
+ NO_QUOTA : QUOTA_OK;
+}
+
++extern void vzquota_shutdown_super(struct super_block *sb);
++
+/*
+ * Structure of superblock diskquota operations.
+ */
@@ -16070,6 +18450,9 @@
+ .free_inode = vzquota_free_inode,
+ .transfer = vzquota_transfer,
+ .rename = vzquota_rename,
++
++ .swap_inode = vzquota_swap_inode,
++ .shutdown = vzquota_shutdown_super,
+};
diff --git a/fs/quota/vzdquota/vzdq_tree.c b/fs/quota/vzdquota/vzdq_tree.c
new file mode 100644
@@ -16365,10 +18748,10 @@
+}
diff --git a/fs/quota/vzdquota/vzdq_ugid.c b/fs/quota/vzdquota/vzdq_ugid.c
new file mode 100644
-index 0000000..60e0981
+index 0000000..a3e9e8c
--- /dev/null
+++ b/fs/quota/vzdquota/vzdq_ugid.c
-@@ -0,0 +1,1220 @@
+@@ -0,0 +1,1216 @@
+/*
+ * Copyright (C) 2002 SWsoft
+ * All rights reserved.
@@ -16410,10 +18793,6 @@
+
+static struct kmem_cache *vz_quota_ugid_cachep;
+
-+/* guard to protect vz_quota_master from destroy in quota_on/off. Also protects
-+ * list on the hash table */
-+extern struct semaphore vz_quota_sem;
-+
+inline struct vz_quota_ugid *vzquota_get_ugid(struct vz_quota_ugid *qugid)
+{
+ if (qugid != VZ_QUOTA_UGBAD)
@@ -16436,7 +18815,7 @@
+
+/*
+ * destroy ugid, if it have zero refcount, limits and usage
-+ * must be called under qmblk->dq_sem
++ * must be called under qmblk->dq_mutex
+ */
+void vzquota_put_ugid(struct vz_quota_master *qmblk,
+ struct vz_quota_ugid *qugid)
@@ -16483,7 +18862,7 @@
+}
+
+/*
-+ * requires dq_sem
++ * requires dq_mutex
+ */
+struct vz_quota_ugid *__vzquota_find_ugid(struct vz_quota_master *qmblk,
+ unsigned int quota_id, int type, int flags)
@@ -16541,16 +18920,16 @@
+}
+
+/*
-+ * takes dq_sem, may schedule
++ * takes dq_mutex, may schedule
+ */
+struct vz_quota_ugid *vzquota_find_ugid(struct vz_quota_master *qmblk,
+ unsigned int quota_id, int type, int flags)
+{
+ struct vz_quota_ugid *qugid;
+
-+ down(&qmblk->dq_sem);
++ mutex_lock(&qmblk->dq_mutex);
+ qugid = __vzquota_find_ugid(qmblk, quota_id, type, flags);
-+ up(&qmblk->dq_sem);
++ mutex_unlock(&qmblk->dq_mutex);
+
+ return qugid;
+}
@@ -16705,7 +19084,7 @@
+ if (err < 0)
+ goto out_put;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ mask2 = 0;
+ sb->dq_op = &vz_quota_operations2;
+ sb->s_qcop = &vz_quotactl_operations;
@@ -16724,7 +19103,7 @@
+ DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED, type);
+
+out_sem:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+out_put:
+ qmblk_put(qmblk);
+out:
@@ -16738,7 +19117,7 @@
+ int err;
+
+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ err = -ESRCH;
+ if (qmblk == NULL)
+ goto out;
@@ -16759,7 +19138,7 @@
+ err = 0;
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+ qmblk_put(qmblk);
+ return err;
@@ -16778,7 +19157,7 @@
+ int err;
+
+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ err = -ESRCH;
+ if (qmblk == NULL)
+ goto out;
@@ -16807,13 +19186,13 @@
+ }
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+ qmblk_put(qmblk);
+ return err;
+}
+
-+/* must be called under vz_quota_sem */
++/* must be called under vz_quota_mutex */
+static int __vz_set_dqblk(struct vz_quota_master *qmblk,
+ int type, qid_t id, struct if_dqblk *di)
+{
@@ -16882,7 +19261,7 @@
+ int err;
+
+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ err = -ESRCH;
+ if (qmblk == NULL)
+ goto out;
@@ -16891,7 +19270,7 @@
+ goto out;
+ err = __vz_set_dqblk(qmblk, type, id, di);
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+ qmblk_put(qmblk);
+ return err;
@@ -16904,7 +19283,7 @@
+ int err;
+
+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ err = -ESRCH;
+ if (qmblk == NULL)
+ goto out;
@@ -16919,13 +19298,13 @@
+ ii->dqi_valid = IIF_ALL;
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+ qmblk_put(qmblk);
+ return err;
+}
+
-+/* must be called under vz_quota_sem */
++/* must be called under vz_quota_mutex */
+static int __vz_set_dqinfo(struct vz_quota_master *qmblk,
+ int type, struct if_dqinfo *ii)
+{
@@ -16947,7 +19326,7 @@
+ int err;
+
+ qmblk = vzquota_find_qmblk(sb);
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ err = -ESRCH;
+ if (qmblk == NULL)
+ goto out;
@@ -16956,7 +19335,7 @@
+ goto out;
+ err = __vz_set_dqinfo(qmblk, type, ii);
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ if (qmblk != NULL && qmblk != VZ_QUOTA_BAD)
+ qmblk_put(qmblk);
+ return err;
@@ -17003,8 +19382,8 @@
+ if (!kbuf)
+ goto out;
+
-+ down(&vz_quota_sem);
-+ down(&qmblk->dq_sem);
++ mutex_lock(&vz_quota_mutex);
++ mutex_lock(&qmblk->dq_mutex);
+ for (ugid = vzquota_get_byindex(qmblk, idx, type), count = 0;
+ ugid != NULL && count < Q_GETQUOTI_SIZE;
+ count++)
@@ -17019,8 +19398,8 @@
+ ugid = vzquota_get_next(qmblk, ugid);
+ BUG_ON(ugid != NULL && ugid->qugid_type != type);
+ }
-+ up(&qmblk->dq_sem);
-+ up(&vz_quota_sem);
++ mutex_unlock(&qmblk->dq_mutex);
++ mutex_unlock(&vz_quota_mutex);
+
+ err = count;
+ if (copy_to_user(dqblk, kbuf, count * sizeof(*kbuf)))
@@ -17060,7 +19439,7 @@
+ struct vz_quota_master *qmblk;
+ int ret;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ ret = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -17114,7 +19493,7 @@
+ vzquota_put_ugid(qmblk, ugid);
+ }
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return ret;
+}
@@ -17127,7 +19506,7 @@
+ struct dq_info *target;
+ int err, type;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -17161,7 +19540,7 @@
+ target->iexpire = dq_info[type].iexpire;
+ }
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -17230,16 +19609,16 @@
+ if (k_ugid_buf == NULL)
+ return -ENOMEM;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
+ if (qmblk == NULL)
+ goto out;
+
-+ down(&qmblk->dq_sem);
++ mutex_lock(&qmblk->dq_mutex);
+ err = do_quota_ugid_getstat(qmblk, index, size, k_ugid_buf);
-+ up(&qmblk->dq_sem);
++ mutex_unlock(&qmblk->dq_mutex);
+ if (err < 0)
+ goto out;
+
@@ -17265,7 +19644,7 @@
+ }
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ vfree(k_ugid_buf);
+ return err;
+}
@@ -17278,7 +19657,7 @@
+ struct dq_info *target;
+ int err, type;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -17307,7 +19686,7 @@
+#endif
+ }
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -17319,7 +19698,7 @@
+ struct vz_quota_ugid_stat kinfo;
+ int err;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -17334,7 +19713,7 @@
+ if (copy_to_user(info, &kinfo, sizeof(kinfo)))
+ err = -EFAULT;
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -17346,7 +19725,7 @@
+ struct vz_quota_ugid_stat kinfo;
+ int err;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ENOENT;
+ qmblk = vzquota_find_master(quota_id);
@@ -17366,7 +19745,7 @@
+ }
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -17378,7 +19757,7 @@
+ struct vz_quota_ugid_setlimit lim;
+ int err;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ESRCH;
+ qmblk = vzquota_find_master(quota_id);
@@ -17392,7 +19771,7 @@
+ err = __vz_set_dqblk(qmblk, lim.type, lim.id, &lim.dqb);
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -17404,7 +19783,7 @@
+ struct vz_quota_ugid_setinfo info;
+ int err;
+
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+
+ err = -ESRCH;
+ qmblk = vzquota_find_master(quota_id);
@@ -17418,7 +19797,7 @@
+ err = __vz_set_dqinfo(qmblk, info.type, &info.dqi);
+
+out:
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+
+ return err;
+}
@@ -17509,14 +19888,14 @@
+ qmblk = vzquota_find_qmblk(sb);
+ if ((qmblk == NULL) || (qmblk == VZ_QUOTA_BAD))
+ return;
-+ down(&vz_quota_sem);
++ mutex_lock(&vz_quota_mutex);
+ if (qmblk->dq_flags & VZDQ_USRQUOTA)
+ sb->s_dquot.flags |= dquot_state_flag(DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED, USRQUOTA);
+ if (qmblk->dq_flags & VZDQ_GRPQUOTA)
+ sb->s_dquot.flags |= dquot_state_flag(DQUOT_USAGE_ENABLED |
+ DQUOT_LIMITS_ENABLED, GRPQUOTA);
-+ up(&vz_quota_sem);
++ mutex_unlock(&vz_quota_mutex);
+ qmblk_put(qmblk);
+}
+
@@ -17591,10 +19970,10 @@
+}
diff --git a/fs/quota/vzdquota/vzdquot.c b/fs/quota/vzdquota/vzdquot.c
new file mode 100644
-index 0000000..6f2f22a
+index 0000000..f091943
--- /dev/null
+++ b/fs/quota/vzdquota/vzdquot.c
-@@ -0,0 +1,1961 @@
+@@ -0,0 +1,1994 @@
+/*
+ * Copyright (C) 2001, 2002, 2004, 2005 SWsoft
+ * All rights reserved.
@@ -17639,7 +20018,7 @@
+ * Serializes on/off and all other do_vzquotactl operations.
+ * Protects qmblk hash.
+ */
-+struct semaphore vz_quota_sem;
++struct mutex vz_quota_mutex;
+
+/*
+ * Data access locks
@@ -17703,7 +20082,7 @@
+ *
+ * Master hash table handling.
+ *
-+ * SMP not safe, serialied by vz_quota_sem within quota syscalls
++ * SMP not safe, serialied by vz_quota_mutex within quota syscalls
+ *
+ * --------------------------------------------------------------------- */
+
@@ -17757,7 +20136,7 @@
+#endif
+
+ qmblk->dq_state = VZDQ_STARTING;
-+ init_MUTEX(&qmblk->dq_sem);
++ mutex_init(&qmblk->dq_mutex);
+ spin_lock_init(&qmblk->dq_data_lock);
+
+ qmblk->dq_id = quota_id;
@@ -17811,7 +20190,7 @@
+ * vzquota_find_master - find master record with given id
+ *
+ * Returns qmblk without touching its refcounter.
-+ * Called under vz_quota_sem.
++ * Called under vz_quota_mutex.
+ */
+struct vz_quota_master *vzquota_find_master(unsigned int quota_id)
+{
@@ -17830,7 +20209,7 @@
+ * vzquota_free_master - release resources taken by qmblk, freeing memory
+ *
+ * qmblk is assumed to be already taken out from the hash.
-+ * Should be called outside vz_quota_sem.
++ * Should be called outside vz_quota_mutex.
+ */
+void vzquota_free_master(struct vz_quota_master *qmblk)
+{
@@ -17912,7 +20291,7 @@
+ * quotas. We keep a counter of such subtrees and set VZ quota operations or
+ * reset the default ones.
+ *
-+ * Called under vz_quota_sem (from quota_on).
++ * Called under vz_quota_mutex (from quota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
@@ -17954,7 +20333,7 @@
+ __module_get(THIS_MODULE);
+ up(&sb->s_dquot.dqonoff_sem);
+ }
-+ /* protected by vz_quota_sem */
++ /* protected by vz_quota_mutex */
+ __VZ_QUOTA_SBREF(sb)++;
+ return 0;
+}
@@ -17962,7 +20341,7 @@
+/**
+ * quota_put_super - release superblock when one quota tree goes away
+ *
-+ * Called under vz_quota_sem.
++ * Called under vz_quota_mutex.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
@@ -18004,28 +20383,17 @@
+
+#else
+
-+struct vzquota_new_sop {
-+ struct super_operations new_op;
-+ const struct super_operations *old_op;
-+};
-+
+/**
+ * vzquota_shutdown_super - callback on umount
+ */
+void vzquota_shutdown_super(struct super_block *sb)
+{
+ struct vz_quota_master *qmblk;
-+ struct vzquota_new_sop *sop;
+
+ qmblk = __VZ_QUOTA_NOQUOTA(sb);
+ __VZ_QUOTA_NOQUOTA(sb) = NULL;
+ if (qmblk != NULL)
+ qmblk_put(qmblk);
-+ sop = container_of(sb->s_op, struct vzquota_new_sop, new_op);
-+ sb->s_op = sop->old_op;
-+ kfree(sop);
-+ if (sb->s_op->put_super != NULL)
-+ (*sb->s_op->put_super)(sb);
+}
+
+/**
@@ -18034,12 +20402,11 @@
+ * One superblock can have multiple directory subtrees with different VZ
+ * quotas.
+ *
-+ * Called under vz_quota_sem (from vzquota_on).
++ * Called under vz_quota_mutex (from vzquota_on).
+ */
+int vzquota_get_super(struct super_block *sb)
+{
+ struct vz_quota_master *qnew;
-+ struct vzquota_new_sop *sop;
+ int err;
+
+ mutex_lock(&sb->s_dquot.dqonoff_mutex);
@@ -18059,17 +20426,6 @@
+ }
+
+ if (sb->dq_op != &vz_quota_operations) {
-+ sop = kmalloc(sizeof(*sop), GFP_KERNEL);
-+ if (sop == NULL) {
-+ vzquota_free_master(__VZ_QUOTA_NOQUOTA(sb));
-+ __VZ_QUOTA_NOQUOTA(sb) = NULL;
-+ goto out_up;
-+ }
-+ memcpy(&sop->new_op, sb->s_op, sizeof(sop->new_op));
-+ sop->new_op.put_super = &vzquota_shutdown_super;
-+ sop->old_op = sb->s_op;
-+ sb->s_op = &sop->new_op;
-+
+ sb->dq_op = &vz_quota_operations;
+#ifdef CONFIG_VZ_QUOTA_UGID
+ sb->s_qcop = &vz_quotactl_operations;
@@ -18115,7 +20471,7 @@
+/**
+ * vzquota_put_super - one quota tree less on this superblock
+ *
-+ * Called under vz_quota_sem.
++ * Called under vz_quota_mutex.
+ */
+void vzquota_put_super(struct super_block *sb)
+{
@@ -18194,12 +20550,12 @@
+ quid = qlnk->qugid[USRQUOTA];
+ qgid = qlnk->qugid[GRPQUOTA];
+ if (quid != NULL || qgid != NULL) {
-+ down(&qmblk->dq_sem);
++ mutex_lock(&qmblk->dq_mutex);
+ if (qgid != NULL)
+ vzquota_put_ugid(qmblk, qgid);
+ if (quid != NULL)
+ vzquota_put_ugid(qmblk, quid);
-+ up(&qmblk->dq_sem);
++ mutex_unlock(&qmblk->dq_mutex);
+ }
+ }
+#endif
@@ -18315,10 +20671,10 @@
+ spin_unlock(&dcache_lock);
+ inode_qmblk_unlock(inode->i_sb);
+
-+ down(&qmblk->dq_sem);
++ mutex_lock(&qmblk->dq_mutex);
+ quid = __vzquota_find_ugid(qmblk, inode->i_uid, USRQUOTA, 0);
+ qgid = __vzquota_find_ugid(qmblk, inode->i_gid, GRPQUOTA, 0);
-+ up(&qmblk->dq_sem);
++ mutex_unlock(&qmblk->dq_mutex);
+
+ inode_qmblk_lock(inode->i_sb);
+ spin_lock(&dcache_lock);
@@ -18361,14 +20717,14 @@
+ qmblk_data_write_unlock(qmblk);
+ inode_qmblk_unlock(inode->i_sb);
+
-+ down(&qmblk->dq_sem);
++ mutex_lock(&qmblk->dq_mutex);
+ if (mask & (1 << USRQUOTA))
+ quid = __vzquota_find_ugid(qmblk, iattr->ia_uid,
+ USRQUOTA, 0);
+ if (mask & (1 << GRPQUOTA))
+ qgid = __vzquota_find_ugid(qmblk, iattr->ia_gid,
+ GRPQUOTA, 0);
-+ up(&qmblk->dq_sem);
++ mutex_unlock(&qmblk->dq_mutex);
+
+ inode_qmblk_lock(inode->i_sb);
+ qmblk_data_write_lock(qmblk);
@@ -18529,6 +20885,29 @@
+ return qmblk;
+}
+
++/* NFS root is disconnected dentry. */
++
++static int is_nfs_root(struct inode * inode)
++{
++ struct dentry *de;
++
++ if (inode->i_sb->s_magic != 0x6969)
++ return 0;
++
++ if (list_empty(&inode->i_dentry))
++ return 0;
++
++ list_for_each_entry(de, &inode->i_dentry, d_alias) {
++ if (de->d_parent != de)
++ return 0;
++ if (d_unhashed(de))
++ return 0;
++ if (!(de->d_flags & DCACHE_DISCONNECTED))
++ return 0;
++ }
++ return 1;
++}
++
+static void vzquota_dbranch_actualize(struct inode *inode,
+ struct inode *refinode)
+{
@@ -18539,7 +20918,7 @@
+ vzquota_qlnk_init(&qlnk);
+
+start:
-+ if (inode == inode->i_sb->s_root->d_inode) {
++ if (inode == inode->i_sb->s_root->d_inode || is_nfs_root(inode)) {
+ /* filesystem root */
+ atomic_inc(&inode->i_count);
+ do {
@@ -18594,7 +20973,7 @@
+ struct inode *pinode;
+ struct vz_quota_master *qmblk;
+
-+ if (inode == inode->i_sb->s_root->d_inode) {
++ if (inode == inode->i_sb->s_root->d_inode || is_nfs_root(inode)) {
+ /* filesystem root */
+ do {
+ qmblk = __VZ_QUOTA_NOQUOTA(inode->i_sb);
@@ -18859,6 +21238,39 @@
+ spin_unlock(&dcache_lock);
+}
+
++void vzquota_inode_swap_call(struct inode *inode, struct inode *tmpl)
++{
++ struct vz_quota_master *qmblk;
++
++ __vzquota_inode_init(inode, VZ_QUOTAO_INIT);
++
++ might_sleep();
++
++ inode_qmblk_lock(tmpl->i_sb);
++ if (unlikely(tmpl->i_flags & S_NOQUOTA)) {
++ inode_qmblk_unlock(tmpl->i_sb);
++ return;
++ }
++ __vzquota_inode_init(tmpl, VZ_QUOTAO_INICAL);
++
++ qmblk = INODE_QLNK(tmpl)->qmblk;
++ if (qmblk != VZ_QUOTA_BAD) {
++ void * uq;
++ list_del_init(&INODE_QLNK(tmpl)->list);
++ vzquota_qlnk_swap(INODE_QLNK(tmpl), INODE_QLNK(inode));
++ uq = inode->i_dquot[USRQUOTA];
++ inode->i_dquot[USRQUOTA] = tmpl->i_dquot[USRQUOTA];
++ tmpl->i_dquot[USRQUOTA] = uq;
++ tmpl->i_flags |= S_NOQUOTA;
++ inode_qmblk_unlock(inode->i_sb);
++
++ vzquota_inode_drop(tmpl);
++ } else {
++ inode_qmblk_unlock(tmpl->i_sb);
++ }
++}
++
++
+/**
+ * vzquota_inode_drop_call - call from DQUOT_DROP
+ */
@@ -19513,7 +21925,7 @@
+ goto out_ugid;
+#endif
+
-+ init_MUTEX(&vz_quota_sem);
++ mutex_init(&vz_quota_mutex);
+ vzioctl_register(&vzdqcalls);
+ virtinfo_notifier_register(VITYPE_QUOTA, &quota_notifier_block);
+#if defined(CONFIG_VZ_QUOTA_UGID) && defined(CONFIG_PROC_FS)
@@ -19893,10 +22305,10 @@
size_t, sizemask)
diff --git a/fs/simfs.c b/fs/simfs.c
new file mode 100644
-index 0000000..2fccd6d
+index 0000000..e21f911
--- /dev/null
+++ b/fs/simfs.c
-@@ -0,0 +1,335 @@
+@@ -0,0 +1,339 @@
+/*
+ * fs/simfs.c
+ *
@@ -20032,7 +22444,7 @@
+
+ err = -ENOSYS;
+ if (lsb && lsb->s_op && lsb->s_op->statfs)
-+ err = lsb->s_op->statfs(lsb->s_root, &statbuf);
++ err = lsb->s_op->statfs(sb->s_root, &statbuf);
+ if (err)
+ return err;
+
@@ -20074,10 +22486,12 @@
+ return (err ? NOTIFY_BAD : NOTIFY_OK);
+}
+
++#ifdef CONFIG_QUOTA
+static struct inode *sim_quota_root(struct super_block *sb)
+{
+ return sb->s_root->d_inode;
+}
++#endif
+
+/*
+ * NOTE: We need to setup s_bdev field on super block, since sys_quotactl()
@@ -20124,7 +22538,9 @@
+}
+
+static struct super_operations sim_super_ops = {
++#ifdef CONFIG_QUOTA
+ .get_quota_root = sim_quota_root,
++#endif
+};
+
+static int sim_fill_super(struct super_block *s, void *data)
@@ -20277,7 +22693,7 @@
return inode->i_op->getattr(mnt, dentry, stat);
diff --git a/fs/super.c b/fs/super.c
-index aff046b..a2e26f4 100644
+index aff046b..cce99ab 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -37,12 +37,15 @@
@@ -20314,7 +22730,13 @@
/*
* sget() can have s_umount recursion.
*
-@@ -311,7 +316,7 @@ void generic_shutdown_super(struct super_block *sb)
+@@ -307,11 +312,13 @@ void generic_shutdown_super(struct super_block *sb)
+ /* bad name - it should be evict_inodes() */
+ invalidate_inodes(sb);
+
++ if (sb->dq_op && sb->dq_op->shutdown)
++ sb->dq_op->shutdown(sb);
+ if (sop->put_super)
sop->put_super(sb);
/* Forget any remaining inodes */
@@ -20323,7 +22745,7 @@
printk("VFS: Busy inodes after unmount of %s. "
"Self-destruct in 5 seconds. Have a nice day...\n",
sb->s_id);
-@@ -531,17 +536,26 @@ rescan:
+@@ -531,17 +538,26 @@ rescan:
spin_unlock(&sb_lock);
return NULL;
}
@@ -20354,7 +22776,7 @@
err = vfs_statfs(s->s_root, &sbuf);
drop_super(s);
if (err)
-@@ -653,6 +667,13 @@ static DEFINE_IDA(unnamed_dev_ida);
+@@ -653,6 +669,13 @@ static DEFINE_IDA(unnamed_dev_ida);
static DEFINE_SPINLOCK(unnamed_dev_lock);/* protects the above */
static int unnamed_dev_start = 0; /* don't bother trying below it */
@@ -20368,7 +22790,7 @@
int set_anon_super(struct super_block *s, void *data)
{
int dev;
-@@ -672,7 +693,7 @@ int set_anon_super(struct super_block *s, void *data)
+@@ -672,7 +695,7 @@ int set_anon_super(struct super_block *s, void *data)
else if (error)
return -EAGAIN;
@@ -20377,7 +22799,7 @@
spin_lock(&unnamed_dev_lock);
ida_remove(&unnamed_dev_ida, dev);
if (unnamed_dev_start > dev)
-@@ -680,7 +701,7 @@ int set_anon_super(struct super_block *s, void *data)
+@@ -680,7 +703,7 @@ int set_anon_super(struct super_block *s, void *data)
spin_unlock(&unnamed_dev_lock);
return -EMFILE;
}
@@ -20386,7 +22808,7 @@
return 0;
}
-@@ -688,8 +709,9 @@ EXPORT_SYMBOL(set_anon_super);
+@@ -688,8 +711,9 @@ EXPORT_SYMBOL(set_anon_super);
void kill_anon_super(struct super_block *sb)
{
@@ -20856,6 +23278,51 @@
extern struct kmem_cache *sysfs_dir_cachep;
/*
+diff --git a/fs/utimes.c b/fs/utimes.c
+index e4c75db..86a62a1 100644
+--- a/fs/utimes.c
++++ b/fs/utimes.c
+@@ -40,6 +40,20 @@ SYSCALL_DEFINE2(utime, char __user *, filename, struct utimbuf __user *, times)
+
+ #endif
+
++SYSCALL_DEFINE2(lutime, char __user *, filename, struct utimbuf __user *, times)
++{
++ struct timespec tv[2];
++
++ if (times) {
++ if (get_user(tv[0].tv_sec, &times->actime) ||
++ get_user(tv[1].tv_sec, &times->modtime))
++ return -EFAULT;
++ tv[0].tv_nsec = 0;
++ tv[1].tv_nsec = 0;
++ }
++ return do_utimes(AT_FDCWD, filename, times ? tv : NULL, AT_SYMLINK_NOFOLLOW);
++}
++
+ static bool nsec_valid(long nsec)
+ {
+ if (nsec == UTIME_OMIT || nsec == UTIME_NOW)
+diff --git a/fs/xattr.c b/fs/xattr.c
+index 6d4f6d3..3243bd7 100644
+--- a/fs/xattr.c
++++ b/fs/xattr.c
+@@ -115,6 +115,15 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
+ struct inode *inode = dentry->d_inode;
+ int error;
+
++#if defined(CONFIG_VE) && defined(CONFIG_SYSCTL)
++ if (!ve_is_super(get_exec_env())) {
++ if (ve_xattr_policy == VE_XATTR_POLICY_IGNORE)
++ return 0;
++ else if (ve_xattr_policy == VE_XATTR_POLICY_REJECT)
++ return -EPERM;
++ }
++#endif
++
+ error = xattr_permission(inode, name, MAY_WRITE);
+ if (error)
+ return error;
diff --git a/include/asm-generic/mman.h b/include/asm-generic/mman.h
index 32c8bd6..cb151a4 100644
--- a/include/asm-generic/mman.h
@@ -20870,10 +23337,10 @@
#define MCL_FUTURE 2 /* lock all future mappings */
diff --git a/include/bc/beancounter.h b/include/bc/beancounter.h
new file mode 100644
-index 0000000..aabbc72
+index 0000000..7ba4c77
--- /dev/null
+++ b/include/bc/beancounter.h
-@@ -0,0 +1,454 @@
+@@ -0,0 +1,453 @@
+/*
+ * include/bc/beancounter.h
+ *
@@ -20953,12 +23420,12 @@
+/* Add new resources here */
+
+#define UB_NUMXTENT 23
-+#define UB_RESOURCES 24
++#define UB_SWAPPAGES 24
++#define UB_RESOURCES 25
+
+#define UB_UNUSEDPRIVVM (UB_RESOURCES + 0)
+#define UB_TMPFSPAGES (UB_RESOURCES + 1)
-+#define UB_SWAPPAGES (UB_RESOURCES + 2)
-+#define UB_HELDPAGES (UB_RESOURCES + 3)
++#define UB_HELDPAGES (UB_RESOURCES + 2)
+
+struct ubparm {
+ /*
@@ -21017,7 +23484,7 @@
+struct page_private {
+ unsigned long ubp_unused_privvmpages;
+ unsigned long ubp_tmpfs_respages;
-+ unsigned long ubp_swap_pages;
++ unsigned long ubp_pbcs;
+ unsigned long long ubp_held_pages;
+};
+
@@ -21046,7 +23513,6 @@
+#ifdef CONFIG_BC_DEBUG_KMEM
+ long pages_charged;
+ long vmalloc_charged;
-+ long pbcs;
+#endif
+ unsigned long sync;
+ unsigned long sync_done;
@@ -21080,6 +23546,7 @@
+
+ spinlock_t ub_lock;
+ uid_t ub_uid;
++ unsigned int ub_cookie;
+
+ struct ub_rate_info ub_limit_rl;
+ int ub_oom_noproc;
@@ -21087,8 +23554,8 @@
+ struct page_private ppriv;
+#define ub_unused_privvmpages ppriv.ubp_unused_privvmpages
+#define ub_tmpfs_respages ppriv.ubp_tmpfs_respages
-+#define ub_swap_pages ppriv.ubp_swap_pages
+#define ub_held_pages ppriv.ubp_held_pages
++#define ub_pbcs ppriv.ubp_pbcs
+ struct sock_private spriv;
+#define ub_rmem_thres spriv.ubp_rmem_thres
+#define ub_maxadvmss spriv.ubp_maxadvmss
@@ -21100,6 +23567,7 @@
+#define ub_tw_count spriv.ubp_tw_count
+
+ struct user_beancounter *parent;
++ int ub_childs;
+ void *private_data;
+ unsigned long ub_aflags;
+
@@ -21125,6 +23593,8 @@
+#endif
+};
+
++extern int ub_count;
++
+enum ub_severity { UB_HARD, UB_SOFT, UB_FORCE };
+
+#define UB_AFLAG_NOTIF_PAGEIN 0
@@ -21189,16 +23659,12 @@
+#else /* CONFIG_BEANCOUNTERS */
+
+#define ub_percpu_add(ub, field, v) do { \
-+ if (ub->ub_percpu == NULL) \
-+ break; \
+ per_cpu_ptr(ub->ub_percpu, get_cpu())->field += (v); \
+ put_cpu(); \
+ } while (0)
+#define ub_percpu_inc(ub, field) ub_percpu_add(ub, field, 1)
+
+#define ub_percpu_sub(ub, field, v) do { \
-+ if (ub->ub_percpu == NULL) \
-+ break; \
+ per_cpu_ptr(ub->ub_percpu, get_cpu())->field -= (v); \
+ put_cpu(); \
+ } while (0)
@@ -21491,10 +23957,10 @@
+#endif /* __dcache_op.h_ */
diff --git a/include/bc/debug.h b/include/bc/debug.h
new file mode 100644
-index 0000000..7b1feb6
+index 0000000..58c64f3
--- /dev/null
+++ b/include/bc/debug.h
-@@ -0,0 +1,109 @@
+@@ -0,0 +1,103 @@
+/*
+ * include/bc/debug.h
+ *
@@ -21588,17 +24054,11 @@
+ ub_percpu_sub(ub, vmalloc_charged, \
+ vm->nr_pages); \
+ } while (0)
-+
-+#define inc_pbc_count(ub) ub_percpu_inc(ub, pbcs)
-+#define dec_pbc_count(ub) ub_percpu_dec(ub, pbcs)
+#else
+#define init_cache_counters() do { } while (0)
+#define inc_vmalloc_charged(vm, f) do { } while (0)
+#define dec_vmalloc_charged(vm) do { } while (0)
+
-+#define inc_pbc_count(ub) do { } while (0)
-+#define dec_pbc_count(ub) do { } while (0)
-+
+#define ub_free_counters(ub) do { } while (0)
+#define ub_kmemcache_free(cachep) do { } while (0)
+#endif
@@ -21695,7 +24155,7 @@
+#endif /* _LINUX_UBHASH_H */
diff --git a/include/bc/io_acct.h b/include/bc/io_acct.h
new file mode 100644
-index 0000000..d84bf5a
+index 0000000..361b26c
--- /dev/null
+++ b/include/bc/io_acct.h
@@ -0,0 +1,113 @@
@@ -21714,6 +24174,8 @@
+#ifndef __UB_IO_ACCT_H_
+#define __UB_IO_ACCT_H_
+
++#define PAGE_IO_MARK (0x1UL)
++
+#ifdef CONFIG_BC_IO_ACCOUNTING
+#include <bc/beancounter.h>
+#include <bc/rss_pages.h>
@@ -21748,8 +24210,6 @@
+extern void ub_io_save_context(struct page *, size_t);
+extern void ub_io_release_context(struct page *pg, size_t size);
+
-+#define PAGE_IO_MARK (0x1UL)
-+
+static inline struct page_beancounter *iopb_to_pb(struct page_beancounter *pb)
+{
+ if (!((unsigned long)pb & PAGE_IO_MARK))
@@ -22876,7 +25336,7 @@
+
#endif /* __LINUX__AIO_H */
diff --git a/include/linux/capability.h b/include/linux/capability.h
-index c8f2a5f..3f85123 100644
+index c8f2a5f..301d709 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -197,12 +197,9 @@ struct cpu_vfs_cap_data {
@@ -22926,7 +25386,7 @@
/* Allow setting readahead and flushing buffers on block devices */
/* Allow setting geometry in floppy driver */
/* Allow turning DMA on/off in xd driver */
-@@ -340,6 +333,50 @@ struct cpu_vfs_cap_data {
+@@ -340,6 +333,61 @@ struct cpu_vfs_cap_data {
#define CAP_SETFCAP 31
@@ -22938,10 +25398,21 @@
+ */
+
+/* Allow access to all information. In the other case some structures will be
-+ hiding to ensure different Virtual Environment non-interaction on the same
-+ node */
++ * hidden to ensure different Virtual Environment non-interaction on the same
++ * node (NOW OBSOLETE)
++ */
+#define CAP_SETVEID 29
+
++#define capable_setveid() ({ \
++ ve_is_super(get_exec_env()) && \
++ (capable(CAP_SYS_ADMIN) || \
++ capable(CAP_VE_ADMIN)); \
++ })
++
++/*
++ * coincides with CAP_AUDIT_CONTROL but we don't care, since
++ * audit is disabled in Virtuozzo
++ */
+#define CAP_VE_ADMIN 30
+
+#ifdef CONFIG_VE
@@ -22977,7 +25448,7 @@
/* Override MAC access.
The base kernel enforces no MAC policy.
An LSM may enforce a MAC policy, and if it does and it chooses
-@@ -418,7 +455,16 @@ struct cpu_vfs_cap_data {
+@@ -418,7 +466,16 @@ struct cpu_vfs_cap_data {
#define CAP_INIT_INH_SET CAP_EMPTY_SET
# define cap_clear(c) do { (c) = __cap_empty_set; } while (0)
@@ -22994,7 +25465,7 @@
# define cap_set_init_eff(c) do { (c) = __cap_init_eff_set; } while (0)
#define cap_raise(c, flag) ((c).cap[CAP_TO_INDEX(flag)] |= CAP_TO_MASK(flag))
-@@ -536,6 +582,10 @@ extern const kernel_cap_t __cap_empty_set;
+@@ -536,6 +593,10 @@ extern const kernel_cap_t __cap_empty_set;
extern const kernel_cap_t __cap_full_set;
extern const kernel_cap_t __cap_init_eff_set;
@@ -23061,10 +25532,10 @@
+
diff --git a/include/linux/cpt_image.h b/include/linux/cpt_image.h
new file mode 100644
-index 0000000..6ab78b7
+index 0000000..8185d4e
--- /dev/null
+++ b/include/linux/cpt_image.h
-@@ -0,0 +1,1799 @@
+@@ -0,0 +1,1842 @@
+/*
+ *
+ * include/linux/cpt_image.h
@@ -23187,11 +25658,15 @@
+#define CPT_VERSION_16 0x200
+#define CPT_VERSION_18 0x300
+#define CPT_VERSION_18_1 0x301
++#define CPT_VERSION_18_2 0x302
++#define CPT_VERSION_18_3 0x303
+#define CPT_VERSION_20 0x400
+#define CPT_VERSION_24 0x500
+#define CPT_VERSION_26 0x600
+#define CPT_VERSION_27 0x700
++#define CPT_VERSION_27_3 0x703
+#define CPT_VERSION_32 0x800
++#define CPT_CURRENT_VERSION CPT_VERSION_32
+ __u16 cpt_os_arch; /* Architecture */
+#define CPT_OS_ARCH_I386 0
+#define CPT_OS_ARCH_EMT64 1
@@ -23238,6 +25713,7 @@
+#define CPT_BIND_MOUNT 21
+#define CPT_UNSUPPORTED_NETDEV 22
+#define CPT_UNSUPPORTED_MISC 23
++#define CPT_SLM_DMPRST 24
+
+/* This mask is used to determine whether VE
+ has some unsupported features or not */
@@ -23291,6 +25767,7 @@
+ CPT_SECT_VSYSCALL,
+ CPT_SECT_INOTIFY,
+ CPT_SECT_SYSV_MSG,
++ CPT_SECT_SNMP_STATS,
+ CPT_SECT_MAX
+};
+
@@ -23380,7 +25857,7 @@
+
+ /* later extension */
+ __u32 last_pid;
-+ __u32 pad1;
++ __u32 rnd_va_space;
+ __u64 reserved[8];
+} __attribute__ ((aligned (8)));
+
@@ -23409,6 +25886,8 @@
+#define CPT_DENTRY_INOTIFY 0x40
+#define CPT_DENTRY_FUTEX 0x80
+#define CPT_DENTRY_TUNTAP 0x100
++#define CPT_DENTRY_PROCPID_DEAD 0x200
++#define CPT_DENTRY_HARDLINKED 0x400
+#define CPT_DENTRY_SIGNALFD 0x800
+ __u64 cpt_inode;
+ __u64 cpt_priv;
@@ -24373,6 +26852,8 @@
+
+ __u64 cpt_state;
+ __u64 cpt_flags;
++#define CPT_TASK_FLAGS_MASK (PF_EXITING | PF_FORKNOEXEC | \
++ PF_SUPERPRIV | PF_DUMPCORE | PF_SIGNALED)
+ __u64 cpt_ptrace;
+ __u32 cpt_prio;
+ __u32 cpt_static_prio;
@@ -24771,6 +27252,39 @@
+ __u32 cpt_mark;
+} __attribute__ ((aligned (8)));
+
++/* cpt_ip_conntrack_image struct from 2.6.9 kernel */
++struct cpt_ip_conntrack_image_compat
++{
++ __u64 cpt_next;
++ __u32 cpt_object;
++ __u16 cpt_hdrlen;
++ __u16 cpt_content;
++
++ struct cpt_ipct_tuple cpt_tuple[2];
++ __u64 cpt_status;
++ __u64 cpt_timeout;
++ __u32 cpt_index;
++ __u8 cpt_ct_helper;
++ __u8 cpt_nat_helper;
++ __u16 __cpt_pad1;
++
++ /* union ip_conntrack_proto. Used by tcp and icmp. */
++ __u32 cpt_proto_data[12];
++
++ /* union ip_conntrack_help. Used only by ftp helper. */
++ __u32 cpt_help_data[4];
++
++ /* nat info */
++ __u32 cpt_initialized;
++ __u32 cpt_num_manips;
++ struct cpt_nat_manip cpt_nat_manips[6];
++
++ struct cpt_nat_seq cpt_nat_seq[2];
++
++ __u32 cpt_masq_index;
++ __u32 __cpt_pad2;
++} __attribute__ ((aligned (8)));
++
+struct cpt_ubparm
+{
+ __u64 barrier;
@@ -24789,7 +27303,7 @@
+
+ __u64 cpt_parent;
+ __u32 cpt_id;
-+ __u32 __cpt_pad;
++ __u32 cpt_ub_resources;
+ struct cpt_ubparm cpt_parms[32 * 2];
+} __attribute__ ((aligned (8)));
+
@@ -24866,10 +27380,10 @@
+#endif /* __CPT_IMAGE_H_ */
diff --git a/include/linux/cpt_ioctl.h b/include/linux/cpt_ioctl.h
new file mode 100644
-index 0000000..b8e83cc
+index 0000000..f31b66c
--- /dev/null
+++ b/include/linux/cpt_ioctl.h
-@@ -0,0 +1,43 @@
+@@ -0,0 +1,45 @@
+/*
+ *
+ * include/linux/cpt_ioctl.h
@@ -24911,6 +27425,8 @@
+#define CPT_SET_ERRORFD _IOW(CPTCTLTYPE, 21, int)
+
+#define CPT_ITER _IOW(CPTCTLTYPE, 23, int)
++#define CPT_LINKDIR_ADD _IOW(CPTCTLTYPE, 24, int)
++#define CPT_HARDLNK_ON _IOW(CPTCTLTYPE, 25, int)
+
+#endif
diff --git a/include/linux/dcache.h b/include/linux/dcache.h
@@ -25153,10 +27669,10 @@
static inline void eventpoll_init_file(struct file *file) {}
diff --git a/include/linux/fairsched.h b/include/linux/fairsched.h
new file mode 100644
-index 0000000..e08c84d
+index 0000000..521455c
--- /dev/null
+++ b/include/linux/fairsched.h
-@@ -0,0 +1,86 @@
+@@ -0,0 +1,92 @@
+/*
+ * Fair Scheduler
+ *
@@ -25229,6 +27745,9 @@
+asmlinkage int sys_fairsched_chwt(unsigned int id, unsigned int weight);
+asmlinkage int sys_fairsched_rate(unsigned int id, int op, unsigned rate);
+
++int fairsched_new_node(int id, unsigned int vcpus);
++void fairsched_drop_node(int id);
++
+#else /* CONFIG_VZ_FAIRSCHED */
+
+static inline void fairsched_init_early(void) { }
@@ -25237,6 +27756,9 @@
+static inline void get_task_fairsched_node(struct task_struct *p) { }
+static inline void put_task_fairsched_node(struct task_struct *p) { }
+
++static inline int fairsched_new_node(int id, unsigned int vcpus) { return 0; }
++static inline void fairsched_drop_node(int id) { }
++
+#define INIT_VZ_FAIRSCHED
+
+#endif /* CONFIG_VZ_FAIRSCHED */
@@ -25306,10 +27828,10 @@
+
#endif /* __LINUX_FILE_H */
diff --git a/include/linux/freezer.h b/include/linux/freezer.h
-index 5a361f8..9426083 100644
+index da7e52b..099191c 100644
--- a/include/linux/freezer.h
+++ b/include/linux/freezer.h
-@@ -160,6 +160,8 @@ static inline void set_freezable_with_signal(void)
+@@ -163,6 +163,8 @@ static inline void set_freezable_with_signal(void)
} while (try_to_freeze()); \
__retval; \
})
@@ -25319,7 +27841,7 @@
static inline int frozen(struct task_struct *p) { return 0; }
static inline int freezing(struct task_struct *p) { return 0; }
diff --git a/include/linux/fs.h b/include/linux/fs.h
-index 692a3ee..53547b0 100644
+index 9b67805..3fef9ef 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -53,6 +53,7 @@ struct inodes_stat_t {
@@ -25348,7 +27870,17 @@
#define FS_REVAL_DOT 16384 /* Check the paths ".", ".." for staleness */
#define FS_RENAME_DOES_D_MOVE 32768 /* FS will handle d_move()
* during rename() internally.
-@@ -370,7 +375,6 @@ struct inodes_stat_t {
+@@ -235,6 +240,9 @@ struct inodes_stat_t {
+ #define S_SWAPFILE 256 /* Do not truncate: swapon got its bmaps */
+ #define S_PRIVATE 512 /* Inode is fs-internal */
+
++/* VZ flags -- These are not upstream! */
++#define S_NOUNUSE (1 << 17) /* just destroy inode in cleanup */
++
+ /*
+ * Note that nosuid etc flags are inode-specific: setting some file-system
+ * flags just means all the inodes inherit those flags by default. It might be
+@@ -370,7 +378,6 @@ struct inodes_stat_t {
#include <linux/path.h>
#include <linux/stat.h>
#include <linux/cache.h>
@@ -25356,7 +27888,7 @@
#include <linux/list.h>
#include <linux/radix-tree.h>
#include <linux/prio_tree.h>
-@@ -405,6 +409,7 @@ extern int get_max_files(void);
+@@ -405,6 +412,7 @@ extern int get_max_files(void);
extern int sysctl_nr_open;
extern struct inodes_stat_t inodes_stat;
extern int leases_enable, lease_break_time;
@@ -25364,7 +27896,7 @@
#ifdef CONFIG_DNOTIFY
extern int dir_notify_enable;
#endif
-@@ -464,10 +469,15 @@ struct iattr {
+@@ -464,10 +472,15 @@ struct iattr {
struct file *ia_file;
};
@@ -25380,7 +27912,7 @@
/**
* enum positive_aop_returns - aop return codes with specific semantics
-@@ -754,6 +764,9 @@ struct inode {
+@@ -754,6 +767,9 @@ struct inode {
#ifdef CONFIG_QUOTA
struct dquot *i_dquot[MAXQUOTAS];
#endif
@@ -25390,7 +27922,7 @@
struct list_head i_devices;
union {
struct pipe_inode_info *i_pipe;
-@@ -809,6 +822,8 @@ enum inode_i_mutex_lock_class
+@@ -809,6 +825,8 @@ enum inode_i_mutex_lock_class
I_MUTEX_QUOTA
};
@@ -25399,7 +27931,7 @@
/*
* NOTE: in a 32bit arch with a preemptable kernel and
* an UP compile the i_size_read/write must be atomic
-@@ -929,6 +944,7 @@ struct file {
+@@ -929,6 +947,7 @@ struct file {
struct fown_struct f_owner;
const struct cred *f_cred;
struct file_ra_state f_ra;
@@ -25407,7 +27939,7 @@
u64 f_version;
#ifdef CONFIG_SECURITY
-@@ -945,6 +961,7 @@ struct file {
+@@ -945,6 +964,7 @@ struct file {
#ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state;
#endif
@@ -25415,7 +27947,7 @@
};
extern spinlock_t files_lock;
#define file_list_lock() spin_lock(&files_lock);
-@@ -1063,6 +1080,9 @@ struct file_lock {
+@@ -1063,6 +1083,9 @@ struct file_lock {
fl_owner_t fl_owner;
unsigned char fl_flags;
unsigned char fl_type;
@@ -25425,7 +27957,7 @@
unsigned int fl_pid;
struct pid *fl_nspid;
wait_queue_head_t fl_wait;
-@@ -1509,6 +1529,7 @@ struct file_operations {
+@@ -1509,6 +1532,7 @@ struct file_operations {
ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
int (*setlease)(struct file *, long, struct file_lock **);
@@ -25433,7 +27965,7 @@
};
struct inode_operations {
-@@ -1578,6 +1599,7 @@ struct super_operations {
+@@ -1578,6 +1602,7 @@ struct super_operations {
#ifdef CONFIG_QUOTA
ssize_t (*quota_read)(struct super_block *, int, char *, size_t, loff_t);
ssize_t (*quota_write)(struct super_block *, int, const char *, size_t, loff_t);
@@ -25441,7 +27973,7 @@
#endif
int (*bdev_try_to_free_page)(struct super_block*, struct page*, gfp_t);
};
-@@ -1755,8 +1777,14 @@ struct file_system_type {
+@@ -1755,8 +1780,14 @@ struct file_system_type {
struct lock_class_key i_mutex_key;
struct lock_class_key i_mutex_dir_key;
struct lock_class_key i_alloc_sem_key;
@@ -25456,7 +27988,7 @@
extern int get_sb_ns(struct file_system_type *fs_type, int flags, void *data,
int (*fill_super)(struct super_block *, void *, int),
struct vfsmount *mnt);
-@@ -1800,6 +1828,11 @@ extern int register_filesystem(struct file_system_type *);
+@@ -1800,13 +1831,20 @@ extern int register_filesystem(struct file_system_type *);
extern int unregister_filesystem(struct file_system_type *);
extern struct vfsmount *kern_mount_data(struct file_system_type *, void *data);
#define kern_mount(type) kern_mount_data(type, NULL)
@@ -25466,9 +27998,10 @@
+extern void umount_ve_fs_type(struct file_system_type *local_fs_type);
+#define kern_umount mntput
extern int may_umount_tree(struct vfsmount *);
++extern struct vfsmount *next_mnt(struct vfsmount *p, struct vfsmount *root);
extern int may_umount(struct vfsmount *);
extern long do_mount(char *, char *, char *, unsigned long, void *);
-@@ -1807,6 +1840,7 @@ extern struct vfsmount *collect_mounts(struct path *);
+ extern struct vfsmount *collect_mounts(struct path *);
extern void drop_collected_mounts(struct vfsmount *);
extern int vfs_statfs(struct dentry *, struct kstatfs *);
@@ -25476,7 +28009,7 @@
extern int current_umask(void);
-@@ -2065,7 +2099,8 @@ extern int check_disk_change(struct block_device *);
+@@ -2065,7 +2103,8 @@ extern int check_disk_change(struct block_device *);
extern int __invalidate_device(struct block_device *);
extern int invalidate_partition(struct gendisk *, int);
#endif
@@ -25486,7 +28019,7 @@
unsigned long invalidate_mapping_pages(struct address_space *mapping,
pgoff_t start, pgoff_t end);
-@@ -2477,6 +2512,17 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
+@@ -2478,6 +2517,17 @@ ssize_t simple_attr_read(struct file *file, char __user *buf,
ssize_t simple_attr_write(struct file *file, const char __user *buf,
size_t len, loff_t *ppos);
@@ -26009,7 +28542,7 @@
+
#endif
diff --git a/include/linux/mm.h b/include/linux/mm.h
-index 24c3956..d38e63e 100644
+index 24c3956..7bb1cf3 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -712,6 +712,7 @@ extern void pagefault_out_of_memory(void);
@@ -26040,6 +28573,20 @@
int set_page_dirty(struct page *page);
int set_page_dirty_lock(struct page *page);
int clear_page_dirty_for_io(struct page *page);
+@@ -1294,7 +1297,12 @@ unsigned long shrink_slab(unsigned long scanned, gfp_t gfp_mask,
+ #ifndef CONFIG_MMU
+ #define randomize_va_space 0
+ #else
+-extern int randomize_va_space;
++extern int _randomize_va_space;
++#ifndef CONFIG_VE
++#define randomize_va_space _randomize_va_space
++#else
++#define randomize_va_space (get_exec_env()->_randomize_va_space)
++#endif
+ #endif
+
+ const char * arch_vma_name(struct vm_area_struct *vma);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 84a524a..8ecf0ec 100644
--- a/include/linux/mm_types.h
@@ -26607,8 +29154,21 @@
+};
+
+#endif /*_IPT_OWNER_H*/
+diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h
+index d09db1b..5b36364 100644
+--- a/include/linux/nfs_fs.h
++++ b/include/linux/nfs_fs.h
+@@ -374,7 +374,7 @@ extern const struct address_space_operations nfs_file_aops;
+
+ static inline struct nfs_open_context *nfs_file_open_context(struct file *filp)
+ {
+- return filp->private_data;
++ return file_private(filp);
+ }
+
+ static inline struct rpc_cred *nfs_file_cred(struct file *file)
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
-index 320569e..8e0d228 100644
+index b26dc51..643e380 100644
--- a/include/linux/nfs_fs_sb.h
+++ b/include/linux/nfs_fs_sb.h
@@ -91,6 +91,7 @@ struct nfs_client {
@@ -26619,6 +29179,17 @@
};
/*
+diff --git a/include/linux/nmi.h b/include/linux/nmi.h
+index b752e80..ed9d975 100644
+--- a/include/linux/nmi.h
++++ b/include/linux/nmi.h
+@@ -47,4 +47,6 @@ static inline bool trigger_all_cpu_backtrace(void)
+ }
+ #endif
+
++extern void nmi_show_regs(struct pt_regs *regs, int in_nmi);
++extern int do_nmi_show_regs(struct pt_regs *regs, int cpu);
+ #endif
diff --git a/include/linux/notifier.h b/include/linux/notifier.h
index 44428d2..a3a0a02 100644
--- a/include/linux/notifier.h
@@ -26763,10 +29334,10 @@
#endif /* KERNEL */
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
-index 379eaed..52c8b17 100644
+index 379eaed..80bd26a 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
-@@ -103,6 +103,8 @@ struct vmcore {
+@@ -103,9 +103,14 @@ struct vmcore {
#ifdef CONFIG_PROC_FS
extern void proc_root_init(void);
@@ -26775,7 +29346,13 @@
void proc_flush_task(struct task_struct *task);
-@@ -149,6 +151,8 @@ extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *);
++extern int proc_dentry_of_dead_task(struct dentry *dentry);
++extern struct file_operations dummy_proc_pid_file_operations;
++
+ extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
+ struct proc_dir_entry *parent);
+ struct proc_dir_entry *proc_create_data(const char *name, mode_t mode,
+@@ -149,6 +154,8 @@ extern struct proc_dir_entry *proc_mkdir(const char *,struct proc_dir_entry *);
extern struct proc_dir_entry *proc_mkdir_mode(const char *name, mode_t mode,
struct proc_dir_entry *parent);
@@ -26784,7 +29361,16 @@
static inline struct proc_dir_entry *proc_create(const char *name, mode_t mode,
struct proc_dir_entry *parent, const struct file_operations *proc_fops)
{
-@@ -268,6 +272,9 @@ struct proc_inode {
+@@ -184,6 +191,8 @@ extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
+ #define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
+ static inline void proc_net_remove(struct net *net, const char *name) {}
+
++static inline int proc_dentry_of_dead_task(struct dentry *dentry) { return 0; }
++
+ static inline void proc_flush_task(struct task_struct *task)
+ {
+ }
+@@ -268,6 +277,9 @@ struct proc_inode {
struct proc_dir_entry *pde;
struct ctl_table_header *sysctl;
struct ctl_table *sysctl_entry;
@@ -26794,7 +29380,7 @@
struct inode vfs_inode;
};
-@@ -281,6 +288,15 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
+@@ -281,6 +293,15 @@ static inline struct proc_dir_entry *PDE(const struct inode *inode)
return PROC_I(inode)->pde;
}
@@ -26811,7 +29397,7 @@
{
return pde->parent->data;
diff --git a/include/linux/quota.h b/include/linux/quota.h
-index 8fd8efc..8cd6b71 100644
+index 8fd8efc..5fa291e 100644
--- a/include/linux/quota.h
+++ b/include/linux/quota.h
@@ -173,6 +173,10 @@ enum {
@@ -26834,11 +29420,14 @@
/* Operations working with dquots */
struct dquot_operations {
int (*initialize) (struct inode *, int);
-@@ -316,9 +322,11 @@ struct dquot_operations {
+@@ -316,9 +322,14 @@ struct dquot_operations {
/* get reserved quota for delayed alloc, value returned is managed by
* quota code only */
qsize_t *(*get_reserved_space) (struct inode *);
+ int (*rename) (struct inode *, struct inode *, struct inode *);
++
++ void (*swap_inode) (struct inode *, struct inode *);
++ void (*shutdown) (struct super_block *);
};
/* Operations handling requests from userspace */
@@ -26846,7 +29435,7 @@
struct quotactl_ops {
int (*quota_on)(struct super_block *, int, int, char *, int);
int (*quota_off)(struct super_block *, int, int);
-@@ -331,6 +339,10 @@ struct quotactl_ops {
+@@ -331,6 +342,10 @@ struct quotactl_ops {
int (*set_xstate)(struct super_block *, unsigned int, int);
int (*get_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
int (*set_xquota)(struct super_block *, int, qid_t, struct fs_disk_quota *);
@@ -26857,7 +29446,7 @@
};
struct quota_format_type {
-@@ -385,6 +397,10 @@ struct quota_info {
+@@ -385,6 +400,10 @@ struct quota_info {
struct inode *files[MAXQUOTAS]; /* inodes of quotafiles */
struct mem_dqinfo info[MAXQUOTAS]; /* Information for each quota type */
struct quota_format_ops *ops[MAXQUOTAS]; /* Operations for each type */
@@ -26869,7 +29458,7 @@
int register_quota_format(struct quota_format_type *fmt);
diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h
-index a529d86..579a15c 100644
+index a529d86..bdbe1f7 100644
--- a/include/linux/quotaops.h
+++ b/include/linux/quotaops.h
@@ -264,6 +264,19 @@ static inline void vfs_dq_free_inode(struct inode *inode)
@@ -26892,7 +29481,43 @@
/* Cannot be called inside a transaction */
static inline int vfs_dq_off(struct super_block *sb, int remount)
{
-@@ -363,6 +376,12 @@ static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
+@@ -274,6 +287,35 @@ static inline int vfs_dq_off(struct super_block *sb, int remount)
+ return ret;
+ }
+
++static __inline__ void DQUOT_SWAP(struct inode *inode, struct inode *tmpl)
++{
++ if (sb_any_quota_active(tmpl->i_sb) &&
++ tmpl->i_sb->dq_op->swap_inode)
++ tmpl->i_sb->dq_op->swap_inode(inode, tmpl);
++}
++
++static __inline__ int DQUOT_CHECK_SPACE(struct inode *inode)
++{
++ if (vfs_dq_alloc_space_nodirty(inode, 512))
++ return -EDQUOT;
++ vfs_dq_free_space_nodirty(inode, 512);
++ return 0;
++}
++
++static __inline__ void DQUOT_SYNC_BLOCKS(struct inode *inode, blkcnt_t blocks)
++{
++ if (sb_any_quota_active(inode->i_sb)) {
++ if (blocks > inode->i_blocks)
++ inode->i_sb->dq_op->alloc_space(inode,
++ (qsize_t)(blocks-inode->i_blocks)*512,
++ 13 /*DQUOT_CMD_FORCE*/);
++ else if (blocks < inode->i_blocks)
++ inode->i_sb->dq_op->free_space(inode, (qsize_t)(inode->i_blocks-blocks)*512);
++ } else
++ inode->i_blocks = blocks;
++}
++
++
+ #else
+
+ static inline int sb_has_quota_usage_enabled(struct super_block *sb, int type)
+@@ -363,6 +405,12 @@ static inline int vfs_dq_transfer(struct inode *inode, struct iattr *iattr)
return 0;
}
@@ -26905,6 +29530,22 @@
static inline int vfs_dq_prealloc_space_nodirty(struct inode *inode, qsize_t nr)
{
inode_add_bytes(inode, nr);
+@@ -416,6 +464,15 @@ static inline void vfs_dq_free_space(struct inode *inode, qsize_t nr)
+ mark_inode_dirty(inode);
+ }
+
++static inline void DQUOT_SWAP(struct inode *inode, struct inode *tmpl)
++{
++}
++
++static inline void DQUOT_SYNC_BLOCKS(struct inode *inode, blkcnt_t blocks)
++{
++ inode->i_blocks = blocks;
++}
++
+ #endif /* CONFIG_QUOTA */
+
+ static inline int vfs_dq_prealloc_block_nodirty(struct inode *inode, qsize_t nr)
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index cb0ba70..b14f124 100644
--- a/include/linux/rmap.h
@@ -26919,7 +29560,7 @@
static inline void page_dup_rmap(struct page *page)
{
diff --git a/include/linux/sched.h b/include/linux/sched.h
-index 70abfd3..fa44cc6 100644
+index 70abfd3..d6155c1 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -94,6 +94,8 @@ struct sched_param {
@@ -26979,15 +29620,7 @@
extern void calc_global_load(void);
-@@ -286,6 +313,7 @@ static inline void show_state(void)
- }
-
- extern void show_regs(struct pt_regs *);
-+extern void smp_show_regs(struct pt_regs *, void *);
-
- /*
- * TASK is a pointer to the task whose backtrace we want to see (or NULL for current
-@@ -553,6 +581,9 @@ struct thread_group_cputimer {
+@@ -553,6 +580,9 @@ struct thread_group_cputimer {
spinlock_t lock;
};
@@ -26997,7 +29630,7 @@
/*
* NOTE! "signal_struct" does not have it's own
* locking, because a shared signal_struct always
-@@ -1283,6 +1314,7 @@ struct task_struct {
+@@ -1283,6 +1313,7 @@ struct task_struct {
unsigned in_execve:1; /* Tell the LSMs that the process is doing an
* execve */
unsigned in_iowait:1;
@@ -27005,7 +29638,7 @@
/* Revert to default priority/policy when forking */
-@@ -1498,6 +1530,14 @@ struct task_struct {
+@@ -1498,6 +1529,14 @@ struct task_struct {
struct rcu_head rcu;
/*
@@ -27020,7 +29653,7 @@
* cache last used pipe for splice
*/
struct pipe_inode_info *splice_pipe;
-@@ -1542,6 +1582,19 @@ struct task_struct {
+@@ -1542,6 +1581,19 @@ struct task_struct {
unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
unsigned long stack_start;
@@ -27040,7 +29673,7 @@
};
/* Future-safe accessor for struct task_struct's cpus_allowed. */
-@@ -1727,6 +1780,43 @@ extern cputime_t task_utime(struct task_struct *p);
+@@ -1727,6 +1779,43 @@ extern cputime_t task_utime(struct task_struct *p);
extern cputime_t task_stime(struct task_struct *p);
extern cputime_t task_gtime(struct task_struct *p);
@@ -27084,7 +29717,7 @@
/*
* Per process flags
*/
-@@ -1736,6 +1826,7 @@ extern cputime_t task_gtime(struct task_struct *p);
+@@ -1736,6 +1825,7 @@ extern cputime_t task_gtime(struct task_struct *p);
#define PF_EXITING 0x00000004 /* getting shut down */
#define PF_EXITPIDONE 0x00000008 /* pi exit done on shut down */
#define PF_VCPU 0x00000010 /* I'm a virtual CPU */
@@ -27092,7 +29725,7 @@
#define PF_FORKNOEXEC 0x00000040 /* forked but didn't exec */
#define PF_MCE_PROCESS 0x00000080 /* process policy on mce errors */
#define PF_SUPERPRIV 0x00000100 /* used super-user privileges */
-@@ -1872,6 +1963,21 @@ extern unsigned long long
+@@ -1872,6 +1962,21 @@ extern unsigned long long
task_sched_runtime(struct task_struct *task);
extern unsigned long long thread_group_sched_runtime(struct task_struct *task);
@@ -27114,7 +29747,7 @@
/* sched_exec is called by processes performing an exec */
#ifdef CONFIG_SMP
extern void sched_exec(void);
-@@ -2151,6 +2257,13 @@ extern int disallow_signal(int);
+@@ -2151,6 +2256,13 @@ extern int disallow_signal(int);
extern int do_execve(char *, char __user * __user *, char __user * __user *, struct pt_regs *);
extern long do_fork(unsigned long, unsigned long, struct pt_regs *, unsigned long, int __user *, int __user *);
@@ -27128,7 +29761,7 @@
struct task_struct *fork_idle(int);
extern void set_task_comm(struct task_struct *tsk, char *from);
-@@ -2168,11 +2281,11 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
+@@ -2168,11 +2280,11 @@ static inline unsigned long wait_task_inactive(struct task_struct *p,
}
#endif
@@ -27143,7 +29776,7 @@
extern bool current_is_single_threaded(void);
-@@ -2180,10 +2293,10 @@ extern bool current_is_single_threaded(void);
+@@ -2180,10 +2292,10 @@ extern bool current_is_single_threaded(void);
* Careful: do_each_thread/while_each_thread is a double loop so
* 'break' will not work as expected - use goto instead.
*/
@@ -27157,7 +29790,7 @@
while ((t = next_thread(t)) != g)
/* de_thread depends on thread_group_leader not being a pid based check */
-@@ -2208,8 +2321,14 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
+@@ -2208,8 +2320,14 @@ int same_thread_group(struct task_struct *p1, struct task_struct *p2)
static inline struct task_struct *next_thread(const struct task_struct *p)
{
@@ -27173,7 +29806,7 @@
}
static inline int thread_group_empty(struct task_struct *p)
-@@ -2254,6 +2373,98 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
+@@ -2254,6 +2372,98 @@ static inline void unlock_task_sighand(struct task_struct *tsk,
spin_unlock_irqrestore(&tsk->sighand->siglock, *flags);
}
@@ -27690,42 +30323,6 @@
if (!s)
return ZERO_SIZE_PTR;
-diff --git a/include/linux/smp.h b/include/linux/smp.h
-index 39c64ba..7b81017 100644
---- a/include/linux/smp.h
-+++ b/include/linux/smp.h
-@@ -13,6 +13,9 @@
-
- extern void cpu_idle(void);
-
-+struct pt_regs;
-+typedef void (*smp_nmi_function)(struct pt_regs *regs, void *info);
-+
- struct call_single_data {
- struct list_head list;
- void (*func) (void *info);
-@@ -66,6 +69,8 @@ extern int __cpu_up(unsigned int cpunum);
- */
- extern void smp_cpus_done(unsigned int max_cpus);
-
-+extern int smp_nmi_call_function(smp_nmi_function func, void *info, int wait);
-+
- /*
- * Call a function on all other processors
- */
-@@ -140,6 +145,12 @@ static inline void smp_send_reschedule(int cpu) { }
- static inline void init_call_single_data(void)
- {
- }
-+static inline int smp_nmi_call_function(smp_nmi_function func,
-+ void *info, int wait)
-+{
-+ return 0;
-+}
-+
- #endif /* !SMP */
-
- /*
diff --git a/include/linux/socket.h b/include/linux/socket.h
index 3273a0c..87cf3d1 100644
--- a/include/linux/socket.h
@@ -28205,10 +30802,10 @@
static inline void get_uts_ns(struct uts_namespace *ns)
diff --git a/include/linux/ve.h b/include/linux/ve.h
new file mode 100644
-index 0000000..8f8d083
+index 0000000..e0e045a
--- /dev/null
+++ b/include/linux/ve.h
-@@ -0,0 +1,361 @@
+@@ -0,0 +1,367 @@
+/*
+ * include/linux/ve.h
+ *
@@ -28340,9 +30937,9 @@
+ cycles_t strt_idle_time;
+ cycles_t used_time;
+ seqcount_t stat_lock;
-+ int nr_running;
-+ int nr_unint;
-+ int nr_iowait;
++ unsigned long nr_running;
++ unsigned long nr_unint;
++ unsigned long nr_iowait;
+ cputime64_t user;
+ cputime64_t nice;
+ cputime64_t system;
@@ -28481,6 +31078,7 @@
+ struct ve_monitor *monitor;
+ struct proc_dir_entry *monitor_proc;
+ unsigned long meminfo_val;
++ int _randomize_va_space;
+
+#if defined(CONFIG_NFS_FS) || defined(CONFIG_NFS_FS_MODULE) \
+ || defined(CONFIG_NFSD) || defined(CONFIG_NFSD_MODULE)
@@ -28491,6 +31089,14 @@
+ struct svc_rqst* _nlmsvc_rqst;
+#endif
+
++#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
++ struct file_system_type *bm_fs_type;
++ struct vfsmount *bm_mnt;
++ int bm_enabled;
++ int bm_entry_count;
++ struct list_head bm_entries;
++#endif
++
+ struct nsproxy *ve_ns;
+ struct user_namespace *user_ns;
+ struct net *ve_netns;
@@ -28507,10 +31113,7 @@
+extern struct ve_cpu_stats static_ve_cpu_stats;
+static inline struct ve_cpu_stats *VE_CPU_STATS(struct ve_struct *ve, int cpu)
+{
-+ if (ve->cpu_stats == NULL)
-+ return &static_ve_cpu_stats;
-+ else
-+ return per_cpu_ptr(ve->cpu_stats, cpu);
++ return per_cpu_ptr(ve->cpu_stats, cpu);
+}
+
+extern int nr_ve;
@@ -28608,10 +31211,10 @@
+#endif
diff --git a/include/linux/ve_proto.h b/include/linux/ve_proto.h
new file mode 100644
-index 0000000..3364e33
+index 0000000..8bc4e01
--- /dev/null
+++ b/include/linux/ve_proto.h
-@@ -0,0 +1,89 @@
+@@ -0,0 +1,96 @@
+/*
+ * include/linux/ve_proto.h
+ *
@@ -28629,6 +31232,13 @@
+
+struct ve_struct;
+
++struct seq_file;
++
++typedef void (*ve_seq_print_t)(struct seq_file *, struct ve_struct *);
++
++void vzmon_register_veaddr_print_cb(ve_seq_print_t);
++void vzmon_unregister_veaddr_print_cb(ve_seq_print_t);
++
+#ifdef CONFIG_INET
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid);
+#ifdef CONFIG_VE_NETDEV
@@ -28803,10 +31413,10 @@
+#endif
diff --git a/include/linux/venet.h b/include/linux/venet.h
new file mode 100644
-index 0000000..1554037
+index 0000000..dd26f11
--- /dev/null
+++ b/include/linux/venet.h
-@@ -0,0 +1,86 @@
+@@ -0,0 +1,95 @@
+/*
+ * include/linux/venet.h
+ *
@@ -28845,12 +31455,19 @@
+ struct list_head ve_list;
+};
+
++struct ext_entry_struct
++{
++ struct list_head list;
++ struct ve_addr_struct addr;
++};
++
+struct veip_struct
+{
+ struct list_head src_lh;
+ struct list_head dst_lh;
+ struct list_head ip_lh;
+ struct list_head list;
++ struct list_head ext_lh;
+ envid_t veid;
+};
+
@@ -28884,6 +31501,8 @@
+int veip_entry_add(struct ve_struct *ve, struct ve_addr_struct *addr);
+int veip_entry_del(envid_t veid, struct ve_addr_struct *addr);
+int venet_change_skb_owner(struct sk_buff *skb);
++struct ext_entry_struct *venet_ext_lookup(struct ve_struct *ve,
++ struct ve_addr_struct *addr);
+
+extern struct list_head ip_entry_hash_table[];
+extern rwlock_t veip_hash_lock;
@@ -29092,10 +31711,10 @@
+#endif /* __LINUX_VIRTINFO_H */
diff --git a/include/linux/virtinfoscp.h b/include/linux/virtinfoscp.h
new file mode 100644
-index 0000000..9e7584f
+index 0000000..5661c0d
--- /dev/null
+++ b/include/linux/virtinfoscp.h
-@@ -0,0 +1,21 @@
+@@ -0,0 +1,23 @@
+#ifndef __VIRTINFO_SCP_H__
+#define __VIRTINFO_SCP_H__
+
@@ -29114,6 +31733,8 @@
+#define VIRTINFO_SCP_RSTTSK 0x20
+#define VIRTINFO_SCP_RSTMM 0x21
+
++#define VIRTINFO_SCP_TEST 0x30
++
+#define VIRTNOTIFY_CHANGE 0x100
+
+#endif /* __VIRTINFO_SCP_H__ */
@@ -29502,10 +32123,10 @@
+#endif /* __LINUX_VZCTL_QUOTA_H__ */
diff --git a/include/linux/vzctl_venet.h b/include/linux/vzctl_venet.h
new file mode 100644
-index 0000000..4797a50
+index 0000000..8c02cd4
--- /dev/null
+++ b/include/linux/vzctl_venet.h
-@@ -0,0 +1,51 @@
+@@ -0,0 +1,53 @@
+/*
+ * include/linux/vzctl_venet.h
+ *
@@ -29533,6 +32154,8 @@
+ int op;
+#define VE_IP_ADD 1
+#define VE_IP_DEL 2
++#define VE_IP_EXT_ADD 3
++#define VE_IP_EXT_DEL 4
+ struct sockaddr *addr;
+ int addrlen;
+};
@@ -29818,10 +32441,10 @@
+#endif /* _LINUX_VZIPTABLE_DEFS_H */
diff --git a/include/linux/vzquota.h b/include/linux/vzquota.h
new file mode 100644
-index 0000000..e16605e
+index 0000000..1dba5fa
--- /dev/null
+++ b/include/linux/vzquota.h
-@@ -0,0 +1,379 @@
+@@ -0,0 +1,380 @@
+/*
+ *
+ * Copyright (C) 2001-2005 SWsoft
@@ -30031,7 +32654,7 @@
+ struct dq_info dq_info; /* grace times and flags */
+ spinlock_t dq_data_lock; /* for dq_stat */
+
-+ struct semaphore dq_sem; /* semaphore to protect
++ struct mutex dq_mutex; /* mutex to protect
+ ugid tree */
+
+ struct list_head dq_ilink_list; /* list of vz_quota_ilink */
@@ -30096,7 +32719,8 @@
+#define DQUOT_CMD_CHECK 12
+#define DQUOT_CMD_FORCE 13
+
-+extern struct semaphore vz_quota_sem;
++extern struct mutex vz_quota_mutex;
++
+void inode_qmblk_lock(struct super_block *sb);
+void inode_qmblk_unlock(struct super_block *sb);
+void qmblk_data_read_lock(struct vz_quota_master *qmblk);
@@ -30106,6 +32730,7 @@
+
+/* for quota operations */
+void vzquota_inode_init_call(struct inode *inode);
++void vzquota_inode_swap_call(struct inode *, struct inode *);
+void vzquota_inode_drop_call(struct inode *inode);
+int vzquota_inode_transfer_call(struct inode *, struct iattr *);
+struct vz_quota_master *vzquota_inode_data(struct inode *inode,
@@ -30193,7 +32818,6 @@
+int vzquota_proc_init(void);
+void vzquota_proc_release(void);
+struct vz_quota_master *vzquota_find_qmblk(struct super_block *);
-+extern struct semaphore vz_quota_sem;
+
+void vzaquota_init(void);
+void vzaquota_fini(void);
@@ -30268,7 +32892,7 @@
+#endif /* __VZ_RATELIMIT_H__ */
diff --git a/include/linux/vzstat.h b/include/linux/vzstat.h
new file mode 100644
-index 0000000..5c23ea4
+index 0000000..c7dfd1f
--- /dev/null
+++ b/include/linux/vzstat.h
@@ -0,0 +1,182 @@
@@ -30310,7 +32934,7 @@
+ cycles_t avg[3];
+};
+struct kstat_lat_pcpu_struct {
-+ struct kstat_lat_pcpu_snap_struct cur[NR_CPUS];
++ struct kstat_lat_pcpu_snap_struct *cur;
+ cycles_t max_snap;
+ struct kstat_lat_snap_struct last;
+ cycles_t avg[3];
@@ -30395,7 +33019,7 @@
+{
+ struct kstat_lat_pcpu_snap_struct *cur;
+
-+ cur = &p->cur[cpu];
++ cur = per_cpu_ptr(p->cur, cpu);
+ write_seqcount_begin(&cur->lock);
+ cur->count++;
+ if (cur->maxlat < dur)
@@ -30426,8 +33050,8 @@
+ cycles_t m;
+
+ memset(&p->last, 0, sizeof(p->last));
-+ for (cpu = 0; cpu < NR_CPUS; cpu++) {
-+ cur = &p->cur[cpu];
++ for_each_online_cpu(cpu) {
++ cur = per_cpu_ptr(p->cur, cpu);
+ do {
+ i = read_seqcount_begin(&cur->lock);
+ memcpy(&snap, cur, sizeof(snap));
@@ -30454,6 +33078,24 @@
+}
+
+#endif /* __VZSTAT_H__ */
+diff --git a/include/linux/xattr.h b/include/linux/xattr.h
+index 5c84af8..12bd3c3 100644
+--- a/include/linux/xattr.h
++++ b/include/linux/xattr.h
+@@ -10,6 +10,13 @@
+ #ifndef _LINUX_XATTR_H
+ #define _LINUX_XATTR_H
+
++#ifdef CONFIG_VE
++extern int ve_xattr_policy;
++#define VE_XATTR_POLICY_ACCEPT 0
++#define VE_XATTR_POLICY_IGNORE 1
++#define VE_XATTR_POLICY_REJECT 2
++#endif
++
+ #define XATTR_CREATE 0x1 /* set value, fail if attr already exists */
+ #define XATTR_REPLACE 0x2 /* set value, fail if attr does not exist */
+
diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index 0f7c378..e2a9043 100644
--- a/include/net/addrconf.h
@@ -31026,7 +33668,7 @@
printed = true;
}
diff --git a/init/main.c b/init/main.c
-index bc109c7..d7f4866 100644
+index bc109c7..d06cdc8 100644
--- a/init/main.c
+++ b/init/main.c
@@ -70,6 +70,9 @@
@@ -31056,20 +33698,20 @@
/*
* Boot command-line arguments
*/
-@@ -516,6 +529,9 @@ asmlinkage void __init start_kernel(void)
+@@ -516,6 +529,8 @@ asmlinkage void __init start_kernel(void)
smp_setup_processor_id();
+ prepare_ve0_process(&init_task);
-+ init_ve0();
+
/*
* Need to run as early as possible, to initialize the
* lockdep hash:
-@@ -548,6 +564,7 @@ asmlinkage void __init start_kernel(void)
+@@ -548,6 +563,8 @@ asmlinkage void __init start_kernel(void)
setup_command_line(command_line);
setup_nr_cpu_ids();
setup_per_cpu_areas();
++ init_ve0();
+ ub_init_early();
smp_prepare_boot_cpu(); /* arch-specific boot-cpu hooks */
@@ -32131,10 +34773,10 @@
+obj-$(CONFIG_BC_IO_ACCOUNTING) += io_acct.o
diff --git a/kernel/bc/beancounter.c b/kernel/bc/beancounter.c
new file mode 100644
-index 0000000..6513257
+index 0000000..fdf3bb8
--- /dev/null
+++ b/kernel/bc/beancounter.c
-@@ -0,0 +1,688 @@
+@@ -0,0 +1,715 @@
+/*
+ * linux/kernel/bc/beancounter.c
+ *
@@ -32168,6 +34810,7 @@
+#include <linux/module.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
++#include <linux/random.h>
+
+#include <bc/beancounter.h>
+#include <bc/hash.h>
@@ -32204,9 +34847,9 @@
+ "dummy",
+ "dummy",
+ "numiptent",
++ "swappages",
+ "unused_privvmpages", /* UB_RESOURCES */
+ "tmpfs_respages",
-+ "swap_pages",
+ "held_pages",
+};
+
@@ -32309,6 +34952,25 @@
+ return NULL;
+}
+
++int ub_count;
++
++/* next two must be called under ub_hash_lock */
++static inline void ub_count_inc(struct user_beancounter *ub)
++{
++ if (ub->parent)
++ ub->parent->ub_childs++;
++ else
++ ub_count++;
++}
++
++static inline void ub_count_dec(struct user_beancounter *ub)
++{
++ if (ub->parent)
++ ub->parent->ub_childs--;
++ else
++ ub_count--;
++}
++
+struct user_beancounter *get_beancounter_byuid(uid_t uid, int create)
+{
+ struct user_beancounter *new_ub, *ub;
@@ -32337,6 +34999,7 @@
+ if (new_ub != NULL) {
+ list_add_rcu(&new_ub->ub_list, &ub_list_head);
+ hlist_add_head(&new_ub->ub_hash, hash);
++ ub_count_inc(new_ub);
+ spin_unlock_irqrestore(&ub_hash_lock, flags);
+ return new_ub;
+ }
@@ -32382,6 +35045,7 @@
+ if (new_ub != NULL) {
+ list_add_rcu(&new_ub->ub_list, &ub_list_head);
+ hlist_add_head(&new_ub->ub_hash, hash);
++ ub_count_inc(new_ub);
+ spin_unlock_irqrestore(&ub_hash_lock, flags);
+ return new_ub;
+ }
@@ -32431,7 +35095,6 @@
+
+ clean &= verify_res(ub, UB_UNUSEDPRIVVM, ub->ub_unused_privvmpages);
+ clean &= verify_res(ub, UB_TMPFSPAGES, ub->ub_tmpfs_respages);
-+ clean &= verify_res(ub, UB_SWAPPAGES, ub->ub_swap_pages);
+ clean &= verify_res(ub, UB_HELDPAGES, (unsigned long)ub->ub_held_pages);
+
+ ub_debug_trace(!clean, 5, 60*HZ);
@@ -32460,6 +35123,7 @@
+ }
+
+ hlist_del(&ub->ub_hash);
++ ub_count_dec(ub);
+ list_del_rcu(&ub->ub_list);
+ spin_unlock_irqrestore(&ub_hash_lock, flags);
+
@@ -32716,6 +35380,7 @@
+static void init_beancounter_struct(struct user_beancounter *ub)
+{
+ ub->ub_magic = UB_MAGIC;
++ ub->ub_cookie = get_random_int();
+ atomic_set(&ub->ub_refcount, 1);
+ spin_lock_init(&ub->ub_lock);
+ INIT_LIST_HEAD(&ub->ub_tcp_sk_list);
@@ -32775,6 +35440,7 @@
+ ub->ub_parms[UB_NUMSIGINFO].limit = 1024;
+ ub->ub_parms[UB_DCACHESIZE].limit = 1024*1024;
+ ub->ub_parms[UB_NUMFILE].limit = 1024;
++ ub->ub_parms[UB_SWAPPAGES].limit = UB_MAXVALUE;
+
+ for (k = 0; k < UB_RESOURCES; k++)
+ ub->ub_parms[k].barrier = ub->ub_parms[k].limit;
@@ -32783,6 +35449,8 @@
+ ub->ub_limit_rl.interval = 300*HZ;
+}
+
++static DEFINE_PER_CPU(struct ub_percpu_struct, ub0_percpu);
++
+void __init ub_init_early(void)
+{
+ struct user_beancounter *ub;
@@ -32794,7 +35462,7 @@
+ init_beancounter_nolimits(ub);
+ init_beancounter_store(ub);
+ init_beancounter_struct(ub);
-+ ub->ub_percpu = NULL;
++ ub->ub_percpu = &per_cpu__ub0_percpu;
+
+ memset(¤t->task_bc, 0, sizeof(struct task_beancounter));
+ (void)set_exec_ub(ub);
@@ -32806,6 +35474,7 @@
+
+ hlist_add_head(&ub->ub_hash, &ub_hash[ub->ub_uid]);
+ list_add(&ub->ub_list, &ub_list_head);
++ ub_count_inc(ub);
+}
+
+void __init ub_init_late(void)
@@ -33737,10 +36406,10 @@
+#endif
diff --git a/kernel/bc/kmem.c b/kernel/bc/kmem.c
new file mode 100644
-index 0000000..74c4179
+index 0000000..7068e57
--- /dev/null
+++ b/kernel/bc/kmem.c
-@@ -0,0 +1,406 @@
+@@ -0,0 +1,405 @@
+/*
+ * kernel/bc/kmem.c
+ *
@@ -33911,16 +36580,15 @@
+{
+ struct user_beancounter *ub;
+ struct ub_cache_counter *cc;
-+ long pages, vmpages, pbc;
++ long pages, vmpages;
+ int i;
+
+ ub = seq_beancounter(f);
+
-+ pages = vmpages = pbc = 0;
++ pages = vmpages = 0;
+ for_each_online_cpu(i) {
+ pages += per_cpu_ptr(ub->ub_percpu, i)->pages_charged;
+ vmpages += per_cpu_ptr(ub->ub_percpu, i)->vmalloc_charged;
-+ pbc += per_cpu_ptr(ub->ub_percpu, i)->pbcs;
+ }
+ if (pages < 0)
+ pages = 0;
@@ -33929,7 +36597,7 @@
+
+ seq_printf(f, bc_proc_lu_lu_fmt, "pages", pages, PAGE_SIZE);
+ seq_printf(f, bc_proc_lu_lu_fmt, "vmalloced", vmpages, PAGE_SIZE);
-+ seq_printf(f, bc_proc_lu_lu_fmt, "pbcs", pbc,
++ seq_printf(f, bc_proc_lu_lu_fmt, "pbcs", ub->ub_pbcs,
+ sizeof(struct page_beancounter));
+
+ spin_lock_irq(&cc_lock);
@@ -34149,10 +36817,10 @@
+EXPORT_SYMBOL(mem_ub);
diff --git a/kernel/bc/misc.c b/kernel/bc/misc.c
new file mode 100644
-index 0000000..a47b355
+index 0000000..15e7aa4
--- /dev/null
+++ b/kernel/bc/misc.c
-@@ -0,0 +1,454 @@
+@@ -0,0 +1,460 @@
+/*
+ * kernel/bc/misc.c
+ *
@@ -34447,28 +37115,34 @@
+ return err;
+}
+
++static inline int task_precharge_farnr(struct task_beancounter *task_bc)
++{
++ return (task_bc->file_precharged < (1UL << task_bc->file_quant));
++}
++
+void ub_file_uncharge(struct file *f)
+{
+ struct user_beancounter *ub, *pub;
+ struct task_beancounter *task_bc;
-+ unsigned long nr;
++ int nr;
+
+ ub = f->f_ub;
+ task_bc = ¤t->task_bc;
+ if (likely(ub == task_bc->task_ub)) {
+ task_bc->file_precharged++;
+ pub = top_beancounter(ub);
-+ if (ub_barrier_farnr(pub, UB_NUMFILE) &&
++ if (task_precharge_farnr(task_bc) &&
+ ub_barrier_farsz(pub, UB_KMEMSIZE))
+ return;
-+ if (task_bc->file_precharged < (1UL << task_bc->file_quant))
-+ return;
+ nr = task_bc->file_precharged
+ - (1UL << (task_bc->file_quant - 1));
-+ task_bc->file_precharged -= nr;
-+ __put_beancounter_batch(ub, nr);
-+ uncharge_beancounter(ub, UB_NUMFILE, nr);
-+ uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(nr));
++ if (nr > 0) {
++ task_bc->file_precharged -= nr;
++ __put_beancounter_batch(ub, nr);
++ uncharge_beancounter(ub, UB_NUMFILE, nr);
++ uncharge_beancounter(ub, UB_KMEMSIZE,
++ ub_file_kmemsize(nr));
++ }
+ } else {
+ uncharge_beancounter(ub, UB_NUMFILE, 1);
+ uncharge_beancounter(ub, UB_KMEMSIZE, ub_file_kmemsize(1));
@@ -35974,10 +38648,10 @@
+EXPORT_SYMBOL(ub_out_of_memory);
diff --git a/kernel/bc/proc.c b/kernel/bc/proc.c
new file mode 100644
-index 0000000..4bfc03c
+index 0000000..dd96e38
--- /dev/null
+++ b/kernel/bc/proc.c
-@@ -0,0 +1,682 @@
+@@ -0,0 +1,703 @@
+/*
+ * kernel/bc/proc.c
+ *
@@ -36265,7 +38939,7 @@
+
+ ret = 0xbc000000;
+ if (ub->parent)
-+ ret |= ((ub->parent->ub_uid) << 4);
++ ret |= ((ub->parent->ub_uid + 1) << 4);
+ ret |= (ub->ub_uid + 1);
+ return ret;
+}
@@ -36583,6 +39257,17 @@
+ return bc_lookup(ub, dir, dentry);
+}
+
++static int bc_entry_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ struct user_beancounter *ub;
++
++ generic_fillattr(dentry->d_inode, stat);
++ ub = (struct user_beancounter *)dentry->d_fsdata;
++ stat->nlink = ub->ub_childs + 2;
++ return 0;
++}
++
+static struct file_operations bc_entry_fops = {
+ .read = generic_read_dir,
+ .readdir = bc_entry_readdir,
@@ -36590,6 +39275,7 @@
+
+static struct inode_operations bc_entry_iops = {
+ .lookup = bc_entry_lookup,
++ .getattr = bc_entry_getattr,
+};
+
+/*
@@ -36627,6 +39313,14 @@
+ return bc_lookup(ub, dir, dentry);
+}
+
++static int bc_root_getattr(struct vfsmount *mnt, struct dentry *dentry,
++ struct kstat *stat)
++{
++ generic_fillattr(dentry->d_inode, stat);
++ stat->nlink = ub_count + 2;
++ return 0;
++}
++
+static struct file_operations bc_root_fops = {
+ .read = generic_read_dir,
+ .readdir = bc_root_readdir,
@@ -36634,6 +39328,7 @@
+
+static struct inode_operations bc_root_iops = {
+ .lookup = bc_root_lookup,
++ .getattr = bc_root_getattr,
+};
+
+static int __init ub_init_proc(void)
@@ -36662,10 +39357,10 @@
+core_initcall(ub_init_proc);
diff --git a/kernel/bc/rss_pages.c b/kernel/bc/rss_pages.c
new file mode 100644
-index 0000000..7b3d872
+index 0000000..2f64be5
--- /dev/null
+++ b/kernel/bc/rss_pages.c
-@@ -0,0 +1,438 @@
+@@ -0,0 +1,454 @@
+/*
+ * kernel/bc/rss_pages.c
+ *
@@ -36754,6 +39449,22 @@
+}
+
+/*
++ * ++ and -- beyond are protected with pb_lock
++ */
++
++static inline void inc_pbc_count(struct user_beancounter *ub)
++{
++ for (; ub != NULL; ub = ub->parent)
++ ub->ub_pbcs++;
++}
++
++static inline void dec_pbc_count(struct user_beancounter *ub)
++{
++ for (; ub != NULL; ub = ub->parent)
++ ub->ub_pbcs--;
++}
++
++/*
+ * Alloc - free
+ */
+
@@ -36865,7 +39576,7 @@
+
+static inline int pb_hash(struct user_beancounter *ub, struct page *page)
+{
-+ return (page_to_pfn(page) + (ub->ub_uid << 10)) & pb_hash_mask;
++ return (page_to_pfn(page) ^ ub->ub_cookie) & pb_hash_mask;
+}
+
+/* pb_lock should be held */
@@ -37565,10 +40276,10 @@
+module_init(ubstatd_init);
diff --git a/kernel/bc/sys.c b/kernel/bc/sys.c
new file mode 100644
-index 0000000..a997944
+index 0000000..8fb942e
--- /dev/null
+++ b/kernel/bc/sys.c
-@@ -0,0 +1,176 @@
+@@ -0,0 +1,184 @@
+/*
+ * kernel/bc/sys.c
+ *
@@ -37724,18 +40435,26 @@
+}
+
+#ifdef CONFIG_COMPAT
-+asmlinkage long compat_sys_setublimit(uid_t uid, int resource,
-+ unsigned int __user *limits)
++#define UB_MAXVALUE_COMPAT ((1UL << (sizeof(compat_long_t) * 8 - 1)) - 1)
++
++asmlinkage long compat_sys_setublimit(uid_t uid,
++ compat_long_t resource,
++ compat_long_t __user *limits)
+{
-+ unsigned int u_new_limits[2];
++ compat_long_t u_new_limits[2];
+ unsigned long new_limits[2];
+
-+ if (copy_from_user(&u_new_limits, limits, sizeof(u_new_limits)))
-+ return -EFAULT;
++ if (copy_from_user(&u_new_limits, limits, sizeof(u_new_limits)))
++ return -EFAULT;
+
+ new_limits[0] = u_new_limits[0];
+ new_limits[1] = u_new_limits[1];
+
++ if (u_new_limits[0] == UB_MAXVALUE_COMPAT)
++ new_limits[0] = UB_MAXVALUE;
++ if (u_new_limits[1] == UB_MAXVALUE_COMPAT)
++ new_limits[1] = UB_MAXVALUE;
++
+ return do_setublimit(uid, resource, new_limits);
+}
+
@@ -37747,10 +40466,10 @@
+#endif
diff --git a/kernel/bc/vm_pages.c b/kernel/bc/vm_pages.c
new file mode 100644
-index 0000000..e98134b
+index 0000000..9b4ef0e
--- /dev/null
+++ b/kernel/bc/vm_pages.c
-@@ -0,0 +1,549 @@
+@@ -0,0 +1,546 @@
+/*
+ * kernel/bc/vm_pages.c
+ *
@@ -37858,7 +40577,8 @@
+void __ub_update_oomguarpages(struct user_beancounter *ub)
+{
+ ub->ub_parms[UB_OOMGUARPAGES].held =
-+ ub->ub_parms[UB_PHYSPAGES].held + ub->ub_swap_pages;
++ ub->ub_parms[UB_PHYSPAGES].held +
++ ub->ub_parms[UB_SWAPPAGES].held;
+ ub_adjust_maxheld(ub, UB_OOMGUARPAGES);
+}
+
@@ -38160,7 +40880,7 @@
+ unsigned long flags;
+
+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ ub->ub_swap_pages++;
++ __charge_beancounter_locked(ub, UB_SWAPPAGES, 1, UB_FORCE);
+ __ub_update_oomguarpages(ub);
+ spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
@@ -38179,10 +40899,7 @@
+ unsigned long flags;
+
+ spin_lock_irqsave(&ub->ub_lock, flags);
-+ if (ub->ub_swap_pages <= 0)
-+ uncharge_warn(ub, UB_SWAPPAGES, 1, ub->ub_swap_pages);
-+ else
-+ ub->ub_swap_pages--;
++ __uncharge_beancounter_locked(ub, UB_SWAPPAGES, 1);
+ __ub_update_oomguarpages(ub);
+ spin_unlock_irqrestore(&ub->ub_lock, flags);
+}
@@ -38280,8 +40997,7 @@
+ ub->ub_unused_privvmpages);
+ seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_TMPFSPAGES],
+ ub->ub_tmpfs_respages);
-+ seq_printf(f, bc_proc_lu_fmt, ub_rnames[UB_SWAPPAGES],
-+ ub->ub_swap_pages);
++ seq_printf(f, bc_proc_lu_fmt, "rss", ub->ub_pbcs);
+
+ seq_printf(f, bc_proc_lu_fmt, "swapin", swap);
+ seq_printf(f, bc_proc_lu_fmt, "unmap", unmap);
@@ -39015,7 +41731,7 @@
+#endif
diff --git a/kernel/cpt/cpt_context.c b/kernel/cpt/cpt_context.c
new file mode 100644
-index 0000000..bfba186
+index 0000000..f095a73
--- /dev/null
+++ b/kernel/cpt/cpt_context.c
@@ -0,0 +1,285 @@
@@ -39171,7 +41887,7 @@
+ hdr.cpt_signature[2] = CPT_SIGNATURE2;
+ hdr.cpt_signature[3] = CPT_SIGNATURE3;
+ hdr.cpt_hdrlen = sizeof(hdr);
-+ hdr.cpt_image_version = CPT_VERSION_32;
++ hdr.cpt_image_version = CPT_CURRENT_VERSION;
+#ifdef CONFIG_X86_64
+ hdr.cpt_os_arch = CPT_OS_ARCH_EMT64;
+#elif defined(CONFIG_X86_32)
@@ -39306,10 +42022,10 @@
+}
diff --git a/kernel/cpt/cpt_context.h b/kernel/cpt/cpt_context.h
new file mode 100644
-index 0000000..e4f82f9
+index 0000000..9eb851a
--- /dev/null
+++ b/kernel/cpt/cpt_context.h
-@@ -0,0 +1,215 @@
+@@ -0,0 +1,225 @@
+#include <linux/fs.h>
+#include <asm/uaccess.h>
+#include <bc/beancounter.h>
@@ -39415,6 +42131,16 @@
+ and restore them before resuming */
+ struct ubparm saved_ubc[UB_RESOURCES];
+#endif
++
++ int tcp_cb_convert;
++#define CPT_TCP_CB_CONV 1
++#define CPT_TCP_CB_NOT_CONV 2
++
++#define CPT_MAX_LINKDIRS 1
++ struct file *linkdirs[CPT_MAX_LINKDIRS];
++ int linkdirs_num;
++ unsigned int linkcnt; /* for create hardlinked files */
++ int hardlinked_on;
+} cpt_context_t;
+
+typedef struct {
@@ -39527,10 +42253,10 @@
+}
diff --git a/kernel/cpt/cpt_dump.c b/kernel/cpt/cpt_dump.c
new file mode 100644
-index 0000000..7a36b4e
+index 0000000..08ae5e6
--- /dev/null
+++ b/kernel/cpt/cpt_dump.c
-@@ -0,0 +1,1248 @@
+@@ -0,0 +1,1271 @@
+/*
+ *
+ * kernel/cpt/cpt_dump.c
@@ -40327,6 +43053,7 @@
+ i->start_jiffies_delta = get_jiffies_64() - ve->start_jiffies;
+
+ i->last_pid = ve->ve_ns->pid_ns->last_pid;
++ i->rnd_va_space = ve->_randomize_va_space + 1;
+
+ ctx->write(i, sizeof(*i), ctx);
+ cpt_release_buf(ctx);
@@ -40692,8 +43419,10 @@
+
+ p.dentry = mnt->mnt_root;
+ p.mnt = mnt;
++ spin_lock(&dcache_lock);
+ path = __d_path(&p, &env->root_path,
+ path_buf, PAGE_SIZE);
++ spin_unlock(&dcache_lock);
+ if (IS_ERR(path))
+ continue;
+
@@ -40714,7 +43443,7 @@
+ struct nsproxy *old_ns;
+ struct mnt_namespace *n;
+ int err;
-+ unsigned int flags = test_cpu_caps();
++ unsigned int flags = test_cpu_caps_and_features();
+
+ if (!ctx->ve_id)
+ return -EINVAL;
@@ -40723,8 +43452,26 @@
+ if (env == NULL)
+ return -ESRCH;
+
++ down_read(&env->op_sem);
++ err = -ESRCH;
++ if (!env->is_running) {
++ eprintk_ctx("CT is not running\n");
++ goto out_noenv;
++ }
++
++ err = -EBUSY;
++ if (env->is_locked) {
++ eprintk_ctx("CT is locked\n");
++ goto out_noenv;
++ }
++
+ *caps = flags & (1<<CPT_CPU_X86_CMOV);
+
++ if (flags & (1 << CPT_SLM_DMPRST)) {
++ eprintk_ctx("SLM is enabled, but slm_dmprst module is not loaded\n");
++ *caps |= (1 << CPT_SLM_DMPRST);
++ }
++
+ old_env = set_exec_env(env);
+ old_ns = current->nsproxy;
+ current->nsproxy = env->ve_ns;
@@ -40775,6 +43522,8 @@
+out:
+ current->nsproxy = old_ns;
+ set_exec_env(old_env);
++out_noenv:
++ up_read(&env->op_sem);
+ put_ve(env);
+
+ return err;
@@ -40941,10 +43690,10 @@
+EXPORT_SYMBOL(lookup_cpt_obj_bypos);
diff --git a/kernel/cpt/cpt_files.c b/kernel/cpt/cpt_files.c
new file mode 100644
-index 0000000..f013331
+index 0000000..3ada205
--- /dev/null
+++ b/kernel/cpt/cpt_files.c
-@@ -0,0 +1,1648 @@
+@@ -0,0 +1,1783 @@
+/*
+ *
+ * kernel/cpt/cpt_files.c
@@ -40973,6 +43722,7 @@
+#include <linux/namei.h>
+#include <linux/smp_lock.h>
+#include <linux/pagemap.h>
++#include <linux/proc_fs.h>
+#include <asm/uaccess.h>
+#include <linux/vzcalluser.h>
+#include <linux/ve_proto.h>
@@ -41020,15 +43770,29 @@
+}
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
-+ cpt_context_t *ctx)
++ int verify, cpt_context_t *ctx)
+{
++ if (d->d_inode->i_sb->s_magic == FSMAGIC_PROC &&
++ proc_dentry_of_dead_task(d))
++ return 0;
++
+ if (path[0] == '/' && !(!IS_ROOT(d) && d_unhashed(d))) {
+ struct nameidata nd;
+ if (path_lookup(path, 0, &nd)) {
+ eprintk_ctx("d_path cannot be looked up %s\n", path);
+ return -EINVAL;
+ }
-+ if (nd.path.dentry != d || nd.path.mnt != mnt) {
++ if (nd.path.dentry != d || (verify && nd.path.mnt != mnt)) {
++ if (!strcmp(path, "/dev/null")) {
++ /*
++ * epic kludge to workaround the case, when the
++ * init opens a /dev/null and then udevd
++ * overmounts the /dev with tmpfs
++ */
++ path_put(&nd.path);
++ return 0;
++ }
++
+ eprintk_ctx("d_path is invisible %s\n", path);
+ path_put(&nd.path);
+ return -EINVAL;
@@ -41090,7 +43854,7 @@
+}
+
+static int cpt_dump_dentry(struct dentry *d, struct vfsmount *mnt,
-+ int replaced, cpt_context_t *ctx)
++ int replaced, int verify, cpt_context_t *ctx)
+{
+ int len;
+ char *path;
@@ -41155,7 +43919,7 @@
+ o.cpt_content = CPT_CONTENT_NAME;
+ path[len] = 0;
+
-+ if (cpt_verify_overmount(path, d, mnt, ctx)) {
++ if (cpt_verify_overmount(path, d, mnt, verify, ctx)) {
+ __cpt_release_buf(ctx);
+ return -EINVAL;
+ }
@@ -41194,7 +43958,7 @@
+static int
+cpt_dump_filename(struct file *file, int replaced, cpt_context_t *ctx)
+{
-+ return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, replaced, ctx);
++ return cpt_dump_dentry(file->f_dentry, file->f_vfsmnt, replaced, 1, ctx);
+}
+
+int cpt_dump_inode(struct dentry *d, struct vfsmount *mnt, struct cpt_context *ctx)
@@ -41435,25 +44199,33 @@
+
+ v->cpt_i_mode = sbuf.mode;
+ v->cpt_lflags = 0;
++
++ if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC) {
++ v->cpt_lflags |= CPT_DENTRY_PROC;
++ if (proc_dentry_of_dead_task(file->f_dentry))
++ v->cpt_lflags |= CPT_DENTRY_PROCPID_DEAD;
++ }
++
+ if (IS_ROOT(file->f_dentry))
+ v->cpt_lflags |= CPT_DENTRY_ROOT;
+ else if (d_unhashed(file->f_dentry)) {
+ if (cpt_replaced(file->f_dentry, file->f_vfsmnt, ctx)) {
+ v->cpt_lflags |= CPT_DENTRY_REPLACED;
+ replaced = 1;
-+ } else {
++ } else if (!(v->cpt_lflags & CPT_DENTRY_PROCPID_DEAD))
+ v->cpt_lflags |= CPT_DENTRY_DELETED;
-+ }
+ }
+ if (is_cloning_inode(file->f_dentry->d_inode))
+ v->cpt_lflags |= CPT_DENTRY_CLONING;
-+ if (file->f_dentry->d_inode->i_sb->s_magic == FSMAGIC_PROC)
-+ v->cpt_lflags |= CPT_DENTRY_PROC;
++
+ v->cpt_inode = CPT_NULL;
+ if (!(v->cpt_lflags & CPT_DENTRY_REPLACED)) {
+ iobj = lookup_cpt_object(CPT_OBJ_INODE, file->f_dentry->d_inode, ctx);
-+ if (iobj)
++ if (iobj) {
+ v->cpt_inode = iobj->o_pos;
++ if (iobj->o_flags & CPT_INODE_HARDLINKED)
++ v->cpt_lflags |= CPT_DENTRY_HARDLINKED;
++ }
+ }
+ v->cpt_priv = CPT_NULL;
+ v->cpt_fown_fd = -1;
@@ -41604,14 +44376,17 @@
+
+ if (!(file->f_mode & FMODE_READ) ||
+ (file->f_flags & O_DIRECT)) {
-+ file = dentry_open(dget(file->f_dentry),
-+ mntget(file->f_vfsmnt), O_RDONLY,
++ struct file *filp;
++ filp = dentry_open(dget(file->f_dentry),
++ mntget(file->f_vfsmnt),
++ O_RDONLY | O_LARGEFILE,
+ NULL /* not checked */);
-+ if (IS_ERR(file)) {
++ if (IS_ERR(filp)) {
+ cpt_printk_dentry(file->f_dentry, file->f_vfsmnt);
-+ eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(file));
-+ return PTR_ERR(file);
++ eprintk_ctx("cannot reopen file for read %ld\n", PTR_ERR(filp));
++ return PTR_ERR(filp);
+ }
++ file = filp;
+ } else {
+ atomic_long_inc(&file->f_count);
+ }
@@ -41858,7 +44633,7 @@
+ }
+ spin_unlock(&dcache_lock);
+ if (found) {
-+ err = cpt_dump_dentry(found, mnt, 0, ctx);
++ err = cpt_dump_dentry(found, mnt, 0, 1, ctx);
+ dput(found);
+ if (!err) {
+ dprintk_ctx("dentry found in aliases\n");
@@ -41872,7 +44647,7 @@
+ return -EINVAL;
+
+ mntget(mnt);
-+ f = dentry_open(de, mnt, O_RDONLY, NULL);
++ f = dentry_open(de, mnt, O_RDONLY | O_LARGEFILE, NULL);
+ if (IS_ERR(f))
+ return PTR_ERR(f);
+
@@ -41897,7 +44672,7 @@
+
+ dprintk_ctx("dentry found in dir\n");
+ __cpt_release_buf(ctx);
-+ err = cpt_dump_dentry(found, mnt, 0, ctx);
++ err = cpt_dump_dentry(found, mnt, 0, 1, ctx);
+
+err_lookup:
+ dput(found);
@@ -41907,6 +44682,86 @@
+ return err;
+}
+
++static struct dentry *find_linkdir(struct vfsmount *mnt, struct cpt_context *ctx)
++{
++ int i;
++
++ for (i = 0; i < ctx->linkdirs_num; i++)
++ if (ctx->linkdirs[i]->f_vfsmnt == mnt)
++ return ctx->linkdirs[i]->f_dentry;
++ return NULL;
++}
++
++struct dentry *cpt_fake_link(struct dentry *d, struct vfsmount *mnt,
++ struct inode *ino, struct cpt_context *ctx)
++{
++ int err;
++ int order = 8;
++ const char *prefix = ".cpt_hardlink.";
++ int preflen = strlen(prefix) + order;
++ char name[preflen + 1];
++ struct dentry *dirde, *hardde;
++
++ dirde = find_linkdir(mnt, ctx);
++ if (!dirde) {
++ err = -ENOENT;
++ goto out;
++ }
++
++ ctx->linkcnt++;
++ snprintf(name, sizeof(name), "%s%0*u", prefix, order, ctx->linkcnt);
++
++ mutex_lock(&dirde->d_inode->i_mutex);
++ hardde = lookup_one_len(name, dirde, strlen(name));
++ if (IS_ERR(hardde)) {
++ err = PTR_ERR(hardde);
++ goto out_unlock;
++ }
++
++ if (hardde->d_inode) {
++ /* Userspace should clean hardlinked files from previous
++ * dump/undump
++ */
++ eprintk_ctx("Hardlinked file already exists: %s\n", name);
++ err = -EEXIST;
++ goto out_put;
++ }
++
++ if (d == NULL)
++ err = vfs_create(dirde->d_inode, hardde, 0600, NULL);
++ else
++ err = vfs_link(d, dirde->d_inode, hardde);
++ if (err) {
++ eprintk_ctx("error hardlink %s, %d\n", name, err);
++ goto out_put;
++ }
++
++out_unlock:
++ mutex_unlock(&dirde->d_inode->i_mutex);
++out:
++ return err ? ERR_PTR(err) : hardde;
++
++out_put:
++ dput(hardde);
++ goto out_unlock;
++}
++
++static int create_dump_hardlink(struct dentry *d, struct vfsmount *mnt,
++ struct inode *ino, struct cpt_context *ctx)
++{
++ int err;
++ struct dentry *hardde;
++
++ hardde = cpt_fake_link(d, mnt, ino, ctx);
++ if (IS_ERR(hardde))
++ return PTR_ERR(hardde);
++
++ err = cpt_dump_dentry(hardde, mnt, 0, 1, ctx);
++ dput(hardde);
++
++ return err;
++}
++
+static int dump_one_inode(struct file *file, struct dentry *d,
+ struct vfsmount *mnt, struct cpt_context *ctx)
+{
@@ -41922,6 +44777,10 @@
+ if (iobj->o_pos >= 0)
+ return 0;
+
++ if (ino->i_sb->s_magic == FSMAGIC_PROC &&
++ proc_dentry_of_dead_task(d))
++ return 0;
++
+ if ((!IS_ROOT(d) && d_unhashed(d)) &&
+ !cpt_replaced(d, mnt, ctx))
+ dump_it = 1;
@@ -41948,6 +44807,14 @@
+ * process group. */
+ if (ino->i_nlink != 0) {
+ err = find_linked_dentry(d, mnt, ino, ctx);
++ if (err && S_ISREG(ino->i_mode)) {
++ err = create_dump_hardlink(d, mnt, ino, ctx);
++ iobj->o_flags |= CPT_INODE_HARDLINKED;
++ } else if (S_ISCHR(ino->i_mode) ||
++ S_ISBLK(ino->i_mode) ||
++ S_ISFIFO(ino->i_mode))
++ err = 0;
++
+ if (err) {
+ eprintk_ctx("deleted reference to existing inode, checkpointing is impossible: %d\n", err);
+ return -EBUSY;
@@ -42305,6 +45172,7 @@
+{
+ int* pfd;
+ char* path;
++ envid_t veid;
+};
+
+static int dumptmpfs(void *arg)
@@ -42316,7 +45184,7 @@
+ char *path = args->path;
+ char *argv[] = { "tar", "-c", "-S", "--numeric-owner", path, NULL };
+
-+ i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
++ i = real_env_create(args->veid, VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+ if (i < 0) {
+ eprintk("cannot enter ve to dump tmpfs\n");
+ module_put(THIS_MODULE);
@@ -42363,16 +45231,20 @@
+ int status;
+ mm_segment_t oldfs;
+ sigset_t ignore, blocked;
++ struct ve_struct *oldenv;
+
+ err = sc_pipe(pfd);
+ if (err < 0)
+ return err;
+ args.pfd = pfd;
+ args.path = path;
++ args.veid = VEID(get_exec_env());
+ ignore.sig[0] = CPT_SIG_IGNORE_MASK;
+ sigprocmask(SIG_BLOCK, &ignore, &blocked);
++ oldenv = set_exec_env(get_ve0());
+ err = pid = local_kernel_thread(dumptmpfs, (void*)&args,
+ SIGCHLD | CLONE_VFORK, 0);
++ set_exec_env(oldenv);
+ if (err < 0) {
+ eprintk_ctx("tmpfs local_kernel_thread: %d\n", err);
+ goto out;
@@ -42454,7 +45326,7 @@
+
+ /* One special case: mount --bind /a /a */
+ if (mnt->mnt_root == mnt->mnt_mountpoint)
-+ return cpt_dump_dentry(mnt->mnt_root, mnt, 0, ctx);
++ return cpt_dump_dentry(mnt->mnt_root, mnt, 0, 0, ctx);
+
+ list_for_each_prev(p, &mnt->mnt_list) {
+ struct vfsmount * m;
@@ -42467,7 +45339,7 @@
+ if (m->mnt_sb != mnt->mnt_sb)
+ continue;
+
-+ err = cpt_dump_dentry(mnt->mnt_root, m, 0, ctx);
++ err = cpt_dump_dentry(mnt->mnt_root, m, 0, 1, ctx);
+ if (err == 0)
+ break;
+ }
@@ -42517,19 +45389,30 @@
+ cpt_dump_string(path, ctx);
+ cpt_dump_string(mnt->mnt_sb->s_type->name, ctx);
+
-+ if (v.cpt_mntflags & CPT_MNT_BIND)
++ if (v.cpt_mntflags & CPT_MNT_BIND) {
+ err = cpt_dump_bind_mnt(mnt, ctx);
-+ else if (!(v.cpt_mntflags & CPT_MNT_EXT) &&
-+ strcmp(mnt->mnt_sb->s_type->name, "tmpfs") == 0) {
-+ mntget(mnt);
-+ up_read(&namespace_sem);
-+ err = cpt_dump_tmpfs(path, ctx);
-+ down_read(&namespace_sem);
-+ if (!err) {
-+ if (list_empty(&mnt->mnt_list))
++
++ /* Temporary solution for Ubuntu 8.04 */
++ if (err == -EINVAL && !strcmp(path, "/dev/.static/dev")) {
++ cpt_dump_string("/dev", ctx);
++ err = 0;
++ }
++ }
++ else if (!(v.cpt_mntflags & CPT_MNT_EXT)) {
++
++ if (mnt->mnt_sb->s_type->fs_flags & FS_REQUIRES_DEV) {
++ eprintk_ctx("Checkpoint supports only nodev fs: %s\n",
++ mnt->mnt_sb->s_type->name);
++ err = -EXDEV;
++ } else if (!strcmp(mnt->mnt_sb->s_type->name, "tmpfs")) {
++ mntget(mnt);
++ up_read(&namespace_sem);
++ err = cpt_dump_tmpfs(path, ctx);
++ down_read(&namespace_sem);
++ if (!err && list_empty(&mnt->mnt_list))
+ err = -EBUSY;
++ mntput(mnt);
+ }
-+ mntput(mnt);
+ }
+
+ cpt_pop_object(&saved_obj, ctx);
@@ -42547,7 +45430,7 @@
+{
+ struct mnt_namespace *n = obj->o_obj;
+ struct cpt_object_hdr v;
-+ struct list_head *p;
++ struct vfsmount *rootmnt, *p;
+ loff_t saved_obj;
+ int err = 0;
+
@@ -42563,8 +45446,9 @@
+ cpt_push_object(&saved_obj, ctx);
+
+ down_read(&namespace_sem);
-+ list_for_each(p, &n->list) {
-+ err = dump_vfsmount(list_entry(p, struct vfsmount, mnt_list), ctx);
++ rootmnt = n->root;
++ for (p = rootmnt; p; p = next_mnt(p, rootmnt)) {
++ err = dump_vfsmount(p, ctx);
+ if (err)
+ break;
+ }
@@ -42595,10 +45479,10 @@
+}
diff --git a/kernel/cpt/cpt_files.h b/kernel/cpt/cpt_files.h
new file mode 100644
-index 0000000..e0ebd97
+index 0000000..bc66731
--- /dev/null
+++ b/kernel/cpt/cpt_files.h
-@@ -0,0 +1,73 @@
+@@ -0,0 +1,77 @@
+int cpt_collect_files(cpt_context_t *);
+int cpt_collect_fs(cpt_context_t *);
+int cpt_collect_namespace(cpt_context_t *);
@@ -42619,6 +45503,7 @@
+
+struct file *rst_file(loff_t pos, int fd, struct cpt_context *ctx);
+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
++int rst_files_std(struct cpt_task_image *ti, struct cpt_context *ctx);
+__u32 rst_files_flag(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_fs_complete(struct cpt_task_image *ti, struct cpt_context *ctx);
+int rst_restore_fs(struct cpt_context *ctx);
@@ -42658,9 +45543,11 @@
+ unsigned flags,
+ struct cpt_context *ctx);
+
++struct dentry *cpt_fake_link(struct dentry *d, struct vfsmount *mnt,
++ struct inode *ino, struct cpt_context *ctx);
+
+int cpt_verify_overmount(char *path, struct dentry *d, struct vfsmount *mnt,
-+ cpt_context_t *ctx);
++ int verify, cpt_context_t *ctx);
+
+#define check_one_vfsmount(mnt) \
+ (strcmp(mnt->mnt_sb->s_type->name, "rootfs") != 0 && \
@@ -42671,7 +45558,8 @@
+ strcmp(mnt->mnt_sb->s_type->name, "tmpfs") != 0 && \
+ strcmp(mnt->mnt_sb->s_type->name, "devpts") != 0 && \
+ strcmp(mnt->mnt_sb->s_type->name, "proc") != 0 && \
-+ strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0)
++ strcmp(mnt->mnt_sb->s_type->name, "sysfs") != 0 && \
++ strcmp(mnt->mnt_sb->s_type->name, "binfmt_misc") != 0)
diff --git a/kernel/cpt/cpt_fsmagic.h b/kernel/cpt/cpt_fsmagic.h
new file mode 100644
index 0000000..7e79789
@@ -42697,10 +45585,10 @@
+#define FSMAGIC_ANON 0x09041934
diff --git a/kernel/cpt/cpt_inotify.c b/kernel/cpt/cpt_inotify.c
new file mode 100644
-index 0000000..87f6bfd
+index 0000000..4f2abb0
--- /dev/null
+++ b/kernel/cpt/cpt_inotify.c
-@@ -0,0 +1,151 @@
+@@ -0,0 +1,174 @@
+/*
+ *
+ * kernel/cpt/cpt_inotify.c
@@ -42744,6 +45632,29 @@
+#include "cpt_fsmagic.h"
+#include "cpt_syscalls.h"
+
++static int dump_watch_inode(struct path *path, cpt_context_t *ctx)
++{
++ int err;
++ struct dentry *d;
++
++ d = path->dentry;
++ if (IS_ROOT(d) || !d_unhashed(d))
++ goto dump_dir;
++
++ d = cpt_fake_link(d->d_inode->i_nlink ? d : NULL,
++ path->mnt, d->d_inode, ctx);
++
++ if (IS_ERR(d))
++ return PTR_ERR(d);
++
++dump_dir:
++ err = cpt_dump_dir(d, path->mnt, ctx);
++ if (d != path->dentry)
++ dput(d);
++
++ return err;
++}
++
+static int cpt_dump_watches(struct fsnotify_group *g, struct cpt_context *ctx)
+{
+ int err = 0;
@@ -42783,7 +45694,7 @@
+ path_get(&path);
+ spin_unlock(&fse->lock);
+
-+ err = cpt_dump_dir(path.dentry, path.mnt, ctx);
++ err = dump_watch_inode(&path, ctx);
+ cpt_pop_object(&saved_obj, ctx);
+ path_put(&path);
+
@@ -42854,10 +45765,10 @@
+}
diff --git a/kernel/cpt/cpt_kernel.c b/kernel/cpt/cpt_kernel.c
new file mode 100644
-index 0000000..3272d81
+index 0000000..10fa5d6
--- /dev/null
+++ b/kernel/cpt/cpt_kernel.c
-@@ -0,0 +1,178 @@
+@@ -0,0 +1,185 @@
+/*
+ *
+ * kernel/cpt/cpt_kernel.c
@@ -42880,6 +45791,8 @@
+#include <asm/cpufeature.h>
+#endif
+#include <linux/cpt_image.h>
++#include <linux/virtinfo.h>
++#include <linux/virtinfoscp.h>
+
+#include "cpt_kernel.h"
+#include "cpt_syscalls.h"
@@ -42952,7 +45865,9 @@
+ }
+ if (!try_module_get(THIS_MODULE))
+ return -EBUSY;
-+ ret = asm_kernel_thread(fn, arg, flags, pid);
++ while ((ret = asm_kernel_thread(fn, arg, flags, pid)) ==
++ -ERESTARTNOINTR)
++ cond_resched();
+ if (ret < 0)
+ module_put(THIS_MODULE);
+ return ret;
@@ -42981,7 +45896,7 @@
+ return ret;
+}
+
-+unsigned int test_cpu_caps(void)
++unsigned int test_cpu_caps_and_features(void)
+{
+ unsigned int flags = 0;
+
@@ -43023,6 +45938,9 @@
+ flags |= 1 << CPT_CPU_X86_IA64;
+ flags |= 1 << CPT_CPU_X86_FXSR;
+#endif
++ if (virtinfo_notifier_call(VITYPE_SCP,
++ VIRTINFO_SCP_TEST, NULL) & NOTIFY_FAIL)
++ flags |= 1 << CPT_SLM_DMPRST;
+ return flags;
+}
+
@@ -43038,7 +45956,7 @@
+}
diff --git a/kernel/cpt/cpt_kernel.h b/kernel/cpt/cpt_kernel.h
new file mode 100644
-index 0000000..9254778
+index 0000000..8bbd402
--- /dev/null
+++ b/kernel/cpt/cpt_kernel.h
@@ -0,0 +1,99 @@
@@ -43092,7 +46010,7 @@
+static inline void vefs_track_notify(struct dentry *vdentry, int track_cow) { };
+#endif
+
-+unsigned int test_cpu_caps(void);
++unsigned int test_cpu_caps_and_features(void);
+unsigned int test_kernel_config(void);
+
+#define test_one_flag_old(src, dst, flag, message, ret) \
@@ -44113,10 +47031,10 @@
+extern struct vm_operations_struct special_mapping_vmops;
diff --git a/kernel/cpt/cpt_net.c b/kernel/cpt/cpt_net.c
new file mode 100644
-index 0000000..9e09675
+index 0000000..4e183ba
--- /dev/null
+++ b/kernel/cpt/cpt_net.c
-@@ -0,0 +1,544 @@
+@@ -0,0 +1,652 @@
+/*
+ *
+ * kernel/cpt/cpt_net.c
@@ -44514,13 +47432,20 @@
+ return err;
+}
+
++struct args_t
++{
++ int* pfd;
++ envid_t veid;
++};
++
+static int dumpfn(void *arg)
+{
+ int i;
-+ int *pfd = arg;
++ struct args_t *args = arg;
++ int *pfd = args->pfd;
+ char *argv[] = { "iptables-save", "-c", NULL };
+
-+ i = real_env_create(VEID(get_exec_env()), VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
++ i = real_env_create(args->veid, VE_ENTER|VE_SKIPLOCK, 2, NULL, 0);
+ if (i < 0) {
+ eprintk("cannot enter ve to dump iptables\n");
+ module_put(THIS_MODULE);
@@ -44560,6 +47485,8 @@
+ int status;
+ mm_segment_t oldfs;
+ sigset_t ignore, blocked;
++ struct args_t args;
++ struct ve_struct *oldenv;
+
+ if (!(get_exec_env()->_iptables_modules & VE_IP_IPTABLES_MOD))
+ return 0;
@@ -44569,9 +47496,14 @@
+ eprintk_ctx("sc_pipe: %d\n", err);
+ return err;
+ }
++ args.pfd = pfd;
++ args.veid = VEID(get_exec_env());
+ ignore.sig[0] = CPT_SIG_IGNORE_MASK;
+ sigprocmask(SIG_BLOCK, &ignore, &blocked);
-+ err = pid = local_kernel_thread(dumpfn, (void*)pfd, SIGCHLD, 0);
++ oldenv = set_exec_env(get_ve0());
++ err = pid = local_kernel_thread(dumpfn, (void*)&args,
++ SIGCHLD | CLONE_VFORK, 0);
++ set_exec_env(oldenv);
+ if (err < 0) {
+ eprintk_ctx("local_kernel_thread: %d\n", err);
+ goto out;
@@ -44646,6 +47578,98 @@
+ return err;
+}
+
++static unsigned long fold_field(void *mib[], int offt)
++{
++ unsigned long res = 0;
++ int i;
++
++ for_each_possible_cpu(i) {
++ res += *(((unsigned long *) per_cpu_ptr(mib[0], i)) + offt);
++ res += *(((unsigned long *) per_cpu_ptr(mib[1], i)) + offt);
++ }
++ return res;
++}
++
++static void cpt_dump_snmp_stat(struct cpt_context *ctx, void *mib[], int n)
++{
++ int i;
++ struct cpt_object_hdr o;
++ __u32 *stats;
++
++ stats = cpt_get_buf(ctx);
++
++ cpt_open_object(NULL, ctx);
++
++ for (i = 0; i < n; i++)
++ stats[i] = fold_field(mib, i);
++
++ o.cpt_next = CPT_NULL;
++ o.cpt_object = CPT_OBJ_BITS;
++ o.cpt_hdrlen = sizeof(o);
++ o.cpt_content = CPT_CONTENT_DATA;
++
++ ctx->write(&o, sizeof(o), ctx);
++ ctx->write(stats, n * sizeof(*stats), ctx);
++ ctx->align(ctx);
++
++ cpt_close_object(ctx);
++
++ cpt_release_buf(ctx);
++}
++
++static void cpt_dump_snmp_stub(struct cpt_context *ctx)
++{
++ struct cpt_object_hdr o;
++
++ cpt_open_object(NULL, ctx);
++ o.cpt_next = CPT_NULL;
++ o.cpt_object = CPT_OBJ_BITS;
++ o.cpt_hdrlen = sizeof(o);
++ o.cpt_content = CPT_CONTENT_VOID;
++ ctx->write(&o, sizeof(o), ctx);
++ ctx->align(ctx);
++ cpt_close_object(ctx);
++}
++
++static int cpt_dump_snmp(struct cpt_context *ctx)
++{
++ struct ve_struct *ve;
++ struct net *net;
++
++ ve = get_exec_env();
++ net = ve->ve_netns;
++
++ cpt_open_section(ctx, CPT_SECT_SNMP_STATS);
++
++ cpt_dump_snmp_stat(ctx, (void **)&net->mib.net_statistics,
++ LINUX_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&net->mib.ip_statistics,
++ IPSTATS_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&net->mib.tcp_statistics,
++ TCP_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&net->mib.udp_statistics,
++ UDP_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&net->mib.icmp_statistics,
++ ICMP_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&net->mib.icmpmsg_statistics,
++ ICMPMSG_MIB_MAX);
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++ cpt_dump_snmp_stat(ctx, (void **)&ve->_ipv6_statistics,
++ IPSTATS_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&ve->_udp_stats_in6,
++ UDP_MIB_MAX);
++ cpt_dump_snmp_stat(ctx, (void **)&ve->_icmpv6_statistics,
++ ICMP6_MIB_MAX);
++#else
++ cpt_dump_snmp_stub(ctx);
++ cpt_dump_snmp_stub(ctx);
++ cpt_dump_snmp_stub(ctx);
++#endif
++ cpt_close_section(ctx);
++
++ return 0;
++}
++
+int cpt_dump_ifinfo(struct cpt_context * ctx)
+{
+ int err;
@@ -44659,6 +47683,8 @@
+ err = cpt_dump_route(ctx);
+ if (!err)
+ err = cpt_dump_iptables(ctx);
++ if (!err)
++ err = cpt_dump_snmp(ctx);
+ return err;
+}
diff --git a/kernel/cpt/cpt_net.h b/kernel/cpt/cpt_net.h
@@ -44676,10 +47702,10 @@
+int rst_restore_ip_conntrack(struct cpt_context * ctx);
diff --git a/kernel/cpt/cpt_obj.c b/kernel/cpt/cpt_obj.c
new file mode 100644
-index 0000000..7ab23d7
+index 0000000..341d2ab
--- /dev/null
+++ b/kernel/cpt/cpt_obj.c
-@@ -0,0 +1,162 @@
+@@ -0,0 +1,163 @@
+/*
+ *
+ * kernel/cpt/cpt_obj.c
@@ -44720,6 +47746,7 @@
+ obj->o_index = CPT_NOINDEX;
+ obj->o_obj = NULL;
+ obj->o_image = NULL;
++ obj->o_flags = 0;
+ ctx->objcount++;
+ }
+ return obj;
@@ -44844,10 +47871,10 @@
+}
diff --git a/kernel/cpt/cpt_obj.h b/kernel/cpt/cpt_obj.h
new file mode 100644
-index 0000000..7762623
+index 0000000..2dca39b
--- /dev/null
+++ b/kernel/cpt/cpt_obj.h
-@@ -0,0 +1,62 @@
+@@ -0,0 +1,64 @@
+#ifndef __CPT_OBJ_H_
+#define __CPT_OBJ_H_ 1
+
@@ -44867,6 +47894,8 @@
+ void *o_image;
+ void *o_parent;
+ struct list_head o_alist;
++ unsigned int o_flags;
++#define CPT_INODE_HARDLINKED 0x1
+} cpt_object_t;
+
+struct cpt_context;
@@ -44912,10 +47941,10 @@
+#endif /* __CPT_OBJ_H_ */
diff --git a/kernel/cpt/cpt_proc.c b/kernel/cpt/cpt_proc.c
new file mode 100644
-index 0000000..918fe2a
+index 0000000..a7d2d82
--- /dev/null
+++ b/kernel/cpt/cpt_proc.c
-@@ -0,0 +1,594 @@
+@@ -0,0 +1,623 @@
+/*
+ *
+ * kernel/cpt/cpt_proc.c
@@ -45001,6 +48030,8 @@
+
+void cpt_context_release(cpt_context_t *ctx)
+{
++ int i;
++
+ list_del(&ctx->ctx_list);
+ spin_unlock(&cpt_context_lock);
+
@@ -45027,6 +48058,8 @@
+ fput(ctx->errorfile);
+ ctx->errorfile = NULL;
+ }
++ for (i = 0; i < ctx->linkdirs_num; i++)
++ fput(ctx->linkdirs[i]);
+ if (ctx->error_msg) {
+ free_page((unsigned long)ctx->error_msg);
+ ctx->error_msg = NULL;
@@ -45122,7 +48155,7 @@
+ unsigned int src_flags, dst_flags = arg;
+
+ err = 0;
-+ src_flags = test_cpu_caps();
++ src_flags = test_cpu_caps_and_features();
+ test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_CMOV, "cmov", err);
+ test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_FXSR, "fxsr", err);
+ test_one_flag_old(src_flags, dst_flags, CPT_CPU_X86_SSE, "sse", err);
@@ -45244,6 +48277,26 @@
+ fput(ctx->file);
+ ctx->file = dfile;
+ break;
++ case CPT_LINKDIR_ADD:
++ if (ctx->linkdirs_num >= CPT_MAX_LINKDIRS) {
++ err = -EMLINK;
++ break;
++ }
++
++ dfile = fget(arg);
++ if (!dfile) {
++ err = -EBADFD;
++ break;
++ }
++
++ if (!S_ISDIR(dfile->f_dentry->d_inode->i_mode)) {
++ err = -ENOTDIR;
++ fput(dfile);
++ break;
++ }
++
++ ctx->linkdirs[ctx->linkdirs_num++] = dfile;
++ break;
+ case CPT_SET_ERRORFD:
+ if (arg >= 0) {
+ dfile = fget(arg);
@@ -45304,7 +48357,7 @@
+ break;
+ }
+ ctx->dst_cpu_flags = arg;
-+ ctx->src_cpu_flags = test_cpu_caps();
++ ctx->src_cpu_flags = test_cpu_caps_and_features();
+ break;
+ case CPT_SUSPEND:
+ if (cpt_context_lookup_veid(ctx->ve_id) ||
@@ -45378,6 +48431,11 @@
+ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_IA64, "ia64", err);
+ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL, "syscall", err);
+ test_one_flag(src_flags, dst_flags, CPT_CPU_X86_SYSCALL32, "syscall32", err);
++ if (dst_flags & (1 << CPT_SLM_DMPRST)) {
++ eprintk_ctx("SLM is enabled on destination node, but slm_dmprst module is not loaded\n");
++ err = 1;
++ }
++
+ if (src_flags & CPT_UNSUPPORTED_MASK)
+ err = 2;
+ break;
@@ -45512,10 +48570,10 @@
+module_exit(exit_cpt);
diff --git a/kernel/cpt/cpt_process.c b/kernel/cpt/cpt_process.c
new file mode 100644
-index 0000000..2afc171
+index 0000000..6314bee
--- /dev/null
+++ b/kernel/cpt/cpt_process.c
-@@ -0,0 +1,1383 @@
+@@ -0,0 +1,1379 @@
+/*
+ *
+ * kernel/cpt/cpt_process.c
@@ -46241,10 +49299,6 @@
+
+int cpt_check_unsupported(struct task_struct *tsk, cpt_context_t *ctx)
+{
-+ if (tsk->splice_pipe) {
-+ eprintk_ctx("splice is used by " CPT_FID "\n", CPT_TID(tsk));
-+ return -EBUSY;
-+ }
+#ifdef CONFIG_KEYS
+ if (tsk->cred->request_key_auth || tsk->cred->thread_keyring) {
+ eprintk_ctx("keys are used by " CPT_FID "\n", CPT_TID(tsk));
@@ -46321,7 +49375,7 @@
+ return -EBUSY;
+ }
+
-+ v->cpt_flags = tsk->flags&~(PF_FROZEN|PF_EXIT_RESTART);
++ v->cpt_flags = tsk->flags & CPT_TASK_FLAGS_MASK;
+ v->cpt_ptrace = tsk->ptrace;
+ v->cpt_prio = tsk->prio;
+ v->cpt_exit_code = tsk->exit_code;
@@ -46920,10 +49974,10 @@
+struct pid *alloc_vpid_safe(pid_t vnr);
diff --git a/kernel/cpt/cpt_socket.c b/kernel/cpt/cpt_socket.c
new file mode 100644
-index 0000000..939fb30
+index 0000000..3943b60
--- /dev/null
+++ b/kernel/cpt/cpt_socket.c
-@@ -0,0 +1,790 @@
+@@ -0,0 +1,802 @@
+/*
+ *
+ * kernel/cpt/cpt_socket.c
@@ -47105,7 +50159,7 @@
+}
+
+int cpt_dump_skb(int type, int owner, struct sk_buff *skb,
-+ struct cpt_context *ctx)
++ struct sock *sk, struct cpt_context *ctx)
+{
+ struct cpt_skb_image *v = cpt_get_buf(ctx);
+ loff_t saved_obj;
@@ -47129,7 +50183,19 @@
+ v->cpt_nh = skb_network_header(skb) - skb->head;
+ v->cpt_mac = skb_mac_header(skb) - skb->head;
+ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v->cpt_cb));
-+ memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
++ memset(v->cpt_cb, 0, sizeof(v->cpt_cb));
++#if !defined(CONFIG_IPV6) && !defined(CONFIG_IPV6_MODULE)
++ if (sk->sk_protocol == IPPROTO_TCP) {
++ /* Save control block according to tcp_skb_cb with IPv6 */
++ BUG_ON(sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm) >
++ sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm));
++ memcpy(v->cpt_cb, skb->cb, sizeof(struct inet_skb_parm));
++ memcpy((void *)v->cpt_cb + sizeof(struct inet6_skb_parm),
++ skb->cb + sizeof(struct inet_skb_parm),
++ sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
++ } else
++#endif
++ memcpy(v->cpt_cb, skb->cb, sizeof(v->cpt_cb));
+ if (sizeof(skb->cb) > sizeof(v->cpt_cb)) {
+ int i;
+ for (i=sizeof(v->cpt_cb); i<sizeof(skb->cb); i++) {
@@ -47256,7 +50322,7 @@
+ }
+ }
+
-+ err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, ctx);
++ err = cpt_dump_skb(CPT_SKB_RQ, idx, skb, sk, ctx);
+ if (err)
+ return err;
+
@@ -47274,7 +50340,7 @@
+
+ skb = skb_peek(&sk->sk_write_queue);
+ while (skb && skb != (struct sk_buff*)&sk->sk_write_queue) {
-+ int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, ctx);
++ int err = cpt_dump_skb(CPT_SKB_WQ, idx, skb, sk, ctx);
+ if (err)
+ return err;
+
@@ -47438,7 +50504,7 @@
+ } else {
+ wprintk_ctx("af_unix path is too long: %s (%s)\n", path, ((char*)v->cpt_laddr)+2);
+ }
-+ err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, ctx);
++ err = cpt_verify_overmount(path, d, unix_sk(sk)->mnt, 1, ctx);
+ } else {
+ eprintk_ctx("cannot get path of an af_unix socket\n");
+ err = PTR_ERR(path);
@@ -47716,10 +50782,10 @@
+}
diff --git a/kernel/cpt/cpt_socket.h b/kernel/cpt/cpt_socket.h
new file mode 100644
-index 0000000..6489184
+index 0000000..9c64399
--- /dev/null
+++ b/kernel/cpt/cpt_socket.h
-@@ -0,0 +1,33 @@
+@@ -0,0 +1,37 @@
+struct sock;
+
+int cpt_collect_passedfds(cpt_context_t *);
@@ -47733,7 +50799,8 @@
+int cpt_dump_orphaned_sockets(struct cpt_context *ctx);
+
+int rst_sock_attr(loff_t *pos_p, struct sock *sk, cpt_context_t *ctx);
-+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx);
++struct sk_buff * rst_skb(struct sock *sk, loff_t *pos_p, __u32 *owner,
++ __u32 *queue, struct cpt_context *ctx);
+
+void cpt_unlock_sockets(cpt_context_t *);
+void cpt_kill_sockets(cpt_context_t *);
@@ -47742,11 +50809,14 @@
+int cpt_kill_socket(struct sock *, cpt_context_t *);
+int cpt_dump_socket_in(struct cpt_sock_image *, struct sock *, struct cpt_context*);
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *, struct cpt_context *ctx);
++int rst_listen_socket_in(struct sock *sk, struct cpt_sock_image *si,
++ loff_t pos, struct cpt_context *ctx);
+__u32 cpt_socket_fasync(struct file *file, struct cpt_context *ctx);
+int cpt_attach_accept(struct sock *lsk, struct sock *sk, cpt_context_t *);
+int rst_restore_synwait_queue(struct sock *sk, struct cpt_sock_image *si, loff_t pos, struct cpt_context *ctx);
+int cpt_dump_ofo_queue(int idx, struct sock *sk, struct cpt_context *ctx);
-+int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct cpt_context *ctx);
++int cpt_dump_skb(int type, int owner, struct sk_buff *skb, struct sock *sk,
++ struct cpt_context *ctx);
+int cpt_dump_mcfilter(struct sock *sk, struct cpt_context *ctx);
+
+int rst_sk_mcfilter_in(struct sock *sk, struct cpt_sockmc_image *v,
@@ -47755,7 +50825,7 @@
+ loff_t pos, cpt_context_t *ctx);
diff --git a/kernel/cpt/cpt_socket_in.c b/kernel/cpt/cpt_socket_in.c
new file mode 100644
-index 0000000..9c25d70
+index 0000000..d565745
--- /dev/null
+++ b/kernel/cpt/cpt_socket_in.c
@@ -0,0 +1,448 @@
@@ -47820,7 +50890,7 @@
+ while (skb && skb != (struct sk_buff*)&tp->out_of_order_queue) {
+ int err;
+
-+ err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, ctx);
++ err = cpt_dump_skb(CPT_SKB_OFOQ, idx, skb, sk, ctx);
+ if (err)
+ return err;
+
@@ -48073,7 +51143,7 @@
+ v->cpt_snt_isn = tcp_rsk(req)->snt_isn;
+ v->cpt_rmt_port = inet_rsk(req)->rmt_port;
+ v->cpt_mss = req->mss;
-+ // // v->cpt_family = (req->class == &or_ipv4 ? AF_INET : AF_INET6);
++ v->cpt_family = req->rsk_ops->family;
+ v->cpt_retrans = req->retrans;
+ v->cpt_snd_wscale = inet_rsk(req)->snd_wscale;
+ v->cpt_rcv_wscale = inet_rsk(req)->rcv_wscale;
@@ -48946,10 +52016,10 @@
+}
diff --git a/kernel/cpt/cpt_ubc.c b/kernel/cpt/cpt_ubc.c
new file mode 100644
-index 0000000..5746184
+index 0000000..0fc4f5f
--- /dev/null
+++ b/kernel/cpt/cpt_ubc.c
-@@ -0,0 +1,133 @@
+@@ -0,0 +1,135 @@
+/*
+ *
+ * kernel/cpt/cpt_ubc.c
@@ -49020,13 +52090,15 @@
+ v->cpt_next = CPT_NULL;
+ v->cpt_object = CPT_OBJ_UBC;
+ v->cpt_hdrlen = sizeof(*v);
-+ v->cpt_content = CPT_CONTENT_VOID;
++ v->cpt_content = CPT_CONTENT_ARRAY;
+
+ if (obj->o_parent != NULL)
+ v->cpt_parent = ((cpt_object_t *)obj->o_parent)->o_pos;
+ else
+ v->cpt_parent = CPT_NULL;
+ v->cpt_id = (obj->o_parent != NULL) ? bc->ub_uid : 0;
++ v->cpt_ub_resources = UB_RESOURCES;
++ BUILD_BUG_ON(ARRAY_SIZE(v->cpt_parms) < UB_RESOURCES * 2);
+ for (i = 0; i < UB_RESOURCES; i++) {
+ dump_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
+ dump_one_bc_parm(v->cpt_parms + i * 2 + 1, bc->ub_store + i, 1);
@@ -49187,10 +52259,10 @@
+
diff --git a/kernel/cpt/rst_conntrack.c b/kernel/cpt/rst_conntrack.c
new file mode 100644
-index 0000000..4c31f32
+index 0000000..b863ac4
--- /dev/null
+++ b/kernel/cpt/rst_conntrack.c
-@@ -0,0 +1,283 @@
+@@ -0,0 +1,328 @@
+/*
+ *
+ * kernel/cpt/rst_conntrack.c
@@ -49249,17 +52321,33 @@
+ int index;
+};
+
-+static void decode_tuple(struct cpt_ipct_tuple *v, struct ip_conntrack_tuple *tuple, int dir)
++static int decode_tuple(struct cpt_ipct_tuple *v,
++ struct ip_conntrack_tuple *tuple, int dir,
++ cpt_context_t *ctx)
+{
+ tuple->dst.ip = v->cpt_dst;
+ tuple->dst.u.all = v->cpt_dstport;
-+ tuple->dst.protonum = v->cpt_protonum;
-+ tuple->dst.dir = v->cpt_dir;
-+ if (dir != tuple->dst.dir)
-+ wprintk("dir != tuple->dst.dir\n");
++ if (ctx->image_version < CPT_VERSION_16) {
++ /* In 2.6.9 kernel protonum has short type */
++ __u16 protonum = *(__u16 *)&v->cpt_protonum;
++ if (protonum > 0xff && protonum < 0xffff) {
++ eprintk_ctx("tuple: protonum > 255: %u\n", protonum);
++ return -EINVAL;
++ }
++ tuple->dst.protonum = protonum;
++ tuple->dst.dir = dir;
++ } else {
++ tuple->dst.protonum = v->cpt_protonum;
++ tuple->dst.dir = v->cpt_dir;
++ if (dir != tuple->dst.dir) {
++ eprintk_ctx("dir != tuple->dst.dir\n");
++ return -EINVAL;
++ }
++ }
+
+ tuple->src.ip = v->cpt_src;
+ tuple->src.u.all = v->cpt_srcport;
++ return 0;
+}
+
+
@@ -49314,16 +52402,13 @@
+ return -ENOMEM;
+ }
+
-+ if (ct->helper->timeout && !del_timer(&exp->timeout)) {
-+ /* Dying already. We can do nothing. */
++ if (decode_tuple(&v.cpt_tuple, &exp->tuple, 0, ctx) ||
++ decode_tuple(&v.cpt_mask, &exp->mask, 0, ctx)) {
++ ip_conntrack_expect_put(exp);
+ write_unlock_bh(&ip_conntrack_lock);
-+ dprintk_ctx("conntrack expectation is dying\n");
-+ continue;
++ return -EINVAL;
+ }
+
-+ decode_tuple(&v.cpt_tuple, &exp->tuple, 0);
-+ decode_tuple(&v.cpt_mask, &exp->mask, 0);
-+
+ exp->master = ct;
+ nf_conntrack_get(&ct->ct_general);
+ ip_conntrack_expect_insert(exp);
@@ -49337,11 +52422,12 @@
+ } else
+#endif
+ if (ct->helper->timeout) {
-+ exp->timeout.expires = jiffies + v.cpt_timeout;
-+ add_timer(&exp->timeout);
++ mod_timer(&exp->timeout, jiffies + v.cpt_timeout);
+ }
+ write_unlock_bh(&ip_conntrack_lock);
+
++ ip_conntrack_expect_put(exp);
++
+ pos += v.cpt_next;
+ }
+ return 0;
@@ -49359,8 +52445,11 @@
+ if (c == NULL)
+ return -ENOMEM;
+
-+ decode_tuple(&ci->cpt_tuple[0], &orig, 0);
-+ decode_tuple(&ci->cpt_tuple[1], &repl, 1);
++ if (decode_tuple(&ci->cpt_tuple[0], &orig, 0, ctx) ||
++ decode_tuple(&ci->cpt_tuple[1], &repl, 1, ctx)) {
++ kfree(c);
++ return -EINVAL;
++ }
+
+ conntrack = ip_conntrack_alloc(&orig, &repl, get_exec_env()->_ip_conntrack->ub);
+ if (!conntrack || IS_ERR(conntrack)) {
@@ -49373,14 +52462,15 @@
+ *ct_list = c;
+ c->index = ci->cpt_index;
+
-+ decode_tuple(&ci->cpt_tuple[0], &conntrack->tuplehash[0].tuple, 0);
-+ decode_tuple(&ci->cpt_tuple[1], &conntrack->tuplehash[1].tuple, 1);
-+
+ conntrack->status = ci->cpt_status;
+
+ memcpy(&conntrack->proto, ci->cpt_proto_data, sizeof(conntrack->proto));
+ memcpy(&conntrack->help, ci->cpt_help_data, sizeof(conntrack->help));
+
++#if defined(CONFIG_IP_NF_CONNTRACK_MARK)
++ conntrack->mark = ci->cpt_mark;
++#endif
++
+#ifdef CONFIG_IP_NF_NAT_NEEDED
+#if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \
+ defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE)
@@ -49412,9 +52502,34 @@
+ if (err == 0 && ci->cpt_next > ci->cpt_hdrlen)
+ err = undump_expect_list(conntrack, ci, pos, *ct_list, ctx);
+
++ if (conntrack->helper)
++ ip_conntrack_helper_put(conntrack->helper);
++
+ return err;
+}
+
++static void convert_conntrack_image(struct cpt_ip_conntrack_image *ci)
++{
++ struct cpt_ip_conntrack_image_compat img;
++
++ memcpy(&img, ci, sizeof(struct cpt_ip_conntrack_image_compat));
++ /*
++ * Size of cpt_help_data in 2.6.9 kernel is 16 bytes,
++ * in 2.6.18 cpt_help_data size is 24 bytes, so zero the rest 8 bytes
++ */
++ memset(ci->cpt_help_data + 4, 0, 8);
++ ci->cpt_initialized = img.cpt_initialized;
++ ci->cpt_num_manips = img.cpt_num_manips;
++ memcpy(ci->cpt_nat_manips, img.cpt_nat_manips, sizeof(img.cpt_nat_manips));
++ memcpy(ci->cpt_nat_seq, img.cpt_nat_seq, sizeof(img.cpt_nat_seq));
++ ci->cpt_masq_index = img.cpt_masq_index;
++ /* Id will be assigned in ip_conntrack_hash_insert(), so make it 0 here */
++ ci->cpt_id = 0;
++ /* mark was not supported in 2.6.9, so set it to default 0 value */
++ ci->cpt_mark = 0;
++
++}
++
+int rst_restore_ip_conntrack(struct cpt_context * ctx)
+{
+ int err = 0;
@@ -49445,6 +52560,8 @@
+ err = rst_get_object(CPT_OBJ_NET_CONNTRACK, sec, &ci, ctx);
+ if (err)
+ break;
++ if (ctx->image_version < CPT_VERSION_16)
++ convert_conntrack_image(&ci);
+ err = undump_one_ct(&ci, sec, &ct_list, ctx);
+ if (err)
+ break;
@@ -49476,10 +52593,10 @@
+#endif
diff --git a/kernel/cpt/rst_context.c b/kernel/cpt/rst_context.c
new file mode 100644
-index 0000000..c68e807
+index 0000000..0007197
--- /dev/null
+++ b/kernel/cpt/rst_context.c
-@@ -0,0 +1,330 @@
+@@ -0,0 +1,331 @@
+/*
+ *
+ * kernel/cpt/rst_context.c
@@ -49662,8 +52779,9 @@
+ ctx->start_time.tv_nsec = h.cpt_start_nsec;
+ ctx->kernel_config_flags = h.cpt_kernel_config[0];
+ ctx->iptables_mask = h.cpt_iptables_mask;
-+ if (h.cpt_image_version > CPT_VERSION_32 ||
-+ CPT_VERSION_MINOR(h.cpt_image_version) > 1) {
++ if (h.cpt_image_version > CPT_CURRENT_VERSION ||
++ CPT_VERSION_MINOR(h.cpt_image_version) >
++ CPT_VERSION_MINOR(CPT_CURRENT_VERSION)) {
+ eprintk_ctx("Unknown image version: %x. Can't restore.\n",
+ h.cpt_image_version);
+ err = -EINVAL;
@@ -49987,10 +53105,10 @@
+}
diff --git a/kernel/cpt/rst_files.c b/kernel/cpt/rst_files.c
new file mode 100644
-index 0000000..4b21b04
+index 0000000..a84e3d3
--- /dev/null
+++ b/kernel/cpt/rst_files.c
-@@ -0,0 +1,1698 @@
+@@ -0,0 +1,1779 @@
+/*
+ *
+ * kernel/cpt/rst_files.c
@@ -50030,6 +53148,7 @@
+#include <linux/fdtable.h>
+#include <linux/shm.h>
+#include <linux/signalfd.h>
++#include <linux/proc_fs.h>
+
+#include "cpt_obj.h"
+#include "cpt_context.h"
@@ -50523,7 +53642,7 @@
+ fput(file);
+ file = dentry_open(dget(file->f_dentry),
+ mntget(file->f_vfsmnt),
-+ O_WRONLY, NULL);
++ O_WRONLY | O_LARGEFILE, NULL);
+ if (IS_ERR(file)) {
+ __cpt_release_buf(ctx);
+ return PTR_ERR(file);
@@ -50825,6 +53944,7 @@
+ struct cpt_file_image fi;
+ __u8 *name = NULL;
+ struct file *file;
++ struct proc_dir_entry *proc_dead_file;
+ int flags;
+
+ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILE, pos, ctx);
@@ -50896,6 +54016,12 @@
+ err = -EINVAL;
+ goto err_out;
+ }
++ if ((fi.cpt_lflags & CPT_DENTRY_HARDLINKED) &&
++ !ctx->hardlinked_on) {
++ eprintk_ctx("Open hardlinked is off\n");
++ err = -EPERM;
++ goto err_out;
++ }
+ goto open_file;
+ }
+ }
@@ -50963,8 +54089,32 @@
+ goto map_file;
+ }
+
++ /* This hook is needed to open file /proc/<pid>/<somefile>
++ * but there is no proccess with pid <pid>.
++ */
++ proc_dead_file = NULL;
++ if (fi.cpt_lflags & CPT_DENTRY_PROCPID_DEAD) {
++ sprintf(name, "/proc/rst_dead_pid_file_%d", task_pid_vnr(current));
++
++ proc_dead_file = create_proc_entry(name + 6, S_IRUGO|S_IWUGO,
++ NULL);
++ if (!proc_dead_file) {
++ eprintk_ctx("can't create proc entry %s\n", name);
++ err = -ENOMEM;
++ goto err_out;
++ }
++#ifdef CONFIG_PROC_FS
++ proc_dead_file->proc_fops = &dummy_proc_pid_file_operations;
++#endif
++ }
++
+ file = filp_open(name, flags, 0);
+
++ if (proc_dead_file) {
++ remove_proc_entry(proc_dead_file->name, NULL);
++ if (!IS_ERR(file))
++ d_drop(file->f_dentry);
++ }
+map_file:
+ if (!IS_ERR(file)) {
+ fixup_file_flags(file, &fi, was_dentry_open, pos, ctx);
@@ -51009,7 +54159,8 @@
+ goto err_put;
+ }
+ } else {
-+ if (fi.cpt_lflags & CPT_DENTRY_PROC) {
++ if ((fi.cpt_lflags & CPT_DENTRY_PROC) &&
++ !(fi.cpt_lflags & CPT_DENTRY_PROCPID_DEAD)) {
+ dprintk_ctx("rst_file /proc delayed\n");
+ file = NULL;
+ } else if (name)
@@ -51073,7 +54224,8 @@
+extern int expand_fdtable(struct files_struct *files, int nr);
+
+
-+int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
++static int rst_files(struct cpt_task_image *ti, struct cpt_context *ctx,
++ int from, int to)
+{
+ struct cpt_files_struct_image fi;
+ struct files_struct *f = current->files;
@@ -51088,6 +54240,14 @@
+ return 0;
+ }
+
++ if (from == 3) {
++ err = rst_get_object(CPT_OBJ_FILES, ti->cpt_files, &fi, ctx);
++ if (err)
++ return err;
++
++ goto just_do_it;
++ }
++
+ obj = lookup_cpt_obj_bypos(CPT_OBJ_FILES, ti->cpt_files, ctx);
+ if (obj) {
+ if (obj->o_obj != f) {
@@ -51113,6 +54273,7 @@
+ return err;
+ }
+
++just_do_it:
+ pos = ti->cpt_files + fi.cpt_hdrlen;
+ endpos = ti->cpt_files + fi.cpt_next;
+ while (pos < endpos) {
@@ -51122,6 +54283,9 @@
+ err = rst_get_object(CPT_OBJ_FILEDESC, pos, &fdi, ctx);
+ if (err)
+ return err;
++ if (fdi.cpt_fd < from || fdi.cpt_fd > to)
++ goto skip;
++
+ filp = rst_file(fdi.cpt_file, fdi.cpt_fd, ctx);
+ if (IS_ERR(filp)) {
+ eprintk_ctx("rst_file: %ld %Lu\n", PTR_ERR(filp),
@@ -51139,6 +54303,8 @@
+ if (fdi.cpt_flags&CPT_FD_FLAG_CLOSEEXEC)
+ FD_SET(fdi.cpt_fd, f->fdt->close_on_exec);
+ }
++
++skip:
+ pos += fdi.cpt_next;
+ }
+ f->next_fd = fi.cpt_next_fd;
@@ -51151,6 +54317,16 @@
+ return 0;
+}
+
++int rst_files_complete(struct cpt_task_image *ti, struct cpt_context *ctx)
++{
++ return rst_files(ti, ctx, (ti->cpt_pid == 1) ? 3 : 0, INT_MAX);
++}
++
++int rst_files_std(struct cpt_task_image *ti, struct cpt_context *ctx)
++{
++ return rst_files(ti, ctx, 0, 2);
++}
++
+int rst_do_filejobs(cpt_context_t *ctx)
+{
+ struct filejob *j;
@@ -51260,8 +54436,31 @@
+ return err;
+
+ file = rst_file(*pos, -2, ctx);
-+ if (IS_ERR(file))
++ if (IS_ERR(file)) {
++ if (PTR_ERR(file) == -EINVAL && S_ISLNK(fi.cpt_i_mode)) {
++ /* One special case: inotify on symlink */
++ struct nameidata nd;
++ __u8 *name = NULL;
++
++ if (fi.cpt_next > fi.cpt_hdrlen)
++ name = rst_get_name(*pos + sizeof(fi), ctx);
++ if (!name) {
++ eprintk_ctx("can't get name for file\n");
++ return -EINVAL;
++ }
++ if ((err = path_lookup(name, 0, &nd)) != 0) {
++ eprintk_ctx("path_lookup %s: %d\n", name, err);
++ rst_put_name(name, ctx);
++ return -EINVAL;
++ }
++ *dp = nd.path.dentry;
++ *mp = nd.path.mnt;
++ *pos += fi.cpt_next;
++ rst_put_name(name, ctx);
++ return 0;
++ }
+ return PTR_ERR(file);
++ }
+
+ *dp = dget(file->f_dentry);
+ *mp = mntget(file->f_vfsmnt);
@@ -53041,10 +56240,10 @@
+}
diff --git a/kernel/cpt/rst_net.c b/kernel/cpt/rst_net.c
new file mode 100644
-index 0000000..dc5de80
+index 0000000..4c8d482
--- /dev/null
+++ b/kernel/cpt/rst_net.c
-@@ -0,0 +1,628 @@
+@@ -0,0 +1,745 @@
+/*
+ *
+ * kernel/cpt/rst_net.c
@@ -53638,6 +56837,7 @@
+ err = (status & 0xff00) >> 8;
+ if (err != 0) {
+ eprintk_ctx("iptables-restore exited with %d\n", err);
++ eprintk_ctx("Most probably some iptables modules are not loaded\n");
+ err = -EINVAL;
+ }
+ } else {
@@ -53658,6 +56858,120 @@
+ return err;
+}
+
++static int rst_restore_snmp_stat(struct cpt_context *ctx, void *mib[], int n,
++ loff_t *ppos, loff_t endpos)
++{
++ int err, in, i;
++ struct cpt_object_hdr o;
++ __u32 *stats;
++
++ err = rst_get_object(CPT_OBJ_BITS, *ppos, &o, ctx);
++ if (err)
++ return err;
++
++ in = o.cpt_next - o.cpt_hdrlen;
++ if (in >= PAGE_SIZE - 4) {
++ eprintk_ctx("Too long SNMP buf (%d)\n", in);
++ return -EINVAL;
++ }
++
++ if (o.cpt_content != CPT_CONTENT_DATA) {
++ if (o.cpt_content == CPT_CONTENT_VOID)
++ return 1;
++
++ eprintk_ctx("Corrupted SNMP stats\n");
++ return -EINVAL;
++ }
++
++ stats = cpt_get_buf(ctx);
++ err = ctx->pread(stats, in, ctx, (*ppos) + o.cpt_hdrlen);
++ if (err)
++ goto out;
++
++ in /= sizeof(*stats);
++ if (in > n)
++ wprintk_ctx("SNMP stats trimmed\n");
++ else
++ n = in;
++
++ for (i = 0; i < n; i++)
++ *((unsigned long *)(per_cpu_ptr(mib[0], 0)) + i) = stats[i];
++
++ *ppos += o.cpt_next;
++ if (*ppos < endpos)
++ err = 1; /* go on restoring */
++out:
++ cpt_release_buf(ctx);
++ return err;
++}
++
++static int rst_restore_snmp(struct cpt_context *ctx)
++{
++ int err;
++ loff_t sec = ctx->sections[CPT_SECT_SNMP_STATS];
++ loff_t endsec;
++ struct cpt_section_hdr h;
++ struct ve_struct *ve;
++ struct net *net;
++
++ if (sec == CPT_NULL)
++ return 0;
++
++ err = ctx->pread(&h, sizeof(h), ctx, sec);
++ if (err)
++ return err;
++ if (h.cpt_section != CPT_SECT_SNMP_STATS || h.cpt_hdrlen < sizeof(h))
++ return -EINVAL;
++
++ ve = get_exec_env();
++ net = ve->ve_netns;
++ endsec = sec + h.cpt_next;
++ sec += h.cpt_hdrlen;
++ if (sec >= endsec)
++ goto out;
++
++ err = rst_restore_snmp_stat(ctx, (void **)&net->mib.net_statistics,
++ LINUX_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&net->mib.ip_statistics,
++ IPSTATS_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&net->mib.tcp_statistics,
++ TCP_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&net->mib.udp_statistics,
++ UDP_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&net->mib.icmp_statistics,
++ ICMP_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&net->mib.icmpmsg_statistics,
++ ICMPMSG_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++ err = rst_restore_snmp_stat(ctx, (void **)&ve->_ipv6_statistics,
++ IPSTATS_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&ve->_udp_stats_in6,
++ UDP_MIB_MAX, &sec, endsec);
++ if (err <= 0)
++ goto out;
++ err = rst_restore_snmp_stat(ctx, (void **)&ve->_icmpv6_statistics,
++ ICMP6_MIB_MAX, &sec, endsec);
++#endif
++ if (err == 1)
++ err = 0;
++out:
++ return err;
++}
++
+int rst_restore_net(struct cpt_context *ctx)
+{
+ int err;
@@ -53671,14 +56985,16 @@
+ err = rst_restore_iptables(ctx);
+ if (!err)
+ err = rst_restore_ip_conntrack(ctx);
++ if (!err)
++ err = rst_restore_snmp(ctx);
+ return err;
+}
diff --git a/kernel/cpt/rst_proc.c b/kernel/cpt/rst_proc.c
new file mode 100644
-index 0000000..2b0b283
+index 0000000..beaaa3f
--- /dev/null
+++ b/kernel/cpt/rst_proc.c
-@@ -0,0 +1,579 @@
+@@ -0,0 +1,582 @@
+/*
+ *
+ * kernel/cpt/rst_proc.c
@@ -53887,7 +57203,7 @@
+ unlock_kernel();
+
+ if (cmd == CPT_TEST_CAPS) {
-+ err = test_cpu_caps();
++ err = test_cpu_caps_and_features();
+ goto out_lock;
+ }
+
@@ -54087,6 +57403,9 @@
+ fput(ctx->errorfile);
+ ctx->errorfile = dfile;
+ break;
++ case CPT_HARDLNK_ON:
++ ctx->hardlinked_on = 1;
++ break;
+ case CPT_SET_VEID:
+ if (ctx->ctx_state > 0) {
+ err = -EBUSY;
@@ -54260,10 +57579,10 @@
+module_exit(exit_rst);
diff --git a/kernel/cpt/rst_process.c b/kernel/cpt/rst_process.c
new file mode 100644
-index 0000000..19915b3
+index 0000000..000e0b9
--- /dev/null
+++ b/kernel/cpt/rst_process.c
-@@ -0,0 +1,1614 @@
+@@ -0,0 +1,1661 @@
+/*
+ *
+ * kernel/cpt/rst_process.c
@@ -54687,8 +58006,13 @@
+ }
+ }
+
-+ if (si->cpt_curr_target)
++ if (si->cpt_curr_target) {
+ current->signal->curr_target = find_task_by_vpid(si->cpt_curr_target);
++ if (current->signal->curr_target == NULL) {
++ wprintk_ctx("oops, curr_target=NULL, pid=%u\n", si->cpt_curr_target);
++ current->signal->curr_target = current;
++ }
++ }
+ current->signal->flags = 0;
+ *exiting = si->cpt_group_exit;
+ current->signal->group_exit_code = si->cpt_group_exit_code;
@@ -55449,7 +58773,7 @@
+#ifdef CONFIG_X86_32
+ unsigned int flags;
+
-+ flags = test_cpu_caps();
++ flags = test_cpu_caps_and_features();
+
+ /* if cpu does not support sse2 mask 6 bit (DAZ flag) and 16-31 bits
+ in MXCSR to avoid general protection fault */
@@ -55462,6 +58786,32 @@
+#include <asm/i387.h>
+#endif
+
++#define RLIM_INFINITY32 0xffffffff
++#define RLIM_INFINITY64 (~0ULL)
++
++#ifdef CONFIG_X86_64
++#define rst_rlim_32_to_64(a, i, t, im) \
++do { \
++ if (im->cpt_rlim_##a[i] == RLIM_INFINITY32) \
++ t->signal->rlim[i].rlim_##a = RLIM_INFINITY64; \
++ else \
++ t->signal->rlim[i].rlim_##a = im->cpt_rlim_##a[i]; \
++} while (0)
++#elif defined(CONFIG_X86_32)
++#define rst_rlim_64_to_32(a, i, t, im) \
++do { \
++ if (im->cpt_rlim_##a[i] == RLIM_INFINITY64) \
++ t->signal->rlim[i].rlim_##a = RLIM_INFINITY32; \
++ else if (im->cpt_rlim_##a[i] > RLIM_INFINITY32) { \
++ eprintk_ctx("rlimit %Lu is too high for 32-bit task, " \
++ "dump file is corrupted\n", \
++ im->cpt_rlim_##a[i]); \
++ return -EINVAL; \
++ } else \
++ t->signal->rlim[i].rlim_##a = im->cpt_rlim_##a[i]; \
++} while (0)
++#endif
++
+int rst_restore_process(struct cpt_context *ctx)
+{
+ cpt_object_t *obj;
@@ -55574,8 +58924,23 @@
+ tsk->signal->cmaj_flt = ti->cpt_cmaj_flt;
+
+ for (i=0; i<RLIM_NLIMITS; i++) {
-+ tsk->signal->rlim[i].rlim_cur = ti->cpt_rlim_cur[i];
-+ tsk->signal->rlim[i].rlim_max = ti->cpt_rlim_max[i];
++#ifdef CONFIG_X86_64
++ if (ctx->image_arch == CPT_OS_ARCH_I386) {
++ rst_rlim_32_to_64(cur, i, tsk, ti);
++ rst_rlim_32_to_64(max, i, tsk, ti);
++ } else
++#elif defined(CONFIG_X86_32)
++ if (ctx->image_arch == CPT_OS_ARCH_EMT64) {
++ rst_rlim_64_to_32(cur, i, tsk, ti);
++ rst_rlim_64_to_32(max, i, tsk, ti);
++ } else
++#endif
++ {
++ tsk->signal->rlim[i].rlim_cur =
++ ti->cpt_rlim_cur[i];
++ tsk->signal->rlim[i].rlim_max =
++ ti->cpt_rlim_max[i];
++ }
+ }
+ }
+#endif
@@ -55809,7 +59174,8 @@
+ }
+
+ tsk->ptrace = ti->cpt_ptrace;
-+ tsk->flags = ti->cpt_flags & ~PF_FROZEN;
++ tsk->flags = (tsk->flags & PF_USED_MATH) |
++ (ti->cpt_flags & CPT_TASK_FLAGS_MASK);
+ clear_tsk_thread_flag(tsk, TIF_FREEZE);
+ tsk->exit_signal = ti->cpt_exit_signal;
+
@@ -55880,10 +59246,10 @@
+}
diff --git a/kernel/cpt/rst_socket.c b/kernel/cpt/rst_socket.c
new file mode 100644
-index 0000000..22e1d1b
+index 0000000..78cc4ff
--- /dev/null
+++ b/kernel/cpt/rst_socket.c
-@@ -0,0 +1,918 @@
+@@ -0,0 +1,993 @@
+/*
+ *
+ * kernel/cpt/rst_socket.c
@@ -56121,7 +59487,7 @@
+ struct sk_buff *skb;
+ __u32 type;
+
-+ skb = rst_skb(&pos, NULL, &type, ctx);
++ skb = rst_skb(sk, &pos, NULL, &type, ctx);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) == -EINVAL) {
+ int err;
@@ -56374,8 +59740,10 @@
+
+ setup_sock_common(sock->sk, si, pos, ctx);
+
-+ if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6)
++ if (si->cpt_family == AF_INET || si->cpt_family == AF_INET6) {
++ rst_listen_socket_in(sock->sk, si, pos, ctx);
+ rst_restore_synwait_queue(sock->sk, si, pos, ctx);
++ }
+
+ return 0;
+
@@ -56456,7 +59824,53 @@
+ return err;
+}
+
-+struct sk_buff * rst_skb(loff_t *pos_p, __u32 *owner, __u32 *queue, struct cpt_context *ctx)
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++static void rst_tcp_cb_ipv4_to_ipv6(struct cpt_skb_image *v, struct sk_buff *skb)
++{
++ BUG_ON(sizeof(skb->cb) - sizeof(struct inet6_skb_parm) <
++ sizeof(struct tcp_skb_cb) - sizeof(struct inet6_skb_parm));
++ memcpy(skb->cb, v->cpt_cb, sizeof(struct inet_skb_parm));
++ memcpy(skb->cb + sizeof(struct inet6_skb_parm),
++ (void *)v->cpt_cb + sizeof(struct inet_skb_parm),
++ sizeof(struct tcp_skb_cb) - sizeof(struct inet6_skb_parm));
++}
++#else
++static void rst_tcp_cb_ipv6_to_ipv4(struct cpt_skb_image *v, struct sk_buff *skb)
++{
++ BUG_ON(sizeof(v->cpt_cb) - sizeof(struct inet6_skb_parm) <
++ sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
++ memcpy(skb->cb, v->cpt_cb, sizeof(struct inet_skb_parm));
++ memcpy(skb->cb + sizeof(struct inet_skb_parm),
++ (void *)v->cpt_cb + sizeof(struct inet6_skb_parm),
++ sizeof(struct tcp_skb_cb) - sizeof(struct inet_skb_parm));
++}
++#endif
++
++struct tcp_skb_cb_ipv6 {
++ union {
++ struct inet_skb_parm h4;
++ struct inet6_skb_parm h6;
++ } header;
++ __u32 seq;
++ __u32 end_seq;
++ __u32 when;
++ __u8 flags;
++ __u8 sacked;
++ __u16 urg_ptr;
++ __u32 ack_seq;
++};
++
++#define check_tcp_cb_conv(op1, op2) do { \
++ if (!ctx->tcp_cb_convert) \
++ ctx->tcp_cb_convert = CPT_TCP_CB_##op1; \
++ else if (ctx->tcp_cb_convert == CPT_TCP_CB_##op2) { \
++ kfree_skb(skb); \
++ return ERR_PTR(-EINVAL); \
++ } \
++} while (0)
++
++struct sk_buff * rst_skb(struct sock *sk, loff_t *pos_p, __u32 *owner,
++ __u32 *queue, struct cpt_context *ctx)
+{
+ int err;
+ struct sk_buff *skb;
@@ -56490,7 +59904,34 @@
+ skb->mac_header = skb->head + v.cpt_mac;
+#endif
+ BUILD_BUG_ON(sizeof(skb->cb) < sizeof(v.cpt_cb));
-+ memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
++ if (sk->sk_protocol == IPPROTO_TCP) {
++ /*
++ * According to Alexey all packets in queue have non-zero
++ * flags, as at least TCPCB_FLAG_ACK is set on them.
++ * Luckily for us, offset of field flags in tcp_skb_cb struct
++ * with IPv6 is higher than total size of tcp_skb_cb struct
++ * without IPv6.
++ */
++ if (ctx->image_version >= CPT_VERSION_18_2 ||
++ ((struct tcp_skb_cb_ipv6 *)&v.cpt_cb)->flags) {
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++ check_tcp_cb_conv(NOT_CONV, CONV);
++ memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
++#else
++ check_tcp_cb_conv(CONV, NOT_CONV);
++ rst_tcp_cb_ipv6_to_ipv4(&v, skb);
++#endif
++ } else {
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++ check_tcp_cb_conv(CONV, NOT_CONV);
++ rst_tcp_cb_ipv4_to_ipv6(&v, skb);
++#else
++ check_tcp_cb_conv(NOT_CONV, CONV);
++ memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
++#endif
++ }
++ } else
++ memcpy(skb->cb, v.cpt_cb, sizeof(v.cpt_cb));
+ skb->mac_len = v.cpt_mac_len;
+
+ skb->csum = v.cpt_csum;
@@ -56568,7 +60009,7 @@
+ struct sock *owner_sk;
+ __u32 owner;
+
-+ skb = rst_skb(&pos, &owner, NULL, ctx);
++ skb = rst_skb(sk, &pos, &owner, NULL, ctx);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) == -EINVAL) {
+ int err;
@@ -56804,10 +60245,10 @@
+
diff --git a/kernel/cpt/rst_socket_in.c b/kernel/cpt/rst_socket_in.c
new file mode 100644
-index 0000000..f63df90
+index 0000000..08bf907
--- /dev/null
+++ b/kernel/cpt/rst_socket_in.c
-@@ -0,0 +1,492 @@
+@@ -0,0 +1,578 @@
+/*
+ *
+ * kernel/cpt/rst_socket_in.c
@@ -56869,7 +60310,7 @@
+ struct sk_buff *skb;
+ __u32 type;
+
-+ skb = rst_skb(&pos, NULL, &type, ctx);
++ skb = rst_skb(sk, &pos, NULL, &type, ctx);
+ if (IS_ERR(skb)) {
+ if (PTR_ERR(skb) == -EINVAL) {
+ int err;
@@ -57104,6 +60545,62 @@
+ return 0;
+}
+
++static void rst_listen_socket_tcp(struct cpt_sock_image *si, struct sock *sk)
++{
++ struct tcp_sock *tp = tcp_sk(sk);
++
++ tp->rcv_tstamp = tcp_jiffies_import(si->cpt_rcv_tstamp);
++ tp->lsndtime = tcp_jiffies_import(si->cpt_lsndtime);
++ tp->tcp_header_len = si->cpt_tcp_header_len;
++ inet_csk(sk)->icsk_accept_queue.rskq_defer_accept = si->cpt_defer_accept;
++
++ /* Next options are inherited by children */
++ tp->mss_cache = si->cpt_mss_cache;
++ inet_csk(sk)->icsk_ext_hdr_len = si->cpt_ext_header_len;
++ tp->reordering = si->cpt_reordering;
++ tp->nonagle = si->cpt_nonagle;
++ tp->keepalive_probes = si->cpt_keepalive_probes;
++ tp->rx_opt.user_mss = si->cpt_user_mss;
++ inet_csk(sk)->icsk_syn_retries = si->cpt_syn_retries;
++ tp->keepalive_time = si->cpt_keepalive_time;
++ tp->keepalive_intvl = si->cpt_keepalive_intvl;
++ tp->linger2 = si->cpt_linger2;
++}
++
++int rst_listen_socket_in( struct sock *sk, struct cpt_sock_image *si,
++ loff_t pos, struct cpt_context *ctx)
++{
++ struct inet_sock *inet = inet_sk(sk);
++
++ lock_sock(sk);
++
++ inet->uc_ttl = si->cpt_uc_ttl;
++ inet->tos = si->cpt_tos;
++ inet->cmsg_flags = si->cpt_cmsg_flags;
++ inet->pmtudisc = si->cpt_pmtudisc;
++ inet->recverr = si->cpt_recverr;
++ inet->freebind = si->cpt_freebind;
++ inet->id = si->cpt_idcounter;
++
++ if (sk->sk_family == AF_INET6) {
++ struct ipv6_pinfo *np = inet6_sk(sk);
++
++ np->frag_size = si->cpt_frag_size6;
++ np->hop_limit = si->cpt_hop_limit6;
++
++ np->rxopt.all = si->cpt_rxopt6;
++ np->mc_loop = si->cpt_mc_loop6;
++ np->recverr = si->cpt_recverr6;
++ np->pmtudisc = si->cpt_pmtudisc6;
++ np->ipv6only = si->cpt_ipv6only6;
++ }
++
++ if (sk->sk_protocol == IPPROTO_TCP)
++ rst_listen_socket_tcp(si, sk);
++
++ release_sock(sk);
++ return 0;
++}
+
+int rst_socket_in(struct cpt_sock_image *si, loff_t pos, struct sock *sk,
+ struct cpt_context *ctx)
@@ -57215,26 +60712,49 @@
+ loff_t pos, struct cpt_context *ctx)
+{
+ int err;
-+ loff_t end = si->cpt_next;
++ loff_t end = pos + si->cpt_next;
+
+ pos += si->cpt_hdrlen;
++
++ lock_sock(sk);
+ while (pos < end) {
+ struct cpt_openreq_image oi;
+
+ err = rst_get_object(CPT_OBJ_OPENREQ, pos, &oi, ctx);
+ if (err) {
+ err = rst_sock_attr(&pos, sk, ctx);
-+ if (err)
++ if (err) {
++ release_sock(sk);
+ return err;
++ }
++
+ continue;
+ }
+
+ if (oi.cpt_object == CPT_OBJ_OPENREQ) {
-+ struct request_sock *req = reqsk_alloc(&tcp_request_sock_ops);
-+ if (req == NULL)
++ struct request_sock *req;
++
++ if (oi.cpt_family == AF_INET6 &&
++ sk->sk_family != AF_INET6)
++ /* related to non initialized cpt_family bug */
++ goto next;
++
++ if (oi.cpt_family == AF_INET6) {
++#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++ req = reqsk_alloc(&tcp6_request_sock_ops);
++#else
++ release_sock(sk);
++ return -EINVAL;
++#endif
++ } else {
++ req = reqsk_alloc(&tcp_request_sock_ops);
++ }
++
++ if (req == NULL) {
++ release_sock(sk);
+ return -ENOMEM;
++ }
+
-+ memset(req, 0, sizeof(*req));
+ tcp_rsk(req)->rcv_isn = oi.cpt_rcv_isn;
+ tcp_rsk(req)->snt_isn = oi.cpt_snt_isn;
+ inet_rsk(req)->rmt_port = oi.cpt_rmt_port;
@@ -57247,26 +60767,33 @@
+ inet_rsk(req)->wscale_ok = oi.cpt_wscale_ok;
+ inet_rsk(req)->ecn_ok = oi.cpt_ecn_ok;
+ inet_rsk(req)->acked = oi.cpt_acked;
++ inet_rsk(req)->opt = NULL;
+ req->window_clamp = oi.cpt_window_clamp;
+ req->rcv_wnd = oi.cpt_rcv_wnd;
+ req->ts_recent = oi.cpt_ts_recent;
+ req->expires = jiffies_import(oi.cpt_expires);
++ req->sk = NULL;
++ req->secid = 0;
++ req->peer_secid = 0;
+
-+ if (oi.cpt_family == AF_INET) {
-+ memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
-+ memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
-+ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
-+ } else {
++ if (oi.cpt_family == AF_INET6) {
+#if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
++ inet6_rsk(req)->pktopts = NULL;
+ memcpy(&inet6_rsk(req)->loc_addr, oi.cpt_loc_addr, 16);
+ memcpy(&inet6_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 16);
+ inet6_rsk(req)->iif = oi.cpt_iif;
+ inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+#endif
++ } else {
++ memcpy(&inet_rsk(req)->loc_addr, oi.cpt_loc_addr, 4);
++ memcpy(&inet_rsk(req)->rmt_addr, oi.cpt_rmt_addr, 4);
++ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
+ }
+ }
++next:
+ pos += oi.cpt_next;
+ }
++ release_sock(sk);
+ return 0;
+}
+
@@ -57302,10 +60829,10 @@
+#endif
diff --git a/kernel/cpt/rst_sysvipc.c b/kernel/cpt/rst_sysvipc.c
new file mode 100644
-index 0000000..0f21493
+index 0000000..b5e62a7
--- /dev/null
+++ b/kernel/cpt/rst_sysvipc.c
-@@ -0,0 +1,634 @@
+@@ -0,0 +1,639 @@
+/*
+ *
+ * kernel/cpt/rst_sysvipc.c
@@ -57468,8 +60995,11 @@
+ u.shmi.cpt_segsz, u.shmi.cpt_mode);
+ if (!IS_ERR(file)) {
+ err = fixup_shm(file, &u.shmi);
-+ if (err != -EEXIST && dpos < epos)
++ if (err != -EEXIST && dpos < epos) {
+ err = fixup_shm_data(file, dpos, epos, ctx);
++ if (err)
++ goto err_put;
++ }
+ } else if (IS_ERR(file) && PTR_ERR(file) == -EEXIST) {
+ struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns;
+ struct shmid_kernel *shp;
@@ -57482,6 +61012,8 @@
+ }
+ return file;
+
++err_put:
++ fput(file);
+err_out:
+ return ERR_PTR(err);
+}
@@ -58332,10 +61864,10 @@
+}
diff --git a/kernel/cpt/rst_ubc.c b/kernel/cpt/rst_ubc.c
new file mode 100644
-index 0000000..e7f717e
+index 0000000..db1f982
--- /dev/null
+++ b/kernel/cpt/rst_ubc.c
-@@ -0,0 +1,133 @@
+@@ -0,0 +1,144 @@
+/*
+ *
+ * kernel/cpt/rst_ubc.c
@@ -58396,7 +61928,7 @@
+{
+ struct user_beancounter *bc;
+ cpt_object_t *pobj;
-+ int i;
++ int resources, i;
+
+ if (v->cpt_parent != CPT_NULL) {
+ pobj = lookup_cpt_obj_bypos(CPT_OBJ_UBC, v->cpt_parent, ctx);
@@ -58417,7 +61949,15 @@
+ CPT_VERSION_MINOR(ctx->image_version) < 1)
+ goto out;
+
-+ for (i = 0; i < UB_RESOURCES; i++) {
++ if (v->cpt_content == CPT_CONTENT_ARRAY)
++ resources = v->cpt_ub_resources;
++ else
++ resources = UB_RESOURCES_COMPAT;
++
++ if (resources > UB_RESOURCES)
++ return -EINVAL;
++
++ for (i = 0; i < resources; i++) {
+ restore_one_bc_parm(v->cpt_parms + i * 2, bc->ub_parms + i, 0);
+ restore_one_bc_parm(v->cpt_parms + i * 2 + 1,
+ bc->ub_store + i, 1);
@@ -58454,9 +61994,12 @@
+ cpt_obj_setpos(obj, start, ctx);
+ intern_cpt_object(CPT_OBJ_UBC, obj, ctx);
+
-+ restore_one_bc(v, obj, ctx);
++ err = restore_one_bc(v, obj, ctx);
+
+ cpt_release_buf(ctx);
++ if (err)
++ return err;
++
+ start += v->cpt_next;
+ }
+ return 0;
@@ -58471,10 +62014,10 @@
+}
diff --git a/kernel/cpt/rst_undump.c b/kernel/cpt/rst_undump.c
new file mode 100644
-index 0000000..aadddcb
+index 0000000..68cc6c2
--- /dev/null
+++ b/kernel/cpt/rst_undump.c
-@@ -0,0 +1,1069 @@
+@@ -0,0 +1,1077 @@
+/*
+ *
+ * kernel/cpt/rst_undump.c
@@ -58589,6 +62132,8 @@
+ // // ve->start_cycles -= (s64)i->start_jiffies_delta * cycles_per_jiffy;
+
+ ctx->last_vpid = i->last_pid;
++ if (i->rnd_va_space)
++ ve->_randomize_va_space = i->rnd_va_space - 1;
+
+ err = 0;
+out_rel:
@@ -58626,7 +62171,7 @@
+ param.known_features = (ctx->image_version < CPT_VERSION_18) ?
+ VE_FEATURES_OLD : ~(__u64)0;
+
-+ err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK, 2,
++ err = real_env_create(ctx->ve_id, VE_CREATE|VE_LOCK|VE_EXCLUSIVE, 2,
+ &param, sizeof(param));
+ if (err < 0)
+ eprintk_ctx("real_env_create: %d\n", err);
@@ -58769,6 +62314,12 @@
+ goto out;
+ }
+
++ err = rst_files_std(ti, ctx);
++ if (err) {
++ eprintk_ctx("rst_root_stds: %d\n", err);
++ goto out;
++ }
++
+ err = rst_root_namespace(ctx);
+ if (err) {
+ eprintk_ctx("rst_namespace: %d\n", err);
@@ -59558,7 +63109,7 @@
(!cputime_eq(p->utime, cputime_zero) ||
!cputime_eq(p->stime, cputime_zero)))
diff --git a/kernel/exit.c b/kernel/exit.c
-index f7864ac..7773280 100644
+index f7864ac..38b3e22 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -22,6 +22,9 @@
@@ -59621,7 +63172,16 @@
call_rcu(&p->rcu, delayed_put_task_struct);
p = leader;
-@@ -526,6 +540,7 @@ void put_files_struct(struct files_struct *files)
+@@ -422,6 +436,8 @@ void daemonize(const char *name, ...)
+ va_list args;
+ sigset_t blocked;
+
++ (void)virtinfo_gencall(VIRTINFO_DOEXIT, NULL);
++
+ va_start(args, name);
+ vsnprintf(current->comm, sizeof(current->comm), name, args);
+ va_end(args);
+@@ -526,6 +542,7 @@ void put_files_struct(struct files_struct *files)
free_fdtable(fdt);
}
}
@@ -59629,7 +63189,7 @@
void reset_files_struct(struct files_struct *files)
{
-@@ -598,10 +613,10 @@ retry:
+@@ -598,10 +615,10 @@ retry:
* Search through everything else. We should not get
* here often
*/
@@ -59642,7 +63202,7 @@
read_unlock(&tasklist_lock);
/*
-@@ -640,7 +655,7 @@ assign_new_owner:
+@@ -640,7 +657,7 @@ assign_new_owner:
* Turn us into a lazy TLB process if we
* aren't already..
*/
@@ -59651,7 +63211,7 @@
{
struct mm_struct *mm = tsk->mm;
struct core_state *core_state;
-@@ -648,6 +663,10 @@ static void exit_mm(struct task_struct * tsk)
+@@ -648,6 +665,10 @@ static void exit_mm(struct task_struct * tsk)
mm_release(tsk, mm);
if (!mm)
return;
@@ -59662,7 +63222,7 @@
/*
* Serialize with any possible pending coredump.
* We must hold mmap_sem around checking core_state
-@@ -692,6 +711,7 @@ static void exit_mm(struct task_struct * tsk)
+@@ -692,6 +713,7 @@ static void exit_mm(struct task_struct * tsk)
mm_update_next_owner(mm);
mmput(mm);
}
@@ -59670,7 +63230,7 @@
/*
* When we die, we re-parent all our children.
-@@ -706,7 +726,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
+@@ -706,7 +728,7 @@ static struct task_struct *find_new_reaper(struct task_struct *father)
struct task_struct *thread;
thread = father;
@@ -59679,7 +63239,7 @@
if (thread->flags & PF_EXITING)
continue;
if (unlikely(pid_ns->child_reaper == father))
-@@ -839,11 +859,16 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
+@@ -839,11 +861,16 @@ static void exit_notify(struct task_struct *tsk, int group_dead)
tsk->self_exec_id != tsk->parent_exec_id))
tsk->exit_signal = SIGCHLD;
@@ -59696,7 +63256,7 @@
/* mt-exec, de_thread() is waiting for us */
if (thread_group_leader(tsk) &&
-@@ -900,6 +925,7 @@ NORET_TYPE void do_exit(long code)
+@@ -900,6 +927,7 @@ NORET_TYPE void do_exit(long code)
panic("Attempted to kill the idle task!");
tracehook_report_exit(&code);
@@ -59704,7 +63264,7 @@
validate_creds_for_do_exit(tsk);
-@@ -983,7 +1009,15 @@ NORET_TYPE void do_exit(long code)
+@@ -983,7 +1011,15 @@ NORET_TYPE void do_exit(long code)
*/
perf_event_exit_task(tsk);
@@ -59721,7 +63281,7 @@
#ifdef CONFIG_NUMA
mpol_put(tsk->mempolicy);
tsk->mempolicy = NULL;
-@@ -1626,7 +1660,7 @@ repeat:
+@@ -1626,7 +1662,7 @@ repeat:
if (wo->wo_flags & __WNOTHREAD)
break;
@@ -59730,7 +63290,7 @@
read_unlock(&tasklist_lock);
notask:
-@@ -1753,6 +1787,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
+@@ -1753,6 +1789,7 @@ SYSCALL_DEFINE4(wait4, pid_t, upid, int __user *, stat_addr,
asmlinkage_protect(4, ret, upid, stat_addr, options, ru);
return ret;
}
@@ -59740,10 +63300,10 @@
diff --git a/kernel/fairsched.c b/kernel/fairsched.c
new file mode 100644
-index 0000000..bfa5c33
+index 0000000..7cbd309
--- /dev/null
+++ b/kernel/fairsched.c
-@@ -0,0 +1,633 @@
+@@ -0,0 +1,683 @@
+/*
+ * Fair Scheduler
+ *
@@ -59861,7 +63421,7 @@
+{
+ int retval;
+
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+
+ mutex_lock(&fairsched_mutex);
@@ -59902,7 +63462,7 @@
+{
+ int retval;
+
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+
+ mutex_lock(&fairsched_mutex);
@@ -59936,7 +63496,7 @@
+{
+ int retval;
+
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+
+ mutex_lock(&fairsched_mutex);
@@ -59964,7 +63524,7 @@
+{
+ int retval;
+
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+
+ mutex_lock(&fairsched_mutex);
@@ -60015,7 +63575,7 @@
+{
+ int retval;
+
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+
+ mutex_lock(&fairsched_mutex);
@@ -60063,7 +63623,7 @@
+{
+ int retval;
+
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+
+ mutex_lock(&fairsched_mutex);
@@ -60074,6 +63634,56 @@
+}
+EXPORT_SYMBOL(sys_fairsched_mvpr);
+
++int fairsched_new_node(int id, unsigned int vcpus)
++{
++ int err;
++
++ mutex_lock(&fairsched_mutex);
++ /*
++ * We refuse to switch to an already existing node since nodes
++ * keep a pointer to their ve_struct...
++ */
++ err = do_fairsched_mknod(0, 1, id);
++ if (err < 0) {
++ printk(KERN_WARNING "Can't create fairsched node %d\n", id);
++ goto out;
++ }
++#if 0
++ err = do_fairsched_vcpus(id, vcpus);
++ if (err) {
++ printk(KERN_WARNING "Can't set sched vcpus on node %d\n", id);
++ goto cleanup;
++ }
++#endif
++ err = do_fairsched_mvpr(current->pid, id);
++ if (err) {
++ printk(KERN_WARNING "Can't switch to fairsched node %d\n", id);
++ goto cleanup;
++ }
++ mutex_unlock(&fairsched_mutex);
++ return 0;
++
++cleanup:
++ if (do_fairsched_rmnod(id))
++ printk(KERN_ERR "Can't clean fairsched node %d\n", id);
++out:
++ mutex_unlock(&fairsched_mutex);
++ return err;
++}
++EXPORT_SYMBOL(fairsched_new_node);
++
++void fairsched_drop_node(int id)
++{
++ mutex_lock(&fairsched_mutex);
++ if (task_fairsched_node_id(current) == id)
++ if (do_fairsched_mvpr(current->pid, FAIRSCHED_INIT_NODE_ID))
++ printk(KERN_WARNING "Can't leave sched node %d\n", id);
++ if (do_fairsched_rmnod(id))
++ printk(KERN_ERR "Can't remove fairsched node %d\n", id);
++ mutex_unlock(&fairsched_mutex);
++}
++EXPORT_SYMBOL(fairsched_drop_node);
++
+#ifdef CONFIG_PROC_FS
+
+/*********************************************************************/
@@ -61016,7 +64626,7 @@
/**
* kthread_stop - stop a thread created by kthread_create().
diff --git a/kernel/lockdep.c b/kernel/lockdep.c
-index 9af5672..99c3c9b 100644
+index f672d51..bc200db 100644
--- a/kernel/lockdep.c
+++ b/kernel/lockdep.c
@@ -3742,7 +3742,7 @@ retry:
@@ -61038,7 +64648,7 @@
printk("\n");
printk("=============================================\n\n");
diff --git a/kernel/module.c b/kernel/module.c
-index dfa33e8..48a2edc 100644
+index a4aae35..6d7a625 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -2915,6 +2915,8 @@ static char *module_flags(struct module *mod, char *buf)
@@ -61656,7 +65266,7 @@
rcu_read_unlock();
/* If we failed to send the signal the timer stops. */
diff --git a/kernel/power/process.c b/kernel/power/process.c
-index cc2e553..3122fcb 100644
+index e7cd671..732f532 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -15,6 +15,8 @@
@@ -61739,7 +65349,7 @@
continue;
@@ -142,8 +148,10 @@ static void thaw_tasks(bool nosig_only)
- if (cgroup_frozen(p))
+ if (cgroup_freezing_or_frozen(p))
continue;
- thaw_process(p);
@@ -61752,7 +65362,7 @@
}
diff --git a/kernel/printk.c b/kernel/printk.c
-index f38b07f..517bd6a 100644
+index f38b07f..1041e53 100644
--- a/kernel/printk.c
+++ b/kernel/printk.c
@@ -31,7 +31,9 @@
@@ -61973,20 +65583,21 @@
boot_delay_msec();
printk_delay();
-@@ -705,6 +754,12 @@ asmlinkage int vprintk(const char *fmt, va_list args)
+@@ -705,6 +754,13 @@ asmlinkage int vprintk(const char *fmt, va_list args)
spin_lock(&logbuf_lock);
printk_cpu = this_cpu;
+ err = ve_log_init();
+ if (err) {
-+ spin_unlock_irqrestore(&logbuf_lock, flags);
-+ return err;
++ spin_unlock(&logbuf_lock);
++ printed_len = err;
++ goto out_lockdep;
+ }
+
if (recursion_bug) {
recursion_bug = 0;
strcpy(printk_buf, recursion_bug_msg);
-@@ -788,7 +843,13 @@ asmlinkage int vprintk(const char *fmt, va_list args)
+@@ -788,19 +844,67 @@ asmlinkage int vprintk(const char *fmt, va_list args)
* will release 'logbuf_lock' regardless of whether it
* actually gets the semaphore or not.
*/
@@ -61994,14 +65605,24 @@
+ if (!ve_is_super(get_exec_env())) {
+ need_wake = (ve_log_start != ve_log_end);
+ printk_cpu = UINT_MAX;
-+ spin_unlock_irqrestore(&logbuf_lock, flags);
++ spin_unlock(&logbuf_lock);
++ lockdep_on();
++ raw_local_irq_restore(flags);
+ if (!oops_in_progress && need_wake)
+ wake_up_interruptible(&ve_log_wait);
++ goto out_preempt;
+ } else if (acquire_console_semaphore_for_printk(this_cpu))
release_console_sem();
++out_lockdep:
lockdep_on();
-@@ -801,6 +862,41 @@ out_restore_irqs:
+ out_restore_irqs:
+ raw_local_irq_restore(flags);
+
++out_preempt:
+ preempt_enable();
+ return printed_len;
+ }
EXPORT_SYMBOL(printk);
EXPORT_SYMBOL(vprintk);
@@ -62019,12 +65640,14 @@
+asmlinkage int ve_vprintk(int dst, const char *fmt, va_list args)
+{
+ int printed_len;
++ va_list args2;
+
+ printed_len = 0;
++ va_copy(args2, args);
+ if (ve_is_super(get_exec_env()) || (dst & VE0_LOG))
+ printed_len = vprintk(fmt, args);
+ if (!ve_is_super(get_exec_env()) && (dst & VE_LOG))
-+ printed_len = __vprintk(fmt, args);
++ printed_len = __vprintk(fmt, args2);
+ return printed_len;
+}
+
@@ -62043,7 +65666,7 @@
#else
static void call_console_drivers(unsigned start, unsigned end)
-@@ -1058,6 +1154,7 @@ void release_console_sem(void)
+@@ -1058,6 +1162,7 @@ void release_console_sem(void)
_con_start = con_start;
_log_end = log_end;
con_start = log_end; /* Flush */
@@ -62051,7 +65674,7 @@
spin_unlock(&logbuf_lock);
stop_critical_timings(); /* don't trace print latency */
call_console_drivers(_con_start, _log_end);
-@@ -1066,6 +1163,7 @@ void release_console_sem(void)
+@@ -1066,6 +1171,7 @@ void release_console_sem(void)
}
console_locked = 0;
up(&console_sem);
@@ -62059,7 +65682,7 @@
spin_unlock_irqrestore(&logbuf_lock, flags);
if (wake_klogd)
wake_up_klogd();
-@@ -1382,6 +1480,36 @@ int printk_ratelimit(void)
+@@ -1382,6 +1488,36 @@ int printk_ratelimit(void)
}
EXPORT_SYMBOL(printk_ratelimit);
@@ -62096,6 +65719,72 @@
/**
* printk_timed_ratelimit - caller-controlled printk ratelimiting
* @caller_jiffies: pointer to caller's state
+@@ -1405,3 +1541,65 @@ bool printk_timed_ratelimit(unsigned long *caller_jiffies,
+ }
+ EXPORT_SYMBOL(printk_timed_ratelimit);
+ #endif
++
++static cpumask_t nmi_show_regs_cpus = CPU_MASK_NONE;
++static unsigned long nmi_show_regs_timeout;
++
++void __attribute__((weak)) send_nmi_ipi_allbutself(void)
++{
++ cpus_clear(nmi_show_regs_cpus);
++}
++
++static void busted_show_regs(struct pt_regs *regs, int in_nmi)
++{
++ if (!regs || (in_nmi && spin_is_locked(&logbuf_lock)))
++ return;
++
++ bust_spinlocks(1);
++ printk("----------- IPI show regs -----------\n");
++ show_regs(regs);
++ bust_spinlocks(0);
++}
++
++void nmi_show_regs(struct pt_regs *regs, int in_nmi)
++{
++ if (cpus_empty(nmi_show_regs_cpus))
++ goto doit;
++
++ /* Previous request still in progress */
++ if (time_before(jiffies, nmi_show_regs_timeout))
++ return;
++
++ if (!in_nmi || !spin_is_locked(&logbuf_lock)) {
++ int cpu;
++
++ bust_spinlocks(1);
++ printk("previous show regs lost IPI to: ");
++ for_each_cpu_mask(cpu, nmi_show_regs_cpus)
++ printk("%d ", cpu);
++ printk("\n");
++ bust_spinlocks(0);
++ }
++
++doit:
++ nmi_show_regs_timeout = jiffies + HZ/10;
++ nmi_show_regs_cpus = cpu_online_map;
++ cpu_clear(raw_smp_processor_id(), nmi_show_regs_cpus);
++ busted_show_regs(regs, in_nmi);
++ send_nmi_ipi_allbutself();
++}
++
++/* call only from nmi handler */
++int do_nmi_show_regs(struct pt_regs *regs, int cpu)
++{
++ static DEFINE_SPINLOCK(nmi_show_regs_lock);
++
++ if (!cpu_isset(cpu, nmi_show_regs_cpus))
++ return 0;
++
++ spin_lock(&nmi_show_regs_lock);
++ busted_show_regs(regs, 1);
++ cpu_clear(cpu, nmi_show_regs_cpus);
++ spin_unlock(&nmi_show_regs_lock);
++ return 1;
++}
diff --git a/kernel/ptrace.c b/kernel/ptrace.c
index 23bd09c..8967db7 100644
--- a/kernel/ptrace.c
@@ -62159,7 +65848,7 @@
child = find_task_by_vpid(pid);
if (child)
diff --git a/kernel/sched.c b/kernel/sched.c
-index ed61192..e66f256 100644
+index 34d924e..bf1165c 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -71,6 +71,8 @@
@@ -62190,7 +65879,7 @@
struct task_struct *curr, *idle;
unsigned long next_balance;
struct mm_struct *prev_mm;
-@@ -647,6 +654,11 @@ static inline int cpu_of(struct rq *rq)
+@@ -647,6 +654,12 @@ static inline int cpu_of(struct rq *rq)
#endif
}
@@ -62198,11 +65887,12 @@
+DEFINE_SPINLOCK(kstat_glb_lock);
+EXPORT_SYMBOL(kstat_glob);
+EXPORT_SYMBOL(kstat_glb_lock);
++static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, glob_kstat_lat);
+
/*
* The domain tree (rq->sd) is protected by RCU's quiescent state transition.
* See detach_destroy_domains: synchronize_sched for details.
-@@ -998,6 +1010,220 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
+@@ -998,6 +1011,220 @@ static inline void task_rq_unlock(struct rq *rq, unsigned long *flags)
spin_unlock_irqrestore(&rq->lock, *flags);
}
@@ -62423,7 +66113,7 @@
/*
* this_rq_lock - lock this runqueue and disable interrupts.
*/
-@@ -1943,11 +2169,21 @@ static int effective_prio(struct task_struct *p)
+@@ -1943,11 +2170,21 @@ static int effective_prio(struct task_struct *p)
*/
static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
{
@@ -62446,7 +66136,7 @@
}
/*
-@@ -1955,11 +2191,31 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
+@@ -1955,11 +2192,31 @@ static void activate_task(struct rq *rq, struct task_struct *p, int wakeup)
*/
static void deactivate_task(struct rq *rq, struct task_struct *p, int sleep)
{
@@ -62479,7 +66169,7 @@
}
/**
-@@ -2278,6 +2534,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
+@@ -2276,6 +2533,7 @@ unsigned long wait_task_inactive(struct task_struct *p, long match_state)
return ncsw;
}
@@ -62487,7 +66177,7 @@
/***
* kick_process - kick a running thread to enter/exit the kernel
-@@ -2374,8 +2631,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
+@@ -2372,8 +2630,11 @@ static int try_to_wake_up(struct task_struct *p, unsigned int state,
*
* First fix up the nr_uninterruptible count:
*/
@@ -62500,7 +66190,7 @@
p->state = TASK_WAKING;
task_rq_unlock(rq, &flags);
-@@ -2609,6 +2869,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
+@@ -2607,6 +2868,10 @@ void sched_fork(struct task_struct *p, int clone_flags)
/* Want to start with kernel preemption disabled. */
task_thread_info(p)->preempt_count = 1;
#endif
@@ -62511,7 +66201,7 @@
plist_node_init(&p->pushable_tasks, MAX_PRIO);
put_cpu();
-@@ -2639,6 +2903,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
+@@ -2637,6 +2902,8 @@ void wake_up_new_task(struct task_struct *p, unsigned long clone_flags)
*/
p->sched_class->task_new(rq, p);
inc_nr_running(rq);
@@ -62520,7 +66210,7 @@
}
trace_sched_wakeup_new(rq, p, 1);
check_preempt_curr(rq, p, WF_FORK);
-@@ -2841,6 +3107,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
+@@ -2839,6 +3106,7 @@ asmlinkage void schedule_tail(struct task_struct *prev)
if (current->set_child_tid)
put_user(task_pid_vnr(current), current->set_child_tid);
}
@@ -62528,7 +66218,7 @@
/*
* context_switch - switch to the new MM and the new
-@@ -2912,6 +3179,7 @@ unsigned long nr_running(void)
+@@ -2910,6 +3178,7 @@ unsigned long nr_running(void)
return sum;
}
@@ -62536,7 +66226,7 @@
unsigned long nr_uninterruptible(void)
{
-@@ -2929,6 +3197,7 @@ unsigned long nr_uninterruptible(void)
+@@ -2927,6 +3196,7 @@ unsigned long nr_uninterruptible(void)
return sum;
}
@@ -62544,7 +66234,7 @@
unsigned long long nr_context_switches(void)
{
-@@ -2964,6 +3233,72 @@ unsigned long this_cpu_load(void)
+@@ -2962,6 +3232,72 @@ unsigned long this_cpu_load(void)
}
@@ -62617,7 +66307,7 @@
/* Variables and functions for calc_load */
static atomic_long_t calc_load_tasks;
static unsigned long calc_load_update;
-@@ -2985,6 +3320,16 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
+@@ -2983,6 +3319,16 @@ void get_avenrun(unsigned long *loads, unsigned long offset, int shift)
loads[2] = (avenrun[2] + offset) << shift;
}
@@ -62634,7 +66324,7 @@
static unsigned long
calc_load(unsigned long load, unsigned long exp, unsigned long active)
{
-@@ -2993,6 +3338,35 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
+@@ -2991,6 +3337,35 @@ calc_load(unsigned long load, unsigned long exp, unsigned long active)
return load >> FSHIFT;
}
@@ -62670,7 +66360,7 @@
/*
* calc_load - update the avenrun load estimates 10 ticks after the
* CPUs have updated calc_load_tasks.
-@@ -3012,6 +3386,8 @@ void calc_global_load(void)
+@@ -3010,6 +3385,8 @@ void calc_global_load(void)
avenrun[1] = calc_load(avenrun[1], EXP_5, active);
avenrun[2] = calc_load(avenrun[2], EXP_15, active);
@@ -62679,7 +66369,7 @@
calc_load_update += LOAD_FREQ;
}
-@@ -3076,6 +3452,16 @@ static void update_cpu_load(struct rq *this_rq)
+@@ -3074,6 +3451,16 @@ static void update_cpu_load(struct rq *this_rq)
}
}
@@ -62696,7 +66386,7 @@
#ifdef CONFIG_SMP
/*
-@@ -3176,8 +3562,15 @@ void sched_exec(void)
+@@ -3174,8 +3561,15 @@ void sched_exec(void)
static void pull_task(struct rq *src_rq, struct task_struct *p,
struct rq *this_rq, int this_cpu)
{
@@ -62712,7 +66402,7 @@
activate_task(this_rq, p, 0);
check_preempt_curr(this_rq, p, 0);
}
-@@ -5054,10 +5447,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
+@@ -5052,10 +5446,13 @@ void account_user_time(struct task_struct *p, cputime_t cputime,
/* Add user time to cpustat. */
tmp = cputime_to_cputime64(cputime);
@@ -62728,7 +66418,7 @@
cpuacct_update_stats(p, CPUACCT_STAT_USER, cputime);
/* Account for user time used */
-@@ -5114,6 +5510,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
+@@ -5112,6 +5509,7 @@ void account_system_time(struct task_struct *p, int hardirq_offset,
/* Add system time to cpustat. */
tmp = cputime_to_cputime64(cputime);
@@ -62736,7 +66426,7 @@
if (hardirq_count() - hardirq_offset)
cpustat->irq = cputime64_add(cpustat->irq, tmp);
else if (softirq_count())
-@@ -5492,6 +5889,8 @@ need_resched_nonpreemptible:
+@@ -5490,6 +5888,8 @@ need_resched_nonpreemptible:
next = pick_next_task(rq);
if (likely(prev != next)) {
@@ -62745,7 +66435,7 @@
sched_info_switch(prev, next);
perf_event_task_sched_out(prev, next, cpu);
-@@ -5499,6 +5898,22 @@ need_resched_nonpreemptible:
+@@ -5497,6 +5897,22 @@ need_resched_nonpreemptible:
rq->curr = next;
++*switch_count;
@@ -62768,7 +66458,7 @@
context_switch(rq, prev, next); /* unlocks the rq */
/*
* the context switch might have flipped the stack from under
-@@ -5506,8 +5921,10 @@ need_resched_nonpreemptible:
+@@ -5504,8 +5920,10 @@ need_resched_nonpreemptible:
*/
cpu = smp_processor_id();
rq = cpu_rq(cpu);
@@ -62780,7 +66470,7 @@
post_schedule(rq);
-@@ -6291,7 +6708,7 @@ recheck:
+@@ -6289,7 +6707,7 @@ recheck:
/*
* Allow unprivileged RT tasks to decrease priority:
*/
@@ -62789,7 +66479,7 @@
if (rt_policy(policy)) {
unsigned long rlim_rtprio;
-@@ -6798,11 +7215,16 @@ EXPORT_SYMBOL(yield);
+@@ -6800,11 +7218,16 @@ EXPORT_SYMBOL(yield);
void __sched io_schedule(void)
{
struct rq *rq = raw_rq();
@@ -62806,7 +66496,7 @@
current->in_iowait = 0;
atomic_dec(&rq->nr_iowait);
delayacct_blkio_end();
-@@ -6813,11 +7235,16 @@ long __sched io_schedule_timeout(long timeout)
+@@ -6815,11 +7238,16 @@ long __sched io_schedule_timeout(long timeout)
{
struct rq *rq = raw_rq();
long ret;
@@ -62823,7 +66513,7 @@
current->in_iowait = 0;
atomic_dec(&rq->nr_iowait);
delayacct_blkio_end();
-@@ -6924,17 +7351,7 @@ void sched_show_task(struct task_struct *p)
+@@ -6926,17 +7354,7 @@ void sched_show_task(struct task_struct *p)
state = p->state ? __ffs(p->state) + 1 : 0;
printk(KERN_INFO "%-13.13s %c", p->comm,
state < sizeof(stat_nam) - 1 ? stat_nam[state] : '?');
@@ -62842,7 +66532,7 @@
#ifdef CONFIG_DEBUG_STACK_USAGE
free = stack_not_used(p);
#endif
-@@ -6951,13 +7368,13 @@ void show_state_filter(unsigned long state_filter)
+@@ -6953,13 +7371,13 @@ void show_state_filter(unsigned long state_filter)
#if BITS_PER_LONG == 32
printk(KERN_INFO
@@ -62859,7 +66549,7 @@
/*
* reset the NMI-timeout, listing all files on a slow
* console might take alot of time:
-@@ -6965,7 +7382,7 @@ void show_state_filter(unsigned long state_filter)
+@@ -6967,7 +7385,7 @@ void show_state_filter(unsigned long state_filter)
touch_nmi_watchdog();
if (!state_filter || (p->state & state_filter))
sched_show_task(p);
@@ -62868,7 +66558,7 @@
touch_all_softlockup_watchdogs();
-@@ -7331,13 +7748,13 @@ static void migrate_live_tasks(int src_cpu)
+@@ -7336,13 +7754,13 @@ static void migrate_live_tasks(int src_cpu)
read_lock(&tasklist_lock);
@@ -62884,7 +66574,15 @@
read_unlock(&tasklist_lock);
}
-@@ -9498,7 +9915,7 @@ void __init sched_init(void)
+@@ -9490,6 +9908,7 @@ void __init sched_init(void)
+ update_shares_data = __alloc_percpu(nr_cpu_ids * sizeof(unsigned long),
+ __alignof__(unsigned long));
+ #endif
++ kstat_glob.sched_lat.cur = &per_cpu__glob_kstat_lat;
+ for_each_possible_cpu(i) {
+ struct rq *rq;
+
+@@ -9503,7 +9922,7 @@ void __init sched_init(void)
#ifdef CONFIG_FAIR_GROUP_SCHED
init_task_group.shares = init_task_group_load;
INIT_LIST_HEAD(&rq->leaf_cfs_rq_list);
@@ -62893,7 +66591,7 @@
/*
* How much cpu bandwidth does init_task_group get?
*
-@@ -9544,7 +9961,7 @@ void __init sched_init(void)
+@@ -9549,7 +9968,7 @@ void __init sched_init(void)
rq->rt.rt_runtime = def_rt_bandwidth.rt_runtime;
#ifdef CONFIG_RT_GROUP_SCHED
INIT_LIST_HEAD(&rq->leaf_rt_rq_list);
@@ -62902,7 +66600,7 @@
init_tg_rt_entry(&init_task_group, &rq->rt, NULL, i, 1, NULL);
#elif defined CONFIG_USER_SCHED
init_tg_rt_entry(&root_task_group, &rq->rt, NULL, i, 0, NULL);
-@@ -9610,6 +10027,7 @@ void __init sched_init(void)
+@@ -9615,6 +10034,7 @@ void __init sched_init(void)
* During early bootup we pretend to be a normal task:
*/
current->sched_class = &fair_sched_class;
@@ -62910,7 +66608,7 @@
/* Allocate the nohz_cpu_mask if CONFIG_CPUMASK_OFFSTACK */
zalloc_cpumask_var(&nohz_cpu_mask, GFP_NOWAIT);
-@@ -9688,7 +10106,7 @@ void normalize_rt_tasks(void)
+@@ -9693,7 +10113,7 @@ void normalize_rt_tasks(void)
struct rq *rq;
read_lock_irqsave(&tasklist_lock, flags);
@@ -62919,7 +66617,7 @@
/*
* Only normalize user tasks:
*/
-@@ -9719,7 +10137,7 @@ void normalize_rt_tasks(void)
+@@ -9724,7 +10144,7 @@ void normalize_rt_tasks(void)
__task_rq_unlock(rq);
spin_unlock(&p->pi_lock);
@@ -62928,7 +66626,7 @@
read_unlock_irqrestore(&tasklist_lock, flags);
}
-@@ -10165,10 +10583,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
+@@ -10170,10 +10590,10 @@ static inline int tg_has_rt_tasks(struct task_group *tg)
{
struct task_struct *g, *p;
@@ -63457,17 +67155,19 @@
if (len < 0 || len > __NEW_UTS_LEN)
return -EINVAL;
diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c
-index e06d0b8..da15284 100644
+index e06d0b8..7216e06 100644
--- a/kernel/sys_ni.c
+++ b/kernel/sys_ni.c
-@@ -179,3 +179,15 @@ cond_syscall(sys_eventfd2);
+@@ -179,3 +179,17 @@ cond_syscall(sys_eventfd2);
/* performance counters: */
cond_syscall(sys_perf_event_open);
+cond_syscall(sys_getluid);
+cond_syscall(sys_setluid);
+cond_syscall(sys_setublimit);
++cond_syscall(compat_sys_setublimit);
+cond_syscall(sys_ubstat);
++cond_syscall(compat_sys_lutime);
+
+/* fairsched compat */
+cond_syscall(sys_fairsched_mknod);
@@ -63477,10 +67177,18 @@
+cond_syscall(sys_fairsched_chwt);
+cond_syscall(sys_fairsched_rate);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
-index b8bd058..d2d9eec 100644
+index b8bd058..5ef2188 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
-@@ -83,6 +83,21 @@ extern int pid_max_min, pid_max_max;
+@@ -50,6 +50,7 @@
+ #include <linux/ftrace.h>
+ #include <linux/slow-work.h>
+ #include <linux/perf_event.h>
++#include <linux/ve_task.h>
+
+ #include <asm/uaccess.h>
+ #include <asm/processor.h>
+@@ -83,6 +84,21 @@ extern int pid_max_min, pid_max_max;
extern int sysctl_drop_caches;
extern int percpu_pagelist_fraction;
extern int compat_log;
@@ -63502,7 +67210,17 @@
extern int latencytop_enabled;
extern int sysctl_nr_open_min, sysctl_nr_open_max;
#ifndef CONFIG_MMU
-@@ -178,9 +193,31 @@ static struct ctl_table_header root_table_header = {
+@@ -169,6 +185,9 @@ static int proc_taint(struct ctl_table *table, int write,
+ void __user *buffer, size_t *lenp, loff_t *ppos);
+ #endif
+
++static int proc_dointvec_ve(struct ctl_table *table, int write,
++ void __user *buffer, size_t *lenp, loff_t *ppos);
++
+ static struct ctl_table root_table[];
+ static struct ctl_table_root sysctl_table_root;
+ static struct ctl_table_header root_table_header = {
+@@ -178,9 +197,31 @@ static struct ctl_table_header root_table_header = {
.root = &sysctl_table_root,
.set = &sysctl_table_root.default_set,
};
@@ -63535,7 +67253,7 @@
};
static struct ctl_table kern_table[];
-@@ -504,6 +541,20 @@ static struct ctl_table kern_table[] = {
+@@ -504,6 +545,20 @@ static struct ctl_table kern_table[] = {
.proc_handler = &proc_dointvec,
},
#endif
@@ -63556,7 +67274,7 @@
#ifdef __hppa__
{
.ctl_name = KERN_HPPA_PWRSW,
-@@ -699,6 +750,24 @@ static struct ctl_table kern_table[] = {
+@@ -699,6 +754,24 @@ static struct ctl_table kern_table[] = {
.extra1 = &pid_max_min,
.extra2 = &pid_max_max,
},
@@ -63581,7 +67299,22 @@
{
.ctl_name = KERN_PANIC_ON_OOPS,
.procname = "panic_on_oops",
-@@ -1424,6 +1493,21 @@ static struct ctl_table vm_table[] = {
+@@ -824,10 +897,12 @@ static struct ctl_table kern_table[] = {
+ {
+ .ctl_name = KERN_RANDOMIZE,
+ .procname = "randomize_va_space",
+- .data = &randomize_va_space,
++ .data = &_randomize_va_space,
++ .extra1 = (void *)offsetof(struct ve_struct,
++ _randomize_va_space),
+ .maxlen = sizeof(int),
+ .mode = 0644,
+- .proc_handler = &proc_dointvec,
++ .proc_handler = &proc_dointvec_ve,
+ },
+ #endif
+ #if defined(CONFIG_S390) && defined(CONFIG_SMP)
+@@ -1424,6 +1499,21 @@ static struct ctl_table vm_table[] = {
.extra2 = &one,
},
#endif
@@ -63603,7 +67336,7 @@
/*
* NOTE: do not add new entries to this table unless you have read
-@@ -1600,6 +1684,13 @@ static struct ctl_table fs_table[] = {
+@@ -1600,6 +1690,13 @@ static struct ctl_table fs_table[] = {
};
static struct ctl_table debug_table[] = {
@@ -63617,7 +67350,7 @@
#if defined(CONFIG_X86) || defined(CONFIG_PPC)
{
.ctl_name = CTL_UNNUMBERED,
-@@ -2150,10 +2241,27 @@ struct ctl_table_header *__register_sysctl_paths(
+@@ -2150,10 +2247,27 @@ struct ctl_table_header *__register_sysctl_paths(
struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
struct ctl_table *table)
{
@@ -63645,7 +67378,7 @@
/**
* register_sysctl_table - register a sysctl table hierarchy
* @table: the top-level table structure
-@@ -2170,6 +2278,14 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
+@@ -2170,6 +2284,14 @@ struct ctl_table_header *register_sysctl_table(struct ctl_table *table)
return register_sysctl_paths(null_path, table);
}
@@ -63660,7 +67393,7 @@
/**
* unregister_sysctl_table - unregister a sysctl table hierarchy
* @header: the header returned from register_sysctl_table
-@@ -2231,6 +2347,18 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
+@@ -2231,6 +2353,18 @@ struct ctl_table_header *register_sysctl_paths(const struct ctl_path *path,
return NULL;
}
@@ -63679,7 +67412,33 @@
void unregister_sysctl_table(struct ctl_table_header * table)
{
}
-@@ -3236,6 +3364,56 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args)
+@@ -2902,6 +3036,25 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
+ return 0;
+ }
+
++#ifdef CONFIG_VE /* per-VE variant: .data is redirected before calling proc_dointvec() */
++static int proc_dointvec_ve(struct ctl_table *table, int write,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ struct ctl_table tmp_table;
++ /* Work on a copy; .extra1 is used as a byte offset from get_exec_env(). */
++ tmp_table = *table;
++ tmp_table.data = (char *)get_exec_env() + (unsigned long)table->extra1;
++
++ return proc_dointvec(&tmp_table, write, buffer, lenp, ppos);
++}
++#else /* !CONFIG_VE: hand the table entry to proc_dointvec() unchanged */
++static int proc_dointvec_ve(struct ctl_table *table, int write,
++ void __user *buffer, size_t *lenp, loff_t *ppos)
++{
++ return proc_dointvec(table, write, buffer, lenp, ppos);
++}
++#endif /* CONFIG_VE */
++
+ #else /* CONFIG_PROC_FS */
+
+ int proc_dostring(struct ctl_table *table, int write,
+@@ -3236,6 +3389,56 @@ static int deprecated_sysctl_warning(struct __sysctl_args *args)
return 0;
}
@@ -63736,7 +67495,7 @@
/*
* No sense putting this after each symbol definition, twice,
* exception granted :-)
-@@ -3249,7 +3427,9 @@ EXPORT_SYMBOL(proc_dostring);
+@@ -3249,7 +3452,9 @@ EXPORT_SYMBOL(proc_dostring);
EXPORT_SYMBOL(proc_doulongvec_minmax);
EXPORT_SYMBOL(proc_doulongvec_ms_jiffies_minmax);
EXPORT_SYMBOL(register_sysctl_table);
@@ -64165,10 +67924,10 @@
+
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
new file mode 100644
-index 0000000..8b59ff7
+index 0000000..85c42c3
--- /dev/null
+++ b/kernel/ve/ve.c
-@@ -0,0 +1,119 @@
+@@ -0,0 +1,129 @@
+/*
+ * linux/kernel/ve/ve.c
+ *
@@ -64252,6 +68011,12 @@
+#endif
+ .features = VE_FEATURE_SIT | VE_FEATURE_IPIP |
+ VE_FEATURE_PPP,
++ ._randomize_va_space =
++#ifdef CONFIG_COMPAT_BRK
++ 1,
++#else
++ 2,
++#endif
+};
+
+EXPORT_SYMBOL(ve0);
@@ -64269,12 +68034,16 @@
+EXPORT_SYMBOL(ve_cleanup_list);
+EXPORT_SYMBOL(ve_cleanup_thread);
+
++static DEFINE_PER_CPU(struct ve_cpu_stats, ve0_cpustats);
++static DEFINE_PER_CPU(struct kstat_lat_pcpu_snap_struct, ve0_lat_stats);
++
+void init_ve0(void)
+{
+ struct ve_struct *ve;
+
+ ve = get_ve0();
-+ ve->cpu_stats = NULL;
++ ve->cpu_stats = &per_cpu__ve0_cpustats;
++ ve->sched_lat_ve.cur = &per_cpu__ve0_lat_stats;
+ list_add(&ve->ve_list, &ve_list_head);
+}
+
@@ -64290,10 +68059,10 @@
+}
diff --git a/kernel/ve/vecalls.c b/kernel/ve/vecalls.c
new file mode 100644
-index 0000000..29b455d
+index 0000000..cc27878
--- /dev/null
+++ b/kernel/ve/vecalls.c
-@@ -0,0 +1,2264 @@
+@@ -0,0 +1,2335 @@
+/*
+ * linux/kernel/ve/vecalls.c
+ *
@@ -64353,6 +68122,7 @@
+#include <linux/tty.h>
+#include <linux/mount.h>
+#include <linux/kthread.h>
++#include <linux/oom.h>
+
+#include <net/route.h>
+#include <net/ip_fib.h>
@@ -64481,7 +68251,7 @@
+ struct ve_struct *ve;
+ int err;
+
-+ if (!capable(CAP_SETVEID) || veid == 0)
++ if (!capable_setveid() || veid == 0)
+ return -EPERM;
+
+ if ((ve = get_ve_by_id(veid)) == NULL)
@@ -64863,44 +68633,18 @@
+
+static int init_ve_sched(struct ve_struct *ve)
+{
-+#ifdef CONFIG_VZ_FAIRSCHED
+ int err;
+
-+ /*
-+ * We refuse to switch to an already existing node since nodes
-+ * keep a pointer to their ve_struct...
-+ */
-+ err = sys_fairsched_mknod(0, 1, ve->veid);
-+ if (err < 0) {
-+ printk(KERN_WARNING "Can't create fairsched node %d\n",
-+ ve->veid);
-+ return err;
-+ }
-+ err = sys_fairsched_mvpr(current->pid, ve->veid);
-+ if (err) {
-+ printk(KERN_WARNING "Can't switch to fairsched node %d\n",
-+ ve->veid);
-+ if (sys_fairsched_rmnod(ve->veid))
-+ printk(KERN_ERR "Can't clean fairsched node %d\n",
-+ ve->veid);
-+ return err;
-+ }
-+#endif
-+ ve_sched_attach(ve);
-+ return 0;
++ err = fairsched_new_node(ve->veid, 0);
++ if (err == 0)
++ ve_sched_attach(ve);
++
++ return err;
+}
+
+static void fini_ve_sched(struct ve_struct *ve)
+{
-+#ifdef CONFIG_VZ_FAIRSCHED
-+ if (task_fairsched_node_id(current) == ve->veid)
-+ if (sys_fairsched_mvpr(current->pid, FAIRSCHED_INIT_NODE_ID))
-+ printk(KERN_WARNING "Can't leave fairsched node %d\n",
-+ ve->veid);
-+ if (sys_fairsched_rmnod(ve->veid))
-+ printk(KERN_ERR "Can't remove fairsched node %d\n",
-+ ve->veid);
-+#endif
++ fairsched_drop_node(ve->veid);
+}
+
+/*
@@ -65023,6 +68767,8 @@
+ ve->start_jiffies = get_jiffies_64();
+ ve->start_cycles = get_cycles();
+
++ ve->_randomize_va_space = ve0._randomize_va_space;
++
+ return 0;
+}
+
@@ -65077,7 +68823,6 @@
+{
+ /* required for real_setdevperms from register_ve_<fs> above */
+ memcpy(&ve->ve_cap_bset, &tsk->cred->cap_effective, sizeof(kernel_cap_t));
-+ cap_lower(ve->ve_cap_bset, CAP_SETVEID);
+}
+
+static int ve_list_add(struct ve_struct *ve)
@@ -65135,6 +68880,10 @@
+ /* setup capabilities before enter */
+ set_task_ve_caps(new, new_creds);
+
++ /* Drop OOM protection. */
++ if (tsk->signal->oom_adj == OOM_DISABLE)
++ tsk->signal->oom_adj = 0;
++
+ old = tsk->ve_task_info.owner_env;
+ tsk->ve_task_info.owner_env = new;
+ tsk->ve_task_info.exec_env = new;
@@ -65193,13 +68942,24 @@
+static inline int init_ve_cpustats(struct ve_struct *ve)
+{
+ ve->cpu_stats = alloc_percpu(struct ve_cpu_stats);
-+ return ve->cpu_stats == NULL ? -ENOMEM : 0;
++ if (ve->cpu_stats == NULL)
++ return -ENOMEM;
++ ve->sched_lat_ve.cur = alloc_percpu(struct kstat_lat_pcpu_snap_struct);
++ if (ve->sched_lat_ve.cur == NULL) /* test the new allocation, not ve itself */
++ goto fail;
++ return 0;
++ /* Second per-cpu allocation failed: release the first and report -ENOMEM. */
++fail:
++ free_percpu(ve->cpu_stats);
++ return -ENOMEM;
+}
+
+static inline void free_ve_cpustats(struct ve_struct *ve)
+{
+ free_percpu(ve->cpu_stats);
+ ve->cpu_stats = NULL;
++ free_percpu(ve->sched_lat_ve.cur);
++ ve->sched_lat_ve.cur = NULL;
+}
+
+static int alone_in_pgrp(struct task_struct *tsk)
@@ -65469,7 +69229,7 @@
+ }
+
+ status = -EPERM;
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ goto out;
+
+ status = -EINVAL;
@@ -65814,6 +69574,8 @@
+#ifdef CONFIG_UNIX98_PTYS
+ free_ve_tty_driver(ve->ptm_driver);
+ free_ve_tty_driver(ve->pts_driver);
++ if (ve->allocated_ptys)
++ ida_destroy(ve->allocated_ptys);
+ kfree(ve->allocated_ptys);
+ ve->ptm_driver = ve->pts_driver = NULL;
+ ve->allocated_ptys = NULL;
@@ -65998,7 +69760,7 @@
+
+int real_ve_dev_map(envid_t veid, int op, char *dev_name)
+{
-+ if (!capable(CAP_SETVEID))
++ if (!capable_setveid())
+ return -EPERM;
+ switch (op) {
+ case VE_NETDEV_ADD:
@@ -66182,6 +69944,20 @@
+ ub->ub_parms[UB_PRIVVMPAGES].held ;
+}
+
++static void ve_swapinfo(struct sysinfo *val, struct user_beancounter *ub)
++{
++ unsigned long size, used;
++ /* Fill val's swap fields from ub's UB_SWAPPAGES limit and held values. */
++ size = ub->ub_parms[UB_SWAPPAGES].limit;
++ used = ub->ub_parms[UB_SWAPPAGES].held;
++ /* A limit equal to UB_MAXVALUE is reported as zero total swap. */
++ if (size == UB_MAXVALUE)
++ size = 0;
++ /* freeswap is clamped to 0 when held is not below the limit. */
++ val->totalswap = size;
++ val->freeswap = size > used ? size - used : 0;
++}
++
+static inline int ve_mi_replace(struct meminfo *mi, int old_ret)
+{
+#ifdef CONFIG_BEANCOUNTERS
@@ -66198,7 +69974,7 @@
+ return NOTIFY_DONE | NOTIFY_STOP_MASK; /* No virtualization */
+
+ nodettram = mi->si.totalram;
-+ ub = current->mm->mm_ub;
++ ub = top_beancounter(current->mm->mm_ub);
+ usedmem = ve_used_mem(ub);
+
+ memset(mi, 0, sizeof(*mi));
@@ -66208,6 +69984,8 @@
+ mi->si.freeram = (mi->si.totalram > usedmem) ?
+ (mi->si.totalram - usedmem) : 0;
+
++ ve_swapinfo(&mi->si, ub);
++
+ return NOTIFY_OK | NOTIFY_STOP_MASK;
+#else
+ return NOTIFY_DONE;
@@ -66228,6 +70006,62 @@
+ .notifier_call = meminfo_call
+};
+
++/* /proc/vz/veinfo */
++
++static ve_seq_print_t veaddr_seq_print_cb;
++
++void vzmon_register_veaddr_print_cb(ve_seq_print_t cb)
++{
++ rcu_assign_pointer(veaddr_seq_print_cb, cb);
++}
++EXPORT_SYMBOL(vzmon_register_veaddr_print_cb);
++
++void vzmon_unregister_veaddr_print_cb(ve_seq_print_t cb)
++{
++ rcu_assign_pointer(veaddr_seq_print_cb, NULL); /* cb is not compared; the hook is cleared regardless */
++ synchronize_rcu(); /* let readers in veinfo_seq_show()'s RCU section finish first */
++}
++EXPORT_SYMBOL(vzmon_unregister_veaddr_print_cb);
++
++static int veinfo_seq_show(struct seq_file *m, void *v)
++{
++ struct ve_struct *ve;
++ ve_seq_print_t veaddr_seq_print;
++ /* v is treated as the ve_list node embedded in a ve_struct. */
++ ve = list_entry((struct list_head *)v, struct ve_struct, ve_list);
++ /* One row per VE: veid, class_id and the pcounter value. */
++ seq_printf(m, "%10u %5u %5u", ve->veid,
++ ve->class_id, atomic_read(&ve->pcounter));
++ /* Optional extra printer, fetched from veaddr_seq_print_cb under RCU. */
++ rcu_read_lock();
++ veaddr_seq_print = rcu_dereference(veaddr_seq_print_cb);
++ if (veaddr_seq_print)
++ veaddr_seq_print(m, ve);
++ rcu_read_unlock();
++
++ seq_putc(m, '\n');
++ return 0;
++}
++
++static struct seq_operations veinfo_seq_op = {
++ .start = ve_seq_start,
++ .next = ve_seq_next,
++ .stop = ve_seq_stop,
++ .show = veinfo_seq_show,
++};
++
++static int veinfo_open(struct inode *inode, struct file *file)
++{
++ return seq_open(file, &veinfo_seq_op);
++}
++
++static struct file_operations proc_veinfo_operations = {
++ .open = veinfo_open,
++ .read = seq_read,
++ .llseek = seq_lseek,
++ .release = seq_release,
++};
++
+static int __init init_vecalls_proc(void)
+{
+ struct proc_dir_entry *de;
@@ -66247,6 +70081,11 @@
+ if (!de)
+ printk(KERN_WARNING "VZMON: can't make version proc entry\n");
+
++ de = proc_create("veinfo", S_IFREG | S_IRUSR, proc_vz_dir,
++ &proc_veinfo_operations);
++ if (!de)
++ printk(KERN_WARNING "VZMON: can't make veinfo proc entry\n");
++
+ virtinfo_notifier_register(VITYPE_GENERAL, &meminfo_notifier_block);
+ return 0;
+}
@@ -66256,6 +70095,7 @@
+ remove_proc_entry("version", proc_vz_dir);
+ remove_proc_entry("devperms", proc_vz_dir);
+ remove_proc_entry("vestat", proc_vz_dir);
++ remove_proc_entry("veinfo", proc_vz_dir);
+ virtinfo_notifier_unregister(VITYPE_GENERAL, &meminfo_notifier_block);
+}
+#else
@@ -66560,10 +70400,10 @@
+module_exit(vecalls_exit)
diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
new file mode 100644
-index 0000000..3889411
+index 0000000..50f4d9a
--- /dev/null
+++ b/kernel/ve/veowner.c
-@@ -0,0 +1,150 @@
+@@ -0,0 +1,160 @@
+/*
+ * kernel/ve/veowner.c
+ *
@@ -66590,6 +70430,7 @@
+#include <linux/list.h>
+#include <linux/inetdevice.h>
+#include <linux/pid_namespace.h>
++#include <linux/xattr.h>
+#include <asm/system.h>
+#include <asm/io.h>
+
@@ -66641,6 +70482,7 @@
+ * OpenVZ sysctl
+ * ------------------------------------------------------------------------
+ */
++int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
+extern int ve_area_access_check;
+
+#ifdef CONFIG_INET
@@ -66671,6 +70513,14 @@
+ .mode = 0644,
+ .proc_handler = proc_dointvec,
+ },
++ {
++ .ctl_name = 228,
++ .procname = "ve-xattr-policy",
++ .data = &ve_xattr_policy,
++ .maxlen = sizeof(int),
++ .mode = 0644,
++ .proc_handler = &proc_dointvec,
++ },
+ { 0 }
+};
+
@@ -67550,7 +71400,7 @@
if (!task_early_kill(tsk))
diff --git a/mm/memory.c b/mm/memory.c
-index 4e59455..fcdb9fb 100644
+index 4e59455..220dc95 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -42,6 +42,9 @@
@@ -67575,6 +71425,15 @@
#include <asm/io.h>
#include <asm/pgalloc.h>
#include <asm/uaccess.h>
+@@ -94,7 +102,7 @@ EXPORT_SYMBOL(high_memory);
+ * ( When CONFIG_COMPAT_BRK=y we exclude brk from randomization,
+ * as ancient (libc5 based) binaries can segfault. )
+ */
+-int randomize_va_space __read_mostly =
++int _randomize_va_space __read_mostly =
+ #ifdef CONFIG_COMPAT_BRK
+ 1;
+ #else
@@ -132,18 +140,21 @@ void pgd_clear_bad(pgd_t *pgd)
pgd_ERROR(*pgd);
pgd_clear(pgd);
@@ -68216,7 +72075,7 @@
static int do_mlockall(int flags)
{
diff --git a/mm/mmap.c b/mm/mmap.c
-index ae19746..a5dd0bf 100644
+index ae19746..991a1ac 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -29,6 +29,7 @@
@@ -68279,6 +72138,15 @@
goto out;
set_brk:
mm->brk = brk;
+@@ -927,7 +946,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr,
+ prot |= PROT_EXEC;
+
+ if (!len)
+- return -EINVAL;
++ return strncmp(current->comm, "rpm", 3) ? -EINVAL : addr;
+
+ if (!(flags & MAP_FIXED))
+ addr = round_hint_to_min(addr);
@@ -1106,6 +1125,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr,
struct rb_node **rb_link, *rb_parent;
unsigned long charged = 0;
@@ -69179,7 +73047,7 @@
dec_mm_counter(mm, file_rss);
(*mapcount)--;
diff --git a/mm/shmem.c b/mm/shmem.c
-index 356dd99..141b181 100644
+index 356dd99..bc74e50 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -31,7 +31,11 @@
@@ -69203,7 +73071,41 @@
#include <asm/uaccess.h>
#include <asm/div64.h>
#include <asm/pgtable.h>
-@@ -214,7 +220,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
+@@ -107,14 +113,31 @@ enum sgp_type {
+ };
+
+ #ifdef CONFIG_TMPFS
++
++#include <linux/virtinfo.h>
++
++static unsigned long tmpfs_ram_pages(void)
++{
++ struct meminfo mi;
++ /* RAM page count for tmpfs sizing: totalram_pages when ve_is_super() holds. */
++ if (ve_is_super(get_exec_env()))
++ return totalram_pages;
++ /* Otherwise run the VIRTINFO_MEMINFO notifier chain; 0 on NOTIFY_FAIL. */
++ memset(&mi, 0, sizeof(mi));
++ si_meminfo(&mi.si);
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi) & NOTIFY_FAIL)
++ return 0;
++ return mi.si.totalram;
++}
++
+ static unsigned long shmem_default_max_blocks(void)
+ {
+- return totalram_pages / 2;
++ return tmpfs_ram_pages() / 2;
+ }
+
+ static unsigned long shmem_default_max_inodes(void)
+ {
+- return min(totalram_pages - totalhigh_pages, totalram_pages / 2);
++ return min(totalram_pages - totalhigh_pages, tmpfs_ram_pages() / 2);
+ }
+ #endif
+
+@@ -214,7 +237,7 @@ static inline void shmem_unacct_blocks(unsigned long flags, long pages)
static const struct super_operations shmem_ops;
static const struct address_space_operations shmem_aops;
@@ -69212,7 +73114,7 @@
static const struct inode_operations shmem_inode_operations;
static const struct inode_operations shmem_dir_inode_operations;
static const struct inode_operations shmem_special_inode_operations;
-@@ -277,7 +283,7 @@ static void shmem_free_inode(struct super_block *sb)
+@@ -277,7 +300,7 @@ static void shmem_free_inode(struct super_block *sb)
*
* It has to be called with the spinlock held.
*/
@@ -69221,7 +73123,7 @@
{
struct shmem_inode_info *info = SHMEM_I(inode);
long freed;
-@@ -287,6 +293,8 @@ static void shmem_recalc_inode(struct inode *inode)
+@@ -287,6 +310,8 @@ static void shmem_recalc_inode(struct inode *inode)
info->alloced -= freed;
shmem_unacct_blocks(info->flags, freed);
shmem_free_blocks(inode, freed);
@@ -69230,7 +73132,7 @@
}
}
-@@ -391,6 +399,11 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
+@@ -391,6 +416,11 @@ static void shmem_swp_set(struct shmem_inode_info *info, swp_entry_t *entry, uns
struct page *page = kmap_atomic_to_page(entry);
set_page_private(page, page_private(page) + incdec);
}
@@ -69242,7 +73144,7 @@
}
/**
-@@ -407,14 +420,24 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
+@@ -407,14 +437,24 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
struct page *page = NULL;
swp_entry_t *entry;
@@ -69269,7 +73171,7 @@
/*
* Test free_blocks against 1 not 0, since we have 1 data
* page (and perhaps indirect index pages) yet to allocate:
-@@ -424,7 +447,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
+@@ -424,7 +464,8 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
spin_lock(&sbinfo->stat_lock);
if (sbinfo->free_blocks <= 1) {
spin_unlock(&sbinfo->stat_lock);
@@ -69279,7 +73181,7 @@
}
sbinfo->free_blocks--;
inode->i_blocks += BLOCKS_PER_PAGE;
-@@ -432,31 +456,43 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
+@@ -432,31 +473,43 @@ static swp_entry_t *shmem_swp_alloc(struct shmem_inode_info *info, unsigned long
}
spin_unlock(&info->lock);
@@ -69329,7 +73231,7 @@
}
/**
-@@ -564,6 +600,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
+@@ -564,6 +617,7 @@ static void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end)
return;
spin_lock(&info->lock);
@@ -69337,7 +73239,7 @@
info->flags |= SHMEM_TRUNCATE;
if (likely(end == (loff_t) -1)) {
limit = info->next_index;
-@@ -750,7 +787,7 @@ done2:
+@@ -750,7 +804,7 @@ done2:
info->swapped -= nr_swaps_freed;
if (nr_pages_to_free)
shmem_free_blocks(inode, nr_pages_to_free);
@@ -69346,7 +73248,7 @@
spin_unlock(&info->lock);
/*
-@@ -833,6 +870,7 @@ static void shmem_delete_inode(struct inode *inode)
+@@ -833,6 +887,7 @@ static void shmem_delete_inode(struct inode *inode)
}
}
BUG_ON(inode->i_blocks);
@@ -69354,7 +73256,7 @@
shmem_free_inode(inode->i_sb);
clear_inode(inode);
}
-@@ -1020,6 +1058,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
+@@ -1020,6 +1075,12 @@ int shmem_unuse(swp_entry_t entry, struct page *page)
out: return found; /* 0 or 1 or -ENOMEM */
}
@@ -69367,7 +73269,7 @@
/*
* Move the page from the page cache to the swap cache.
*/
-@@ -1051,7 +1095,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+@@ -1051,7 +1112,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
* discarded.
*/
if (wbc->for_reclaim)
@@ -69376,7 +73278,7 @@
else
swap.val = 0;
-@@ -1069,7 +1113,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
+@@ -1069,7 +1130,7 @@ static int shmem_writepage(struct page *page, struct writeback_control *wbc)
free_swap_and_cache(*entry);
shmem_swp_set(info, entry, 0);
}
@@ -69385,7 +73287,7 @@
if (swap.val && add_to_swap_cache(page, swap, GFP_ATOMIC) == 0) {
remove_from_page_cache(page);
-@@ -1252,7 +1296,7 @@ repeat:
+@@ -1252,7 +1313,7 @@ repeat:
}
spin_lock(&info->lock);
@@ -69394,7 +73296,7 @@
entry = shmem_swp_alloc(info, idx, sgp);
if (IS_ERR(entry)) {
spin_unlock(&info->lock);
-@@ -1455,6 +1499,7 @@ repeat:
+@@ -1455,6 +1516,7 @@ repeat:
clear_highpage(filepage);
flush_dcache_page(filepage);
SetPageUptodate(filepage);
@@ -69402,7 +73304,7 @@
if (sgp == SGP_DIRTY)
set_page_dirty(filepage);
}
-@@ -1512,20 +1557,27 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
+@@ -1512,20 +1574,27 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
spin_lock(&info->lock);
if (lock && !(info->flags & VM_LOCKED)) {
@@ -69431,7 +73333,7 @@
spin_unlock(&info->lock);
return retval;
}
-@@ -1559,6 +1611,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
+@@ -1559,6 +1628,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, int mode,
inode->i_generation = get_seconds();
info = SHMEM_I(inode);
memset(info, 0, (char *)inode - (char *)info);
@@ -69439,7 +73341,16 @@
spin_lock_init(&info->lock);
info->flags = flags & VM_NORESERVE;
INIT_LIST_HEAD(&info->swaplist);
-@@ -2424,7 +2477,7 @@ static const struct address_space_operations shmem_aops = {
+@@ -2182,7 +2252,7 @@ static int shmem_parse_options(char *options, struct shmem_sb_info *sbinfo,
+ size = memparse(value,&rest);
+ if (*rest == '%') {
+ size <<= PAGE_SHIFT;
+- size *= totalram_pages;
++ size *= tmpfs_ram_pages();
+ do_div(size, 100);
+ rest++;
+ }
+@@ -2424,7 +2494,7 @@ static const struct address_space_operations shmem_aops = {
.error_remove_page = generic_error_remove_page,
};
@@ -69448,7 +73359,7 @@
.mmap = shmem_mmap,
#ifdef CONFIG_TMPFS
.llseek = generic_file_llseek,
-@@ -2437,6 +2490,7 @@ static const struct file_operations shmem_file_operations = {
+@@ -2437,6 +2507,7 @@ static const struct file_operations shmem_file_operations = {
.splice_write = generic_file_splice_write,
#endif
};
@@ -69456,7 +73367,7 @@
static const struct inode_operations shmem_inode_operations = {
.truncate = shmem_truncate,
-@@ -2506,6 +2560,10 @@ static const struct vm_operations_struct shmem_vm_ops = {
+@@ -2506,6 +2577,10 @@ static const struct vm_operations_struct shmem_vm_ops = {
#endif
};
@@ -69467,7 +73378,7 @@
static int shmem_get_sb(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data, struct vfsmount *mnt)
-@@ -2513,12 +2571,13 @@ static int shmem_get_sb(struct file_system_type *fs_type,
+@@ -2513,12 +2588,13 @@ static int shmem_get_sb(struct file_system_type *fs_type,
return get_sb_nodev(fs_type, flags, data, shmem_fill_super, mnt);
}
@@ -69482,7 +73393,7 @@
int __init init_tmpfs(void)
{
-@@ -2608,6 +2667,36 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
+@@ -2608,6 +2684,36 @@ int shmem_lock(struct file *file, int lock, struct user_struct *user)
/* common code */
@@ -69519,7 +73430,7 @@
/**
* shmem_file_setup - get an unlinked file living in tmpfs
* @name: name for dentry (to be seen in /proc/<pid>/maps
-@@ -2653,6 +2742,9 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
+@@ -2653,6 +2759,9 @@ struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags
d_instantiate(dentry, inode);
inode->i_size = size;
inode->i_nlink = 0; /* It is unlinked */
@@ -69529,7 +73440,7 @@
init_file(file, shm_mnt, dentry, FMODE_WRITE | FMODE_READ,
&shmem_file_operations);
-@@ -2689,6 +2781,8 @@ int shmem_zero_setup(struct vm_area_struct *vma)
+@@ -2689,6 +2798,8 @@ int shmem_zero_setup(struct vm_area_struct *vma)
if (vma->vm_file)
fput(vma->vm_file);
@@ -70738,7 +74649,7 @@
* swapin_readahead - swap in pages in hope we need them soon
* @entry: swap entry of this memory
diff --git a/mm/swapfile.c b/mm/swapfile.c
-index 9c590ee..f5bc813 100644
+index 9c590ee..9ce0143 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -35,6 +35,8 @@
@@ -71084,7 +74995,56 @@
#ifdef CONFIG_PROC_FS
/* iterator */
static void *swap_start(struct seq_file *swap, loff_t *pos)
-@@ -1743,7 +1817,7 @@ static const struct file_operations proc_swaps_operations = {
+@@ -1729,21 +1803,55 @@ static const struct seq_operations swaps_op = {
+ .show = swap_show
+ };
+
++#include <linux/virtinfo.h>
++
++static int swap_show_ve(struct seq_file *swap, void *v)
++{
++ struct meminfo mi;
++ /* Show callback that swaps_open() passes to single_open(); always returns 0. */
++ memset(&mi, 0, sizeof(mi));
++ si_swapinfo(&mi.si);
++ if (virtinfo_notifier_call(VITYPE_GENERAL, VIRTINFO_MEMINFO, &mi)
++ & NOTIFY_FAIL)
++ goto out;
++ /* Header is printed unless the notifier failed; the entry only if totalswap != 0. */
++ seq_printf(swap, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
++ if (!mi.si.totalswap)
++ goto out;
++ seq_printf(swap, "%-40s%s\t%lu\t%lu\t%d\n",
++ "/dev/null",
++ "partition",
++ mi.si.totalswap << (PAGE_SHIFT - 10),
++ (mi.si.totalswap - mi.si.freeswap) << (PAGE_SHIFT - 10),
++ -1);
++out:
++ return 0;
++}
++
+ static int swaps_open(struct inode *inode, struct file *file)
+ {
++ if (!ve_is_super(get_exec_env()))
++ return single_open(file, &swap_show_ve, NULL);
+ return seq_open(file, &swaps_op);
+ }
+
++static int swaps_release(struct inode *inode, struct file *file)
++{
++ if (!ve_is_super(file->owner_env)) /* NOTE(review): swaps_open() tests get_exec_env() instead - confirm they agree */
++ return single_release(inode, file); /* pairs with single_open() in swaps_open() */
++ return seq_release(inode, file); /* pairs with seq_open() in swaps_open() */
++}
++
+ static const struct file_operations proc_swaps_operations = {
+ .open = swaps_open,
+ .read = seq_read,
+ .llseek = seq_lseek,
+- .release = seq_release,
++ .release = swaps_release,
+ };
static int __init procswaps_init(void)
{
@@ -71093,7 +75053,7 @@
return 0;
}
__initcall(procswaps_init);
-@@ -1973,6 +2047,11 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
+@@ -1973,6 +2081,11 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
goto bad_swap;
}
@@ -71105,7 +75065,7 @@
if (p->bdev) {
if (blk_queue_nonrot(bdev_get_queue(p->bdev))) {
p->flags |= SWP_SOLIDSTATE;
-@@ -1991,6 +2070,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
+@@ -1991,6 +2104,8 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
p->prio = --least_priority;
p->swap_map = swap_map;
p->flags |= SWP_WRITEOK;
@@ -71114,7 +75074,7 @@
nr_swap_pages += nr_good_pages;
total_swap_pages += nr_good_pages;
-@@ -2049,6 +2130,8 @@ out:
+@@ -2049,6 +2164,8 @@ out:
return error;
}
@@ -71123,7 +75083,7 @@
void si_swapinfo(struct sysinfo *val)
{
unsigned int i;
-@@ -2146,6 +2229,8 @@ void swap_duplicate(swp_entry_t entry)
+@@ -2146,6 +2263,8 @@ void swap_duplicate(swp_entry_t entry)
__swap_duplicate(entry, SWAP_MAP);
}
@@ -72129,7 +76089,7 @@
else
set_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags);
diff --git a/net/core/dev.c b/net/core/dev.c
-index 74d0cce..48199c3 100644
+index 74d0cce..ee00d53 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -130,6 +130,9 @@
@@ -72256,13 +76216,14 @@
dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
"left");
if (audit_enabled) {
-@@ -4547,11 +4576,20 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+@@ -4547,16 +4576,25 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
* - require strict serialization.
* - do not return a value
*/
+ case SIOCSIFMTU:
+ case SIOCSIFHWADDR:
case SIOCSIFFLAGS:
++ case SIOCSIFTXQLEN:
+ if (!capable(CAP_NET_ADMIN) &&
+ !capable(CAP_VE_NET_ADMIN))
+ return -EPERM;
@@ -72279,6 +76240,11 @@
case SIOCSIFSLAVE:
case SIOCADDMULTI:
case SIOCDELMULTI:
+ case SIOCSIFHWBROADCAST:
+- case SIOCSIFTXQLEN:
+ case SIOCSMIIREG:
+ case SIOCBONDENSLAVE:
+ case SIOCBONDRELEASE:
@@ -4619,12 +4657,11 @@ int dev_ioctl(struct net *net, unsigned int cmd, void __user *arg)
*/
static int dev_new_index(struct net *net)
@@ -75614,7 +79580,7 @@
limit = ((unsigned long)sysctl_tcp_mem[1]) << (PAGE_SHIFT - 7);
max_share = min(4UL*1024*1024, limit);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
-index d86784b..46b61f5 100644
+index 2433bcd..0eb9c17 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -72,6 +72,8 @@
@@ -75751,7 +79717,7 @@
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPHPHITS);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
-index 7cda24b..b0f93fd 100644
+index 7cda24b..e141833 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -72,6 +72,8 @@
@@ -75847,7 +79813,7 @@
__free_page(sk->sk_sndmsg_page);
sk->sk_sndmsg_page = NULL;
}
-@@ -2478,6 +2493,87 @@ void __init tcp_v4_init(void)
+@@ -2478,6 +2493,93 @@ void __init tcp_v4_init(void)
panic("Failed to create the TCP control socket.\n");
}
@@ -75858,16 +79824,13 @@
+
+ /* Check the assumed state of the socket. */
+ if (!sock_flag(sk, SOCK_DEAD)) {
-+ static int printed;
-+invalid:
-+ if (!printed)
-+ printk(KERN_DEBUG "Killing sk: dead %d, state %d, "
-+ "wrseq %u unseq %u, wrqu %d.\n",
-+ sock_flag(sk, SOCK_DEAD), sk->sk_state,
-+ tp->write_seq, tp->snd_una,
-+ !skb_queue_empty(&sk->sk_write_queue));
-+ printed = 1;
-+ return;
++ printk(KERN_WARNING "Killing sk: dead %d, state %d, "
++ "wrseq %u unseq %u, wrqu %d.\n",
++ sock_flag(sk, SOCK_DEAD), sk->sk_state,
++ tp->write_seq, tp->snd_una,
++ !skb_queue_empty(&sk->sk_write_queue));
++ sk->sk_err = ECONNRESET;
++ sk->sk_error_report(sk);
+ }
+
+ tcp_send_active_reset(sk, GFP_ATOMIC);
@@ -75886,22 +79849,21 @@
+ */
+ tcp_time_wait(sk, TCP_FIN_WAIT2, 0);
+ break;
-+ case TCP_LAST_ACK:
++ default:
+ /* Just jump into CLOSED state. */
+ tcp_done(sk);
+ break;
-+ default:
-+ /* The socket must be already close()d. */
-+ goto invalid;
+ }
+}
+
+void tcp_v4_kill_ve_sockets(struct ve_struct *envid)
+{
+ struct inet_ehash_bucket *head;
-+ int i;
++ int i, retry;
+
+ /* alive */
++again:
++ retry = 0;
+ local_bh_disable();
+ head = tcp_hashinfo.ehash;
+ for (i = 0; i < tcp_hashinfo.ehash_size; i++) {
@@ -75916,6 +79878,12 @@
+ spin_unlock(lock);
+
+ bh_lock_sock(sk);
++ if (sock_owned_by_user(sk)) {
++ retry = 1;
++ bh_unlock_sock(sk);
++ sock_put(sk);
++ break;
++ }
+ /* sk might have disappeared from the hash before
+ * we got the lock */
+ if (sk->sk_state != TCP_CLOSE)
@@ -75928,6 +79896,10 @@
+ spin_unlock(lock);
+ }
+ local_bh_enable();
++ if (retry) {
++ schedule_timeout_interruptible(HZ);
++ goto again;
++ }
+}
+EXPORT_SYMBOL(tcp_v4_kill_ve_sockets);
+#endif
@@ -76355,8 +80327,20 @@
sock_put(sk);
+ (void)set_exec_env(ve);
}
+diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
+index 0fa9f70..ca1c6bf 100644
+--- a/net/ipv4/udp.c
++++ b/net/ipv4/udp.c
+@@ -138,6 +138,7 @@ static int udp_lib_lport_inuse(struct net *net, __u16 num,
+ sk2 != sk &&
+ (bitmap || sk2->sk_hash == num) &&
+ (!sk2->sk_reuse || !sk->sk_reuse) &&
++ sk->sk_reuse != 2 &&
+ (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if
+ || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) &&
+ (*saddr_comp)(sk, sk2)) {
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
-index d1f77cc..d62bbca 100644
+index d1f77cc..7fc4efd 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -407,9 +407,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev)
@@ -76397,7 +80381,17 @@
static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
unsigned int plen)
-@@ -2202,7 +2202,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
+@@ -2188,7 +2188,8 @@ static int inet6_addr_del(struct net *net, int ifindex, struct in6_addr *pfx,
+ disable IPv6 on this interface.
+ */
+ if (idev->addr_list == NULL)
+- addrconf_ifdown(idev->dev, 1);
++ addrconf_ifdown(idev->dev,
++ !(idev->dev->flags & IFF_LOOPBACK));
+ return 0;
+ }
+ }
+@@ -2202,7 +2203,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
@@ -76406,7 +80400,7 @@
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-@@ -2221,7 +2221,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
+@@ -2221,7 +2222,7 @@ int addrconf_del_ifaddr(struct net *net, void __user *arg)
struct in6_ifreq ireq;
int err;
@@ -76415,7 +80409,7 @@
return -EPERM;
if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-@@ -2731,6 +2731,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
+@@ -2731,6 +2732,9 @@ static int addrconf_ifdown(struct net_device *dev, int how)
static void addrconf_rs_timer(unsigned long data)
{
struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
@@ -76425,7 +80419,7 @@
if (ifp->idev->cnf.forwarding)
goto out;
-@@ -2765,6 +2768,7 @@ static void addrconf_rs_timer(unsigned long data)
+@@ -2765,6 +2769,7 @@ static void addrconf_rs_timer(unsigned long data)
out:
in6_ifa_put(ifp);
@@ -76433,7 +80427,15 @@
}
/*
-@@ -2841,7 +2845,9 @@ static void addrconf_dad_timer(unsigned long data)
+@@ -2801,6 +2806,7 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp, u32 flags)
+ if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) ||
+ idev->cnf.accept_dad < 1 ||
+ !(ifp->flags&IFA_F_TENTATIVE) ||
++ dev->owner_env->disable_net ||
+ ifp->flags & IFA_F_NODAD) {
+ ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
+ spin_unlock_bh(&ifp->lock);
+@@ -2841,7 +2847,9 @@ static void addrconf_dad_timer(unsigned long data)
struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;
@@ -76443,7 +80445,7 @@
read_lock_bh(&idev->lock);
if (idev->dead) {
read_unlock_bh(&idev->lock);
-@@ -2872,6 +2878,7 @@ static void addrconf_dad_timer(unsigned long data)
+@@ -2872,6 +2880,7 @@ static void addrconf_dad_timer(unsigned long data)
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
@@ -76451,7 +80453,7 @@
}
static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
-@@ -3093,6 +3100,7 @@ static void addrconf_verify(unsigned long foo)
+@@ -3093,6 +3102,7 @@ static void addrconf_verify(unsigned long foo)
struct inet6_ifaddr *ifp;
unsigned long now, next;
int i;
@@ -76459,7 +80461,7 @@
spin_lock_bh(&addrconf_verify_lock);
now = jiffies;
-@@ -3113,6 +3121,8 @@ restart:
+@@ -3113,6 +3123,8 @@ restart:
if (ifp->flags & IFA_F_PERMANENT)
continue;
@@ -76468,7 +80470,7 @@
spin_lock(&ifp->lock);
age = (now - ifp->tstamp) / HZ;
-@@ -3128,9 +3138,11 @@ restart:
+@@ -3128,9 +3140,11 @@ restart:
in6_ifa_hold(ifp);
read_unlock(&addrconf_hash_lock);
ipv6_del_addr(ifp);
@@ -76480,7 +80482,7 @@
continue;
} else if (age >= ifp->prefered_lft) {
/* jiffies - ifp->tstamp > age >= ifp->prefered_lft */
-@@ -3152,6 +3164,7 @@ restart:
+@@ -3152,6 +3166,7 @@ restart:
ipv6_ifa_notify(0, ifp);
in6_ifa_put(ifp);
@@ -76488,7 +80490,7 @@
goto restart;
}
#ifdef CONFIG_IPV6_PRIVACY
-@@ -3173,6 +3186,7 @@ restart:
+@@ -3173,6 +3188,7 @@ restart:
ipv6_create_tempaddr(ifpub, ifp);
in6_ifa_put(ifpub);
in6_ifa_put(ifp);
@@ -76496,7 +80498,7 @@
goto restart;
}
} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
-@@ -3185,6 +3199,7 @@ restart:
+@@ -3185,6 +3201,7 @@ restart:
next = ifp->tstamp + ifp->prefered_lft * HZ;
spin_unlock(&ifp->lock);
}
@@ -77335,7 +81337,7 @@
return err;
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
-index 21d100b..0ecd5b4 100644
+index 21d100b..1c534b7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -61,6 +61,8 @@
@@ -77356,7 +81358,15 @@
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
-@@ -1496,6 +1498,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -892,6 +894,7 @@ struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
+ .destructor = tcp_v6_reqsk_destructor,
+ .send_reset = tcp_v6_send_reset
+ };
++EXPORT_SYMBOL(tcp6_request_sock_ops);
+
+ #ifdef CONFIG_TCP_MD5SIG
+ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
+@@ -1496,6 +1499,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp;
struct sk_buff *opt_skb = NULL;
@@ -77364,7 +81374,7 @@
/* Imagine: socket is IPv6. IPv4 packet arrives,
goes to IPv4 receive handler and backlogged.
-@@ -1508,6 +1511,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1508,6 +1512,8 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (skb->protocol == htons(ETH_P_IP))
return tcp_v4_do_rcv(sk, skb);
@@ -77373,7 +81383,7 @@
#ifdef CONFIG_TCP_MD5SIG
if (tcp_v6_inbound_md5_hash (sk, skb))
goto discard;
-@@ -1544,7 +1549,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1544,7 +1550,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
TCP_CHECK_TIMER(sk);
if (opt_skb)
goto ipv6_pktoptions;
@@ -77382,7 +81392,7 @@
}
if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
-@@ -1565,7 +1570,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1565,7 +1571,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto reset;
if (opt_skb)
__kfree_skb(opt_skb);
@@ -77391,7 +81401,7 @@
}
}
-@@ -1575,6 +1580,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
+@@ -1575,6 +1581,9 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
TCP_CHECK_TIMER(sk);
if (opt_skb)
goto ipv6_pktoptions;
@@ -77401,7 +81411,7 @@
return 0;
reset:
-@@ -1583,7 +1591,7 @@ discard:
+@@ -1583,7 +1592,7 @@ discard:
if (opt_skb)
__kfree_skb(opt_skb);
kfree_skb(skb);
@@ -77410,7 +81420,7 @@
csum_err:
TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
goto discard;
-@@ -1614,7 +1622,7 @@ ipv6_pktoptions:
+@@ -1614,7 +1623,7 @@ ipv6_pktoptions:
}
kfree_skb(opt_skb);
@@ -77419,7 +81429,7 @@
}
static int tcp_v6_rcv(struct sk_buff *skb)
-@@ -1793,7 +1801,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
+@@ -1793,7 +1802,7 @@ static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
* TCP over IPv4 via INET6 API
*/
@@ -77428,7 +81438,7 @@
.queue_xmit = ip_queue_xmit,
.send_check = tcp_v4_send_check,
.rebuild_header = inet_sk_rebuild_header,
-@@ -1812,6 +1820,8 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
+@@ -1812,6 +1821,8 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = {
#endif
};
@@ -80219,7 +84229,7 @@
cleanup_socket_xprt();
svc_cleanup_xprt_sock();
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
-index 1c246a4..f969dee 100644
+index 70b0a22..f66b225 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -229,6 +229,9 @@ static int svc_sendto(struct svc_rqst *rqstp, struct xdr_buf *xdr)
@@ -80241,7 +84251,7 @@
return len;
}
-@@ -1436,8 +1441,9 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
+@@ -1437,8 +1442,9 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
error = sock_create_kern(family, type, protocol, &sock);
if (error < 0)
@@ -80252,7 +84262,7 @@
svc_reclassify_socket(sock);
/*
-@@ -1488,6 +1494,8 @@ static void svc_sock_detach(struct svc_xprt *xprt)
+@@ -1489,6 +1495,8 @@ static void svc_sock_detach(struct svc_xprt *xprt)
dprintk("svc: svc_sock_detach(%p)\n", svsk);
@@ -80668,7 +84678,7 @@
This allows you to choose different security modules to be
configured into your kernel.
diff --git a/security/commoncap.c b/security/commoncap.c
-index fe30751..6110691 100644
+index fe30751..3579774 100644
--- a/security/commoncap.c
+++ b/security/commoncap.c
@@ -58,6 +58,10 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb)
@@ -80700,15 +84710,18 @@
return -EPERM;
return 0;
}
-@@ -962,7 +966,7 @@ error:
+@@ -962,8 +966,9 @@ error:
*/
int cap_syslog(int type)
{
- if ((type != 3 && type != 10) && !capable(CAP_SYS_ADMIN))
-+ if ((type != 3 && type != 10) && !capable(CAP_VE_SYS_ADMIN))
- return -EPERM;
+- return -EPERM;
++ if ((type != 3 && type != 10) &&
++ !capable(CAP_VE_SYS_ADMIN) && !capable(CAP_SYS_ADMIN))
++ return -EPERM;
return 0;
}
+
diff --git a/security/device_cgroup.c b/security/device_cgroup.c
index 6cf8fd2..02aeae6 100644
--- a/security/device_cgroup.c
More information about the Kernel-svn-changes
mailing list