[kernel] r15422 - in dists/sid/linux-2.6/debian: . patches/features/all/openvz
Maximilian Attems
maks at alioth.debian.org
Thu Mar 18 19:14:29 UTC 2010
Author: maks
Date: Thu Mar 18 19:14:12 2010
New Revision: 15422
Log:
update openvz patch to 026c0e96af23
Modified:
dists/sid/linux-2.6/debian/changelog
dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch
Modified: dists/sid/linux-2.6/debian/changelog
==============================================================================
--- dists/sid/linux-2.6/debian/changelog Thu Mar 18 17:36:27 2010 (r15421)
+++ dists/sid/linux-2.6/debian/changelog Thu Mar 18 19:14:12 2010 (r15422)
@@ -7,6 +7,7 @@
[ maximilian attems]
* [alpha, hppa] Disable oprofile as tracing code is unsupported here.
(closes: #574368)
+ * Update openvz patch to 026c0e96af23.
-- maximilian attems <maks at debian.org> Wed, 17 Mar 2010 18:48:22 +0100
Modified: dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch
==============================================================================
--- dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch Thu Mar 18 17:36:27 2010 (r15421)
+++ dists/sid/linux-2.6/debian/patches/features/all/openvz/openvz.patch Thu Mar 18 19:14:12 2010 (r15422)
@@ -5,6 +5,94 @@
git diff 2.6.32.10...HEAD >> openvz.patch
-maks
+commit 026c0e96af23e154b741645fe2d61a0278451268
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Thu Mar 18 19:30:31 2010 +0300
+
+ cpt: Add support for sit, ipip and ipgre tunnels.
+
+ These patches were dropped during the port to 2.6.26 (!) and now
+ it's time to resurrect them.
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit db266821b6c7f7ee39b97fe58fd336b04d7d689f
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Thu Mar 18 18:35:01 2010 +0300
+
+ ve: Get rid of ksyms completely
+
+ The last user of it (do_env_free) is easy due to module refcounting
+ kinda working properly ;)
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit 69309430a023aa75a73c8ebd5b1952e733cd1c6a
+Author: Cyrill Gorcunov <gorcunov at openvz.org>
+Date: Thu Mar 18 18:05:01 2010 +0300
+
+ iptables: rework module management in VE v3
+
+ Most of xt_ tables are per-net compatible so there is no need
+ to put an access restriction on them from a VE.
+
+ Typically we need to restrict access to certain netfilter tables:
+ "nat", "forward", "mangle" and "conntrack".
+
+ Note that "conntrack" is not covered by this patch yet, i.e. it's
+ allowed in a VE regardless of the VE configuration.
+
+ Technical details of the patch
+
+ 1) nfcalls.h is almost completely not needed anymore
+ but we still have one user of it ve.c + vecalls.c
+
+ 2) VE_IPT_CMP macro is wrapped with inline helper mask_ipt_allow
+ which allows the types passed in to be checked at compile time
+
+ 3) net_ipt_module_set code beautification
+
+ 4) ve_xt_table_forbidden inline helper introduced. It wraps an open
+ coded NULL pointer check for the sake of "grep"ability (pointing
+ out that the check is not mainline code)
+
+ 5) vziptable_defs.h has been re-made for the sake of easier reading.
+ Backward compatibility preserved.
+
+ 6) VE_IP_ALL introduced to be used instead of opencoded "all allowed"
+ mask.
+
+ 7) no_module global variable is no longer needed, we don't use legacy
+ symbol resolution technique. Net namespaces do all the work for us on
+ their own.
+
+ 8) Because of vzmon being a module we need do_env_free_hook (side effect
+ of removing KSYMs) and vzmon_mod pointer as well.
+
+ 9) We no longer need init_ve_iptables, do_ve_iptables and fini_ve_iptables.
+ The reason is the same as in (7)
+
+ 10) And finally "nat", "forward", "mangle" tables are controlled via
+ VE configuration.
+
+ TODO
+
+ 1) Control "conntrack" table via VE configuration supplied.
+ 2) Get rid of KSYM completely
+
+ Signed-off-by: Cyrill Gorcunov <gorcunov at openvz.org>
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
+commit d95919fda88c144bac75019f692c3f2f8e2b9d2e
+Author: Pavel Emelyanov <xemul at openvz.org>
+Date: Wed Mar 17 15:56:18 2010 +0300
+
+ config: Disable DEVTMPFS
+
+ http://bugzilla.openvz.org/show_bug.cgi?id=1469
+
+ Signed-off-by: Pavel Emelyanov <xemul at openvz.org>
+
commit f492a5013944b559cd809565250df2027dbb2c51
Author: Pavel Emelyanov <xemul at openvz.org>
Date: Tue Mar 16 14:34:19 2010 +0300
@@ -3665,6 +3753,19 @@
static char *block_devnode(struct device *dev, mode_t *mode)
{
+diff --git a/drivers/base/Kconfig b/drivers/base/Kconfig
+index ee37727..0361045 100644
+--- a/drivers/base/Kconfig
++++ b/drivers/base/Kconfig
+@@ -10,7 +10,7 @@ config UEVENT_HELPER_PATH
+
+ config DEVTMPFS
+ bool "Create a kernel maintained /dev tmpfs (EXPERIMENTAL)"
+- depends on HOTPLUG && SHMEM && TMPFS
++ depends on HOTPLUG && SHMEM && TMPFS && !VE
+ help
+ This creates a tmpfs filesystem, and mounts it at bootup
+ and mounts it at /dev. The kernel driver core creates device
diff --git a/drivers/base/base.h b/drivers/base/base.h
index 2ca7f5b..88d57ee 100644
--- a/drivers/base/base.h
@@ -6647,10 +6748,10 @@
diff --git a/drivers/net/vzethdev.c b/drivers/net/vzethdev.c
new file mode 100644
-index 0000000..825b30a
+index 0000000..e073e3e
--- /dev/null
+++ b/drivers/net/vzethdev.c
-@@ -0,0 +1,742 @@
+@@ -0,0 +1,741 @@
+/*
+ * veth.c
+ *
@@ -6705,7 +6806,6 @@
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/vzcalluser.h>
-+#include <linux/nfcalls.h>
+
+#include <linux/cpt_image.h>
+#include <linux/cpt_export.h>
@@ -22683,10 +22783,10 @@
+
diff --git a/include/linux/cpt_image.h b/include/linux/cpt_image.h
new file mode 100644
-index 0000000..d769826
+index 0000000..e25c3ce
--- /dev/null
+++ b/include/linux/cpt_image.h
-@@ -0,0 +1,1779 @@
+@@ -0,0 +1,1799 @@
+/*
+ *
+ * include/linux/cpt_image.h
@@ -22788,6 +22888,7 @@
+ CPT_OBJ_NET_HWADDR,
+ CPT_OBJ_NET_VETH,
+ CPT_OBJ_NET_STATS,
++ CPT_OBJ_NET_IPIP_TUNNEL,
+
+ /* 2.6.27-specific */
+ CPT_OBJ_NET_TAP_FILTER = 0x01000000,
@@ -24223,6 +24324,25 @@
+ __u32 __cpt_pad;
+} __attribute__ ((aligned (8)));
+
++struct cpt_tunnel_image {
++ __u64 cpt_next;
++ __u32 cpt_object;
++ __u16 cpt_hdrlen;
++ __u16 cpt_content;
++
++ __u32 cpt_tnl_flags;
++#define CPT_TUNNEL_FBDEV 0x1
++#define CPT_TUNNEL_SIT 0x2
++#define CPT_TUNNEL_GRE 0x4
++ __u16 cpt_i_flags;
++ __u16 cpt_o_flags;
++ __u32 cpt_i_key;
++ __u32 cpt_o_key;
++ __u32 cpt_iphdr[5];
++ __u32 cpt_i_seqno;
++ __u32 cpt_o_seqno;
++} __attribute__ ((aligned (8)));
++
+struct cpt_hwaddr_image {
+ __u64 cpt_next;
+ __u32 cpt_object;
@@ -25865,10 +25985,10 @@
{
int feature = gso_type << NETIF_F_GSO_SHIFT;
diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h
-index 6132b5e..4c7cf65 100644
+index 6132b5e..cc0d48e 100644
--- a/include/linux/netfilter.h
+++ b/include/linux/netfilter.h
-@@ -353,5 +353,24 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *);
+@@ -353,5 +353,25 @@ extern void (*nf_ct_destroy)(struct nf_conntrack *);
static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {}
#endif
@@ -25876,15 +25996,16 @@
+#include <linux/vziptable_defs.h>
+
+#define net_ipt_module_permitted(netns, ipt) \
-+ (VE_IPT_CMP((netns)->owner_ve->ipt_mask, ipt) && \
-+ VE_IPT_CMP((netns)->owner_ve->_iptables_modules, \
++ (mask_ipt_allow((netns)->owner_ve->ipt_mask, ipt) && \
++ mask_ipt_allow((netns)->owner_ve->_iptables_modules, \
+ (ipt) & ~(ipt##_MOD)))
+
-+#define net_ipt_module_set(netns, ipt) ({ \
++#define net_ipt_module_set(netns, ipt) \
++ ({ \
+ (netns)->owner_ve->_iptables_modules |= ipt##_MOD; \
-+ })
-+#define net_is_ipt_module_set(netns, ipt) ( \
-+ (netns)->owner_ve->_iptables_modules & (ipt##_MOD))
++ })
++#define net_is_ipt_module_set(netns, ipt) \
++ ((netns)->owner_ve->_iptables_modules & (ipt##_MOD))
+#else
+#define net_ipt_module_permitted(netns, ipt) (1)
+#define net_ipt_module_set(netns, ipt)
@@ -25894,7 +26015,7 @@
#endif /*__KERNEL__*/
#endif /*__LINUX_NETFILTER_H*/
diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h
-index 812cb15..4612bc8 100644
+index 812cb15..4339ac7 100644
--- a/include/linux/netfilter/x_tables.h
+++ b/include/linux/netfilter/x_tables.h
@@ -375,6 +375,7 @@ struct xt_table_info
@@ -25905,6 +26026,30 @@
/* Number of entries: FIXME. --RR */
unsigned int number;
/* Initial number of entries. Needed for module usage count */
+@@ -605,6 +606,23 @@ extern int xt_compat_target_to_user(struct xt_entry_target *t,
+ void __user **dstptr, unsigned int *size);
+
+ #endif /* CONFIG_COMPAT */
++
++#ifdef CONFIG_VE
++static inline bool ve_xt_table_forbidden(struct xt_table *xt)
++{
++ /*
++ * The only purpose to have this check as a separate
++ * helper is "grep"-a-bility
++ *
++ * If this helper hit it means that a VE has been
++ * configured without the particular xt_table support
++ */
++ return xt == NULL;
++}
++#else
++static inline bool ve_xt_table_forbidden(struct xt_table *xt) { return true; }
++#endif
++
+ #endif /* __KERNEL__ */
+
+ #endif /* _X_TABLES_H */
diff --git a/include/linux/netfilter/xt_hashlimit.h b/include/linux/netfilter/xt_hashlimit.h
index b1925b5..65eaf2b 100644
--- a/include/linux/netfilter/xt_hashlimit.h
@@ -25941,179 +26086,6 @@
+};
+#endif
#endif /* _LINUX_NETFILTER_XT_RECENT_H */
-diff --git a/include/linux/nfcalls.h b/include/linux/nfcalls.h
-new file mode 100644
-index 0000000..ebfd642
---- /dev/null
-+++ b/include/linux/nfcalls.h
-@@ -0,0 +1,167 @@
-+/*
-+ * include/linux/nfcalls.h
-+ *
-+ * Copyright (C) 2005 SWsoft
-+ * All rights reserved.
-+ *
-+ * Licensing governed by "linux/COPYING.SWsoft" file.
-+ *
-+ */
-+
-+#ifndef _LINUX_NFCALLS_H
-+#define _LINUX_NFCALLS_H
-+
-+#include <linux/rcupdate.h>
-+
-+#ifdef CONFIG_MODULES
-+extern struct module no_module;
-+
-+#define DECL_KSYM_MODULE(name) \
-+ extern struct module *vz_mod_##name
-+
-+#define INIT_KSYM_MODULE(name) \
-+ struct module *vz_mod_##name = &no_module; \
-+ EXPORT_SYMBOL(vz_mod_##name)
-+
-+static inline void __vzksym_modresolve(struct module **modp, struct module *mod)
-+{
-+ /*
-+ * we want to be sure, that pointer updates are visible first:
-+ * 1. wmb() is here only for piece of sure
-+ * (note, no rmb() in KSYMSAFECALL)
-+ * 2. synchronize_sched() guarantees that updates are visible
-+ * on all cpus and allows us to remove rmb() in KSYMSAFECALL
-+ */
-+ wmb(); synchronize_sched();
-+ *modp = mod;
-+ /* just to be sure, our changes are visible as soon as possible */
-+ wmb(); synchronize_sched();
-+}
-+
-+static inline void __vzksym_modunresolve(struct module **modp)
-+{
-+ /*
-+ * try_module_get() in KSYMSAFECALL should fail at this moment since
-+ * THIS_MODULE in in unloading state (we should be called from fini),
-+ * no need to syncronize pointers/ve_module updates.
-+ */
-+ *modp = &no_module;
-+ /*
-+ * synchronize_sched() guarantees here that we see
-+ * updated module pointer before the module really gets away
-+ */
-+ synchronize_sched();
-+}
-+
-+static inline int __vzksym_module_get(struct module *mod)
-+{
-+ /*
-+ * we want to avoid rmb(), so use synchronize_sched() in KSYMUNRESOLVE
-+ * and smp_read_barrier_depends() here...
-+ */
-+ smp_read_barrier_depends(); /* for module loading */
-+ if (!try_module_get(mod))
-+ return -EBUSY;
-+
-+ return 0;
-+}
-+
-+static inline void __vzksym_module_put(struct module *mod)
-+{
-+ module_put(mod);
-+}
-+#else
-+#define DECL_KSYM_MODULE(name)
-+#define INIT_KSYM_MODULE(name)
-+#define __vzksym_modresolve(modp, mod)
-+#define __vzksym_modunresolve(modp)
-+#define __vzksym_module_get(mod) 0
-+#define __vzksym_module_put(mod)
-+#endif
-+
-+#define __KSYMERRCALL(err, type, mod, name, args) \
-+({ \
-+ type ret = (type)err; \
-+ if (!__vzksym_module_get(vz_mod_##mod)) { \
-+ if (vz_##name) \
-+ ret = ((*vz_##name)args); \
-+ __vzksym_module_put(vz_mod_##mod); \
-+ } \
-+ ret; \
-+})
-+
-+#define __KSYMSAFECALL_VOID(mod, name, args) \
-+ do { \
-+ if (!__vzksym_module_get(vz_mod_##mod)) { \
-+ if (vz_##name) \
-+ ((*vz_##name)args); \
-+ __vzksym_module_put(vz_mod_##mod); \
-+ } \
-+ } while (0)
-+
-+#define DECL_KSYM_CALL(type, name, args) \
-+ extern type (*vz_##name) args
-+#define INIT_KSYM_CALL(type, name, args) \
-+ type (*vz_##name) args; \
-+EXPORT_SYMBOL(vz_##name)
-+
-+#define KSYMERRCALL(err, mod, name, args) \
-+ __KSYMERRCALL(err, int, mod, name, args)
-+#define KSYMSAFECALL(type, mod, name, args) \
-+ __KSYMERRCALL(0, type, mod, name, args)
-+#define KSYMSAFECALL_VOID(mod, name, args) \
-+ __KSYMSAFECALL_VOID(mod, name, args)
-+#define KSYMREF(name) vz_##name
-+
-+/* should be called _after_ KSYMRESOLVE's */
-+#define KSYMMODRESOLVE(name) \
-+ __vzksym_modresolve(&vz_mod_##name, THIS_MODULE)
-+#define KSYMMODUNRESOLVE(name) \
-+ __vzksym_modunresolve(&vz_mod_##name)
-+
-+#define KSYMRESOLVE(name) \
-+ vz_##name = &name
-+#define KSYMUNRESOLVE(name) \
-+ vz_##name = NULL
-+
-+#if defined(CONFIG_VE)
-+DECL_KSYM_MODULE(ip_tables);
-+DECL_KSYM_MODULE(ip6_tables);
-+DECL_KSYM_MODULE(iptable_filter);
-+DECL_KSYM_MODULE(ip6table_filter);
-+DECL_KSYM_MODULE(iptable_mangle);
-+DECL_KSYM_MODULE(ip6table_mangle);
-+DECL_KSYM_MODULE(ip_conntrack);
-+DECL_KSYM_MODULE(nf_conntrack);
-+DECL_KSYM_MODULE(nf_conntrack_ipv4);
-+DECL_KSYM_MODULE(nf_conntrack_ipv6);
-+DECL_KSYM_MODULE(xt_conntrack);
-+DECL_KSYM_MODULE(ip_nat);
-+DECL_KSYM_MODULE(nf_nat);
-+DECL_KSYM_MODULE(iptable_nat);
-+
-+struct sk_buff;
-+
-+DECL_KSYM_CALL(int, init_iptable_conntrack, (void));
-+DECL_KSYM_CALL(int, nf_conntrack_init_ve, (void));
-+DECL_KSYM_CALL(int, init_nf_ct_l3proto_ipv4, (void));
-+DECL_KSYM_CALL(int, init_nf_ct_l3proto_ipv6, (void));
-+DECL_KSYM_CALL(int, nf_nat_init, (void));
-+DECL_KSYM_CALL(int, init_nftable_nat, (void));
-+DECL_KSYM_CALL(int, nf_nat_init, (void));
-+DECL_KSYM_CALL(void, fini_nftable_nat, (void));
-+DECL_KSYM_CALL(void, nf_nat_cleanup, (void));
-+DECL_KSYM_CALL(void, fini_iptable_conntrack, (void));
-+DECL_KSYM_CALL(void, nf_conntrack_cleanup_ve, (void));
-+DECL_KSYM_CALL(void, fini_nf_ct_l3proto_ipv4, (void));
-+DECL_KSYM_CALL(void, fini_nf_ct_l3proto_ipv6, (void));
-+
-+#include <linux/netfilter/x_tables.h>
-+#endif
-+
-+#if defined(CONFIG_VE_CALLS) || defined(CONFIG_VE_CALLS_MODULE)
-+DECL_KSYM_MODULE(vzmon);
-+DECL_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
-+#endif
-+
-+#endif /* _LINUX_NFCALLS_H */
diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h
index 320569e..8e0d228 100644
--- a/include/linux/nfs_fs_sb.h
@@ -27712,10 +27684,10 @@
static inline void get_uts_ns(struct uts_namespace *ns)
diff --git a/include/linux/ve.h b/include/linux/ve.h
new file mode 100644
-index 0000000..c677499
+index 0000000..8f8d083
--- /dev/null
+++ b/include/linux/ve.h
-@@ -0,0 +1,363 @@
+@@ -0,0 +1,361 @@
+/*
+ * include/linux/ve.h
+ *
@@ -28038,11 +28010,8 @@
+
+static inline void put_ve(struct ve_struct *ptr)
+{
-+ if (ptr && atomic_dec_and_test(&ptr->counter)) {
-+ BUG_ON(atomic_read(&ptr->pcounter) > 0);
-+ BUG_ON(ptr->is_running);
++ if (ptr && atomic_dec_and_test(&ptr->counter))
+ do_env_free(ptr);
-+ }
+}
+
+static inline void pget_ve(struct ve_struct *ptr)
@@ -28062,6 +28031,7 @@
+extern struct task_struct *ve_cleanup_thread;
+
+extern int (*do_ve_enter_hook)(struct ve_struct *ve, unsigned int flags);
++extern void (*do_env_free_hook)(struct ve_struct *ve);
+
+extern unsigned long long ve_relative_clock(struct timespec * ts);
+
@@ -29240,59 +29210,89 @@
+#endif /* __LINUX_VZ_EVENT_H__ */
diff --git a/include/linux/vziptable_defs.h b/include/linux/vziptable_defs.h
new file mode 100644
-index 0000000..ec7586f
+index 0000000..204e9d8
--- /dev/null
+++ b/include/linux/vziptable_defs.h
-@@ -0,0 +1,51 @@
+@@ -0,0 +1,81 @@
+#ifndef _LINUX_VZIPTABLE_DEFS_H
+#define _LINUX_VZIPTABLE_DEFS_H
+
-+/* these masks represent modules */
-+#define VE_IP_IPTABLES_MOD (1U<<0)
-+#define VE_IP_FILTER_MOD (1U<<1)
-+#define VE_IP_MANGLE_MOD (1U<<2)
-+#define VE_IP_CONNTRACK_MOD (1U<<14)
-+#define VE_IP_CONNTRACK_FTP_MOD (1U<<15)
-+#define VE_IP_CONNTRACK_IRC_MOD (1U<<16)
-+#define VE_IP_NAT_MOD (1U<<20)
-+#define VE_IP_NAT_FTP_MOD (1U<<21)
-+#define VE_IP_NAT_IRC_MOD (1U<<22)
-+#define VE_IP_IPTABLES6_MOD (1U<<26)
-+#define VE_IP_FILTER6_MOD (1U<<27)
-+#define VE_IP_MANGLE6_MOD (1U<<28)
-+#define VE_IP_IPTABLE_NAT_MOD (1U<<29)
-+#define VE_NF_CONNTRACK_MOD (1U<<30)
++#include <linux/types.h>
++#include <linux/sched.h>
++
++/*
++ * These masks represent modules
++ *
++ * Strictly speaking we use only a small subset
++ * of these bits nowadays but we MUST RESERVE all
++ * the bits that were ever used for the sake of ABI compatibility
++ * (ie compatibility with vzctl user-space utility)
++ *
++ * DON'T EVER DELETE/MODIFY THESE BITS
++ */
++#define VE_IPT_GENERATE(name, shift) name = (1U << shift)
++
++enum ve_ipt_mods {
++ VE_IPT_GENERATE(VE_IP_IPTABLES_MOD, 0),
++ VE_IPT_GENERATE(VE_IP_FILTER_MOD, 1),
++ VE_IPT_GENERATE(VE_IP_MANGLE_MOD, 2),
++ VE_IPT_GENERATE(VE_IP_MATCH_LIMIT_MOD, 3),
++ VE_IPT_GENERATE(VE_IP_MATCH_MULTIPORT_MOD, 4),
++ VE_IPT_GENERATE(VE_IP_MATCH_TOS_MOD, 5),
++ VE_IPT_GENERATE(VE_IP_TARGET_TOS_MOD, 6),
++ VE_IPT_GENERATE(VE_IP_TARGET_REJECT_MOD, 7),
++ VE_IPT_GENERATE(VE_IP_TARGET_TCPMSS_MOD, 8),
++ VE_IPT_GENERATE(VE_IP_MATCH_TCPMSS_MOD, 9),
++ VE_IPT_GENERATE(VE_IP_MATCH_TTL_MOD, 10),
++ VE_IPT_GENERATE(VE_IP_TARGET_LOG_MOD, 11),
++ VE_IPT_GENERATE(VE_IP_MATCH_LENGTH_MOD, 12),
++ VE_IPT_GENERATE(VE_IP_CONNTRACK_MOD, 14),
++ VE_IPT_GENERATE(VE_IP_CONNTRACK_FTP_MOD, 15),
++ VE_IPT_GENERATE(VE_IP_CONNTRACK_IRC_MOD, 16),
++ VE_IPT_GENERATE(VE_IP_MATCH_CONNTRACK_MOD, 17),
++ VE_IPT_GENERATE(VE_IP_MATCH_STATE_MOD, 18),
++ VE_IPT_GENERATE(VE_IP_MATCH_HELPER_MOD, 19),
++ VE_IPT_GENERATE(VE_IP_NAT_MOD, 20),
++ VE_IPT_GENERATE(VE_IP_NAT_FTP_MOD, 21),
++ VE_IPT_GENERATE(VE_IP_NAT_IRC_MOD, 22),
++ VE_IPT_GENERATE(VE_IP_TARGET_REDIRECT_MOD, 23),
++ VE_IPT_GENERATE(VE_IP_MATCH_OWNER_MOD, 24),
++ VE_IPT_GENERATE(VE_IP_MATCH_MAC_MOD, 25),
++ VE_IPT_GENERATE(VE_IP_IPTABLES6_MOD, 26),
++ VE_IPT_GENERATE(VE_IP_FILTER6_MOD, 27),
++ VE_IPT_GENERATE(VE_IP_MANGLE6_MOD, 28),
++ VE_IPT_GENERATE(VE_IP_IPTABLE_NAT_MOD, 29),
++ VE_IPT_GENERATE(VE_NF_CONNTRACK_MOD, 30),
++};
+
+/* these masks represent modules with their dependences */
+#define VE_IP_IPTABLES (VE_IP_IPTABLES_MOD)
-+#define VE_IP_FILTER (VE_IP_FILTER_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_MANGLE (VE_IP_MANGLE_MOD \
-+ | VE_IP_IPTABLES)
++#define VE_IP_FILTER (VE_IP_FILTER_MOD | VE_IP_IPTABLES)
++#define VE_IP_MANGLE (VE_IP_MANGLE_MOD | VE_IP_IPTABLES)
+#define VE_IP_IPTABLES6 (VE_IP_IPTABLES6_MOD)
+#define VE_IP_FILTER6 (VE_IP_FILTER6_MOD | VE_IP_IPTABLES6)
+#define VE_IP_MANGLE6 (VE_IP_MANGLE6_MOD | VE_IP_IPTABLES6)
+#define VE_NF_CONNTRACK (VE_NF_CONNTRACK_MOD | VE_IP_IPTABLES)
-+#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD \
-+ | VE_IP_IPTABLES)
-+#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_NAT (VE_IP_NAT_MOD \
-+ | VE_IP_CONNTRACK)
-+#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD \
-+ | VE_IP_NAT | VE_IP_CONNTRACK_FTP)
-+#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD \
-+ | VE_IP_NAT | VE_IP_CONNTRACK_IRC)
++#define VE_IP_CONNTRACK (VE_IP_CONNTRACK_MOD | VE_IP_IPTABLES)
++#define VE_IP_CONNTRACK_FTP (VE_IP_CONNTRACK_FTP_MOD | VE_IP_CONNTRACK)
++#define VE_IP_CONNTRACK_IRC (VE_IP_CONNTRACK_IRC_MOD | VE_IP_CONNTRACK)
++#define VE_IP_NAT (VE_IP_NAT_MOD | VE_IP_CONNTRACK)
++#define VE_IP_NAT_FTP (VE_IP_NAT_FTP_MOD | VE_IP_NAT | VE_IP_CONNTRACK_FTP)
++#define VE_IP_NAT_IRC (VE_IP_NAT_IRC_MOD | VE_IP_NAT | VE_IP_CONNTRACK_IRC)
+#define VE_IP_IPTABLE_NAT (VE_IP_IPTABLE_NAT_MOD | VE_IP_CONNTRACK)
+
+/* safe iptables mask to be used by default */
-+#define VE_IP_DEFAULT \
-+ (VE_IP_IPTABLES | \
-+ VE_IP_FILTER | VE_IP_MANGLE)
++#define VE_IP_DEFAULT (VE_IP_IPTABLES | VE_IP_FILTER | VE_IP_MANGLE)
++
++/* allowed all */
++#define VE_IP_ALL (~0ULL)
+
-+#define VE_IPT_CMP(x, y) (((x) & (y)) == (y))
++#define VE_IPT_CMP(x, y) (((x) & (y)) == (y))
++
++static inline bool mask_ipt_allow(__u64 permitted, __u64 mask)
++{
++ return VE_IPT_CMP(permitted, mask);
++}
+
+#endif /* _LINUX_VZIPTABLE_DEFS_H */
diff --git a/include/linux/vzquota.h b/include/linux/vzquota.h
@@ -38998,10 +38998,10 @@
+}
diff --git a/kernel/cpt/cpt_dump.c b/kernel/cpt/cpt_dump.c
new file mode 100644
-index 0000000..db6c5a2
+index 0000000..7a36b4e
--- /dev/null
+++ b/kernel/cpt/cpt_dump.c
-@@ -0,0 +1,1249 @@
+@@ -0,0 +1,1248 @@
+/*
+ *
+ * kernel/cpt/cpt_dump.c
@@ -39034,7 +39034,6 @@
+#include <linux/nsproxy.h>
+#include <linux/mnt_namespace.h>
+#include <linux/netdevice.h>
-+#include <linux/nfcalls.h>
+#include <linux/dcache.h>
+#include <linux/if_tun.h>
+#include <linux/utsname.h>
@@ -43584,10 +43583,10 @@
+extern struct vm_operations_struct special_mapping_vmops;
diff --git a/kernel/cpt/cpt_net.c b/kernel/cpt/cpt_net.c
new file mode 100644
-index 0000000..329b1a8
+index 0000000..9e09675
--- /dev/null
+++ b/kernel/cpt/cpt_net.c
-@@ -0,0 +1,545 @@
+@@ -0,0 +1,544 @@
+/*
+ *
+ * kernel/cpt/cpt_net.c
@@ -43619,7 +43618,6 @@
+#include <linux/ve_proto.h>
+#include <linux/vzcalluser.h>
+#include <linux/cpt_image.h>
-+#include <linux/nfcalls.h>
+#include <linux/if_tun.h>
+#include <linux/veth.h>
+#include <linux/fdtable.h>
@@ -52512,10 +52510,10 @@
+}
diff --git a/kernel/cpt/rst_net.c b/kernel/cpt/rst_net.c
new file mode 100644
-index 0000000..95939e6
+index 0000000..dc5de80
--- /dev/null
+++ b/kernel/cpt/rst_net.c
-@@ -0,0 +1,629 @@
+@@ -0,0 +1,628 @@
+/*
+ *
+ * kernel/cpt/rst_net.c
@@ -52549,7 +52547,6 @@
+#include <net/addrconf.h>
+#include <linux/if_tun.h>
+#include <linux/veth.h>
-+#include <linux/nfcalls.h>
+#include <linux/venet.h>
+#include <linux/fdtable.h>
+#include <net/net_namespace.h>
@@ -63610,10 +63607,10 @@
+
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
new file mode 100644
-index 0000000..0562a4e
+index 0000000..7da3e04
--- /dev/null
+++ b/kernel/ve/ve.c
-@@ -0,0 +1,143 @@
+@@ -0,0 +1,118 @@
+/*
+ * linux/kernel/ve/ve.c
+ *
@@ -63652,7 +63649,6 @@
+#include <linux/devpts_fs.h>
+#include <linux/user_namespace.h>
+
-+#include <linux/nfcalls.h>
+#include <linux/vzcalluser.h>
+
+unsigned long vz_rstamp = 0x37e0f59d;
@@ -63662,42 +63658,18 @@
+EXPORT_SYMBOL(no_module);
+#endif
+
-+INIT_KSYM_MODULE(ip_tables);
-+INIT_KSYM_MODULE(ip6_tables);
-+INIT_KSYM_MODULE(iptable_filter);
-+INIT_KSYM_MODULE(ip6table_filter);
-+INIT_KSYM_MODULE(iptable_mangle);
-+INIT_KSYM_MODULE(ip6table_mangle);
-+INIT_KSYM_MODULE(ip_conntrack);
-+INIT_KSYM_MODULE(nf_conntrack);
-+INIT_KSYM_MODULE(nf_conntrack_ipv4);
-+INIT_KSYM_MODULE(nf_conntrack_ipv6);
-+INIT_KSYM_MODULE(ip_nat);
-+INIT_KSYM_MODULE(nf_nat);
-+INIT_KSYM_MODULE(iptable_nat);
-+
-+INIT_KSYM_CALL(int, init_iptable_conntrack, (void));
-+INIT_KSYM_CALL(int, nf_conntrack_init_ve, (void));
-+INIT_KSYM_CALL(int, init_nf_ct_l3proto_ipv4, (void));
-+INIT_KSYM_CALL(int, init_nf_ct_l3proto_ipv6, (void));
-+INIT_KSYM_CALL(int, nf_nat_init, (void));
-+INIT_KSYM_CALL(int, init_iptable_nat, (void));
-+INIT_KSYM_CALL(void, fini_iptable_nat, (void));
-+INIT_KSYM_CALL(int, init_nftable_nat, (void));
-+INIT_KSYM_CALL(void, fini_nftable_nat, (void));
-+INIT_KSYM_CALL(void, nf_nat_cleanup, (void));
-+INIT_KSYM_CALL(void, fini_iptable_conntrack, (void));
-+INIT_KSYM_CALL(void, nf_conntrack_cleanup_ve, (void));
-+INIT_KSYM_CALL(void, fini_nf_ct_l3proto_ipv4, (void));
-+INIT_KSYM_CALL(void, fini_nf_ct_l3proto_ipv6, (void));
-+
+#if defined(CONFIG_VE_CALLS_MODULE) || defined(CONFIG_VE_CALLS)
-+INIT_KSYM_MODULE(vzmon);
-+INIT_KSYM_CALL(void, real_do_env_free, (struct ve_struct *env));
++void (*do_env_free_hook)(struct ve_struct *ve);
++EXPORT_SYMBOL(do_env_free_hook);
+
+void do_env_free(struct ve_struct *env)
+{
-+ KSYMSAFECALL_VOID(vzmon, real_do_env_free, (env));
++ BUG_ON(atomic_read(&env->pcounter) > 0);
++ BUG_ON(env->is_running);
++
++ preempt_disable();
++ do_env_free_hook(env);
++ preempt_enable();
+}
+EXPORT_SYMBOL(do_env_free);
+#endif
@@ -63717,7 +63689,7 @@
+ .is_running = 1,
+ .op_sem = __RWSEM_INITIALIZER(ve0.op_sem),
+#ifdef CONFIG_VE_IPTABLES
-+ .ipt_mask = ~0ULL,
++ .ipt_mask = VE_IP_ALL,
+#endif
+ .features = VE_FEATURE_SIT | VE_FEATURE_IPIP |
+ VE_FEATURE_PPP,
@@ -63759,10 +63731,10 @@
+}
diff --git a/kernel/ve/vecalls.c b/kernel/ve/vecalls.c
new file mode 100644
-index 0000000..825d8e6
+index 0000000..29b455d
--- /dev/null
+++ b/kernel/ve/vecalls.c
-@@ -0,0 +1,2394 @@
+@@ -0,0 +1,2264 @@
+/*
+ * linux/kernel/ve/vecalls.c
+ *
@@ -63837,7 +63809,6 @@
+#include <linux/fairsched.h>
+#endif
+
-+#include <linux/nfcalls.h>
+#include <linux/virtinfo.h>
+#include <linux/utsrelease.h>
+#include <linux/major.h>
@@ -63882,7 +63853,7 @@
+/*
+ * real_put_ve() MUST be used instead of put_ve() inside vecalls.
+ */
-+void real_do_env_free(struct ve_struct *ve);
++static void real_do_env_free(struct ve_struct *ve);
+static inline void real_put_ve(struct ve_struct *ve)
+{
+ if (ve && atomic_dec_and_test(&ve->counter)) {
@@ -64635,100 +64606,6 @@
+
+#ifdef CONFIG_VE_IPTABLES
+
-+#define KSYMIPTINIT(mask, ve, full_mask, mod, name, args) \
-+({ \
-+ int ret = 0; \
-+ if (VE_IPT_CMP(mask, full_mask) && \
-+ VE_IPT_CMP((ve)->_iptables_modules, \
-+ full_mask & ~(full_mask##_MOD))) { \
-+ ret = KSYMERRCALL(1, mod, name, args); \
-+ if (ret == 0) \
-+ (ve)->_iptables_modules |= \
-+ full_mask##_MOD; \
-+ if (ret == 1) \
-+ ret = 0; \
-+ } \
-+ ret; \
-+})
-+
-+#define KSYMIPTFINI(mask, full_mask, mod, name, args) \
-+({ \
-+ if (VE_IPT_CMP(mask, full_mask##_MOD)) \
-+ KSYMSAFECALL_VOID(mod, name, args); \
-+})
-+
-+
-+static int do_ve_iptables(struct ve_struct *ve, __u64 init_mask,
-+ int init_or_cleanup)
-+{
-+ int err = 0;
-+
-+ if (!init_or_cleanup)
-+ goto cleanup;
-+
-+ /* init part */
-+#if defined(CONFIG_NF_CONNTRACK_IPV4) || \
-+ defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_NF_CONNTRACK,
-+ nf_conntrack, nf_conntrack_init_ve, ());
-+ if (err < 0)
-+ goto err_nf_conntrack;
-+
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_CONNTRACK,
-+ nf_conntrack_ipv4, init_nf_ct_l3proto_ipv4, ());
-+ if (err < 0)
-+ goto err_nf_conntrack_ipv4;
-+#endif
-+#if defined(CONFIG_NF_NAT) || \
-+ defined(CONFIG_NF_NAT_MODULE)
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_NAT,
-+ nf_nat, nf_nat_init, ());
-+ if (err < 0)
-+ goto err_nftable_nat;
-+ err = KSYMIPTINIT(init_mask, ve, VE_IP_IPTABLE_NAT,
-+ iptable_nat, init_nftable_nat, ());
-+ if (err < 0)
-+ goto err_nftable_nat2;
-+#endif
-+ return 0;
-+
-+/* ------------------------------------------------------------------------- */
-+
-+cleanup:
-+#if defined(CONFIG_NF_NAT) || \
-+ defined(CONFIG_NF_NAT_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_IPTABLE_NAT,
-+ iptable_nat, fini_nftable_nat, ());
-+err_nftable_nat2:
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_NAT,
-+ nf_nat, nf_nat_cleanup, ());
-+err_nftable_nat:
-+#endif
-+#if defined(CONFIG_NF_CONNTRACK_IPV4) || \
-+ defined(CONFIG_NF_CONNTRACK_IPV4_MODULE)
-+ KSYMIPTFINI(ve->_iptables_modules, VE_IP_CONNTRACK,
-+ nf_conntrack_ipv4, fini_nf_ct_l3proto_ipv4, ());
-+err_nf_conntrack_ipv4:
-+ KSYMIPTFINI(ve->_iptables_modules, VE_NF_CONNTRACK,
-+ nf_conntrack, nf_conntrack_cleanup_ve, ());
-+err_nf_conntrack:
-+#endif
-+ /* Do not reset _iptables_modules as
-+ * net hooks used one
-+ */
-+ return err;
-+}
-+
-+static inline int init_ve_iptables(struct ve_struct *ve, __u64 init_mask)
-+{
-+ return do_ve_iptables(ve, init_mask, 1);
-+}
-+
-+static inline void fini_ve_iptables(struct ve_struct *ve, __u64 init_mask)
-+{
-+ (void)do_ve_iptables(ve, init_mask, 0);
-+}
-+
+static __u64 setup_iptables_mask(__u64 init_mask)
+{
+ /* Remove when userspace will start supplying IPv6-related bits. */
@@ -64737,24 +64614,21 @@
+ init_mask &= ~VE_IP_MANGLE6;
+ init_mask &= ~VE_IP_IPTABLE_NAT_MOD;
+ init_mask &= ~VE_NF_CONNTRACK_MOD;
-+ if ((init_mask & VE_IP_IPTABLES) == VE_IP_IPTABLES)
++
++ if (mask_ipt_allow(init_mask, VE_IP_IPTABLES))
+ init_mask |= VE_IP_IPTABLES6;
-+ if ((init_mask & VE_IP_FILTER) == VE_IP_FILTER)
++ if (mask_ipt_allow(init_mask, VE_IP_FILTER))
+ init_mask |= VE_IP_FILTER6;
-+ if ((init_mask & VE_IP_MANGLE) == VE_IP_MANGLE)
++ if (mask_ipt_allow(init_mask, VE_IP_MANGLE))
+ init_mask |= VE_IP_MANGLE6;
-+ if ((init_mask & VE_IP_NAT) == VE_IP_NAT)
++ if (mask_ipt_allow(init_mask, VE_IP_NAT))
+ init_mask |= VE_IP_IPTABLE_NAT;
-+
-+ if ((init_mask & VE_IP_CONNTRACK) == VE_IP_CONNTRACK)
++ if (mask_ipt_allow(init_mask, VE_IP_CONNTRACK))
+ init_mask |= VE_NF_CONNTRACK;
+
+ return init_mask;
+}
+
-+#else
-+#define init_ve_iptables(x, y) (0)
-+#define fini_ve_iptables(x, y) do { } while (0)
+#endif
+
+static inline int init_ve_cpustats(struct ve_struct *ve)
@@ -64910,15 +64784,6 @@
+
+ set_ve_caps(ve, tsk);
+
-+ /* It is safe to initialize netfilter here as routing initialization and
-+ interface setup will be done below. This means that NO skb can be
-+ passed inside. Den */
-+ /* iptables ve initialization for non ve0;
-+ ve0 init is in module_init */
-+
-+ if ((err = init_ve_iptables(ve, init_mask)) < 0)
-+ goto err_iptables;
-+
+ if ((err = pid_ns_attach_init(ve->ve_ns->pid_ns, tsk)) < 0)
+ goto err_vpid;
+
@@ -64952,8 +64817,6 @@
+ mntget(ve->proc_mnt);
+err_vpid:
+ fini_venet(ve);
-+ fini_ve_iptables(ve, init_mask);
-+err_iptables:
+ fini_ve_meminfo(ve);
+err_meminf:
+ fini_ve_devpts(ve);
@@ -65022,6 +64885,7 @@
+ free_ve_cpustats(ve);
+err_cpu_stats:
+ kfree(ve);
++ module_put(THIS_MODULE);
+ goto err_struct;
+}
+
@@ -65159,11 +65023,6 @@
+
+ /* no new packets in flight beyond this point */
+
-+ /* kill iptables */
-+ /* No skb belonging to VE can exist at this point as unregister_netdev
-+ is an operation awaiting until ALL skb's gone */
-+ fini_ve_iptables(ve, ve->_iptables_modules);
-+
+ fini_ve_sched(ve);
+
+ fini_ve_devpts(ve);
@@ -65269,7 +65128,7 @@
+ WARN_ON(!list_empty(&ve_cleanup_list));
+}
+
-+void real_do_env_free(struct ve_struct *ve)
++static void real_do_env_free(struct ve_struct *ve)
+{
+ VZTRACE("real_do_env_free\n");
+
@@ -65281,8 +65140,6 @@
+
+ module_put(THIS_MODULE);
+}
-+EXPORT_SYMBOL(real_do_env_free);
-+
+
+/**********************************************************************
+ **********************************************************************
@@ -66037,19 +65894,6 @@
+ **********************************************************************
+ **********************************************************************/
+
-+static int __init init_vecalls_symbols(void)
-+{
-+ KSYMRESOLVE(real_do_env_free);
-+ KSYMMODRESOLVE(vzmon);
-+ return 0;
-+}
-+
-+static void fini_vecalls_symbols(void)
-+{
-+ KSYMMODUNRESOLVE(vzmon);
-+ KSYMUNRESOLVE(real_do_env_free);
-+}
-+
+static inline __init int init_vecalls_ioctls(void)
+{
+ vzioctl_register(&vzcalls);
@@ -66109,10 +65953,6 @@
+ if (err < 0)
+ goto out_sysctl;
+
-+ err = init_vecalls_symbols();
-+ if (err < 0)
-+ goto out_sym;
-+
+ err = init_vecalls_proc();
+ if (err < 0)
+ goto out_proc;
@@ -66125,15 +65965,17 @@
+ * because in this case vzmon refcount > 0
+ */
+ do_ve_enter_hook = do_env_enter;
-+
++ /*
++ * This one can also be dereferenced since not freed
++ * VE holds reference on module
++ */
++ do_env_free_hook = real_do_env_free;
+
+ return 0;
+
+out_ioctls:
+ fini_vecalls_proc();
+out_proc:
-+ fini_vecalls_symbols();
-+out_sym:
+ fini_vzmond();
+out_sysctl:
+ fini_vecalls_sysctl();
@@ -66143,10 +65985,10 @@
+
+static void vecalls_exit(void)
+{
++ do_env_free_hook = NULL;
+ do_ve_enter_hook = NULL;
+ fini_vecalls_ioctls();
+ fini_vecalls_proc();
-+ fini_vecalls_symbols();
+ fini_vzmond();
+ fini_vecalls_sysctl();
+}
@@ -73458,6 +73300,166 @@
}
skb_shinfo(head)->frag_list = head->next;
+diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
+index 1433338..6efea1a 100644
+--- a/net/ipv4/ip_gre.c
++++ b/net/ipv4/ip_gre.c
+@@ -50,6 +50,9 @@
+ #include <net/ip6_route.h>
+ #endif
+
++#include <linux/cpt_image.h>
++#include <linux/cpt_export.h>
++
+ /*
+ Problems & solutions
+ --------------------
+@@ -1202,6 +1205,8 @@ static int ipgre_close(struct net_device *dev)
+
+ #endif
+
++static void ipgre_cpt(struct net_device *dev,
++ struct cpt_ops *ops, struct cpt_context *ctx);
+ static const struct net_device_ops ipgre_netdev_ops = {
+ .ndo_init = ipgre_tunnel_init,
+ .ndo_uninit = ipgre_tunnel_uninit,
+@@ -1212,6 +1217,7 @@ static const struct net_device_ops ipgre_netdev_ops = {
+ .ndo_start_xmit = ipgre_tunnel_xmit,
+ .ndo_do_ioctl = ipgre_tunnel_ioctl,
+ .ndo_change_mtu = ipgre_tunnel_change_mtu,
++ .ndo_cpt = ipgre_cpt,
+ };
+
+ static void ipgre_tunnel_setup(struct net_device *dev)
+@@ -1297,6 +1303,112 @@ static void ipgre_destroy_tunnels(struct ipgre_net *ign)
+ }
+ }
+
++static void ipgre_cpt(struct net_device *dev,
++ struct cpt_ops *ops, struct cpt_context *ctx)
++{
++ struct cpt_tunnel_image v;
++ struct ip_tunnel *t;
++ struct ipgre_net *ign;
++
++ t = netdev_priv(dev);
++ ign = net_generic(get_exec_env()->ve_netns, ipgre_net_id);
++ BUG_ON(ign == NULL);
++
++ v.cpt_next = CPT_NULL;
++ v.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL;
++ v.cpt_hdrlen = sizeof(v);
++ v.cpt_content = CPT_CONTENT_VOID;
++
++ /* mark fb dev */
++ v.cpt_tnl_flags = CPT_TUNNEL_GRE;
++ if (dev == ign->fb_tunnel_dev)
++ v.cpt_tnl_flags |= CPT_TUNNEL_FBDEV;
++
++ v.cpt_i_flags = t->parms.i_flags;
++ v.cpt_o_flags = t->parms.o_flags;
++ v.cpt_i_key = t->parms.i_key;
++ v.cpt_o_key = t->parms.o_key;
++ v.cpt_i_seqno = t->i_seqno;
++ v.cpt_o_seqno = t->o_seqno;
++
++ BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
++ memcpy(&v.cpt_iphdr, &t->parms.iph, sizeof(t->parms.iph));
++
++ ops->write(&v, sizeof(v), ctx);
++}
++
++static int ipgre_rst(loff_t start, struct cpt_netdev_image *di,
++ struct rst_ops *ops, struct cpt_context *ctx)
++{
++ int err = -ENODEV;
++ struct cpt_tunnel_image v;
++ struct net_device *dev;
++ struct ip_tunnel *t;
++ loff_t pos;
++ int fbdev;
++ struct ipgre_net *ign;
++
++ ign = net_generic(get_exec_env()->ve_netns, ipgre_net_id);
++ if (ign == NULL)
++ return -EOPNOTSUPP;
++
++ pos = start + di->cpt_hdrlen;
++ err = ops->get_object(CPT_OBJ_NET_IPIP_TUNNEL,
++ pos, &v, sizeof(v), ctx);
++ if (err)
++ return err;
++
++ /* some sanity */
++ if (v.cpt_content != CPT_CONTENT_VOID)
++ return -EINVAL;
++
++ if (!(v.cpt_tnl_flags & CPT_TUNNEL_GRE))
++ return 1;
++
++ if (v.cpt_tnl_flags & CPT_TUNNEL_FBDEV) {
++ fbdev = 1;
++ err = 0;
++ dev = ign->fb_tunnel_dev;
++ } else {
++ fbdev = 0;
++ err = -ENOMEM;
++ dev = alloc_netdev(sizeof(struct ip_tunnel), di->cpt_name,
++ ipgre_tunnel_setup);
++ if (!dev)
++ goto out;
++ }
++
++ t = netdev_priv(dev);
++ t->parms.i_flags = v.cpt_i_flags;
++ t->parms.o_flags = v.cpt_o_flags;
++ t->parms.i_key = v.cpt_i_key;
++ t->parms.o_key = v.cpt_o_key;
++ t->i_seqno = v.cpt_i_seqno;
++ t->o_seqno = v.cpt_o_seqno;
++
++ BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
++ memcpy(&t->parms.iph, &v.cpt_iphdr, sizeof(t->parms.iph));
++
++ if (!fbdev) {
++ ipgre_tunnel_init(dev);
++ err = register_netdevice(dev);
++ if (err) {
++ free_netdev(dev);
++ goto out;
++ }
++
++ dev_hold(dev);
++ ipgre_tunnel_link(ign, t);
++ }
++out:
++ return err;
++}
++
++static struct netdev_rst ipgre_netdev_rst = {
++ .cpt_object = CPT_OBJ_NET_IPIP_TUNNEL,
++ .ndo_rst = ipgre_rst,
++};
++
+ static int ipgre_init_net(struct net *net)
+ {
+ int err;
+@@ -1682,6 +1794,7 @@ static int __init ipgre_init(void)
+ if (err < 0)
+ goto tap_ops_failed;
+
++ register_netdev_rst(&ipgre_netdev_rst);
+ out:
+ return err;
+
+@@ -1696,6 +1809,7 @@ gen_device_failed:
+
+ static void __exit ipgre_fini(void)
+ {
++ unregister_netdev_rst(&ipgre_netdev_rst);
+ rtnl_link_unregister(&ipgre_tap_ops);
+ rtnl_link_unregister(&ipgre_link_ops);
+ unregister_pernet_gen_device(ipgre_net_id, &ipgre_net_ops);
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index 6c98b43..7e2f617 100644
--- a/net/ipv4/ip_input.c
@@ -73547,7 +73549,7 @@
if (skb->pkt_type == PACKET_OTHERHOST)
goto drop;
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
-index ae40ed1..00aaec4 100644
+index ae40ed1..b763d8a 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -106,6 +106,7 @@
@@ -73558,7 +73560,17 @@
#include <net/sock.h>
#include <net/ip.h>
-@@ -144,6 +145,9 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
+@@ -116,6 +117,9 @@
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+
++#include <linux/cpt_image.h>
++#include <linux/cpt_export.h>
++
+ #define HASH_SIZE 16
+ #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF)
+
+@@ -144,6 +148,9 @@ static struct ip_tunnel * ipip_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct ipip_net *ipn = net_generic(net, ipip_net_id);
@@ -73568,7 +73580,129 @@
for (t = ipn->tunnels_r_l[h0^h1]; t; t = t->next) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr && (t->dev->flags&IFF_UP))
-@@ -767,6 +771,9 @@ static int ipip_init_net(struct net *net)
+@@ -686,11 +693,14 @@ static int ipip_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+ return 0;
+ }
+
++static void ipip_cpt(struct net_device *dev,
++ struct cpt_ops *ops, struct cpt_context *ctx);
+ static const struct net_device_ops ipip_netdev_ops = {
+ .ndo_uninit = ipip_tunnel_uninit,
+ .ndo_start_xmit = ipip_tunnel_xmit,
+ .ndo_do_ioctl = ipip_tunnel_ioctl,
+ .ndo_change_mtu = ipip_tunnel_change_mtu,
++ .ndo_cpt = ipip_cpt,
+
+ };
+
+@@ -762,11 +772,116 @@ static void ipip_destroy_tunnels(struct ipip_net *ipn)
+ }
+ }
+
++static void ipip_cpt(struct net_device *dev,
++ struct cpt_ops *ops, struct cpt_context *ctx)
++{
++ struct cpt_tunnel_image v;
++ struct ip_tunnel *t;
++ struct ipip_net *ipn;
++
++ t = netdev_priv(dev);
++ ipn = net_generic(get_exec_env()->ve_netns, ipip_net_id);
++ BUG_ON(ipn == NULL);
++
++ v.cpt_next = CPT_NULL;
++ v.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL;
++ v.cpt_hdrlen = sizeof(v);
++ v.cpt_content = CPT_CONTENT_VOID;
++
++ /* mark fb dev */
++ v.cpt_tnl_flags = 0;
++ if (dev == ipn->fb_tunnel_dev)
++ v.cpt_tnl_flags |= CPT_TUNNEL_FBDEV;
++
++ v.cpt_i_flags = t->parms.i_flags;
++ v.cpt_o_flags = t->parms.o_flags;
++ v.cpt_i_key = t->parms.i_key;
++ v.cpt_o_key = t->parms.o_key;
++
++ BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
++ memcpy(&v.cpt_iphdr, &t->parms.iph, sizeof(t->parms.iph));
++
++ ops->write(&v, sizeof(v), ctx);
++}
++
++static int ipip_rst(loff_t start, struct cpt_netdev_image *di,
++ struct rst_ops *ops, struct cpt_context *ctx)
++{
++ int err = -ENODEV;
++ struct cpt_tunnel_image v;
++ struct net_device *dev;
++ struct ip_tunnel *t;
++ loff_t pos;
++ int fbdev;
++ struct ipip_net *ipn;
++
++ ipn = net_generic(get_exec_env()->ve_netns, ipip_net_id);
++ if (ipn == NULL)
++ return -EOPNOTSUPP;
++
++ pos = start + di->cpt_hdrlen;
++ err = ops->get_object(CPT_OBJ_NET_IPIP_TUNNEL,
++ pos, &v, sizeof(v), ctx);
++ if (err)
++ return err;
++
++ /* some sanity */
++ if (v.cpt_content != CPT_CONTENT_VOID)
++ return -EINVAL;
++
++ if (v.cpt_tnl_flags & (~CPT_TUNNEL_FBDEV))
++ return 1;
++
++ if (v.cpt_tnl_flags & CPT_TUNNEL_FBDEV) {
++ fbdev = 1;
++ err = 0;
++ dev = ipn->fb_tunnel_dev;
++ } else {
++ fbdev = 0;
++ err = -ENOMEM;
++ dev = alloc_netdev(sizeof(struct ip_tunnel), di->cpt_name,
++ ipip_tunnel_setup);
++ if (!dev)
++ goto out;
++ }
++
++ t = netdev_priv(dev);
++ t->parms.i_flags = v.cpt_i_flags;
++ t->parms.o_flags = v.cpt_o_flags;
++ t->parms.i_key = v.cpt_i_key;
++ t->parms.o_key = v.cpt_o_key;
++
++ BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
++ memcpy(&t->parms.iph, &v.cpt_iphdr, sizeof(t->parms.iph));
++
++ if (!fbdev) {
++ ipip_tunnel_init(dev);
++ err = register_netdevice(dev);
++ if (err) {
++ free_netdev(dev);
++ goto out;
++ }
++
++ dev_hold(dev);
++ ipip_tunnel_link(ipn, t);
++ }
++out:
++ return err;
++}
++
++static struct netdev_rst ipip_netdev_rst = {
++ .cpt_object = CPT_OBJ_NET_IPIP_TUNNEL,
++ .ndo_rst = ipip_rst,
++};
++
+ static int ipip_init_net(struct net *net)
+ {
int err;
struct ipip_net *ipn;
@@ -73578,7 +73712,7 @@
err = -ENOMEM;
ipn = kzalloc(sizeof(struct ipip_net), GFP_KERNEL);
if (ipn == NULL)
-@@ -812,6 +819,9 @@ static void ipip_exit_net(struct net *net)
+@@ -812,6 +927,9 @@ static void ipip_exit_net(struct net *net)
struct ipip_net *ipn;
ipn = net_generic(net, ipip_net_id);
@@ -73588,6 +73722,22 @@
rtnl_lock();
ipip_destroy_tunnels(ipn);
unregister_netdevice(ipn->fb_tunnel_dev);
+@@ -838,12 +956,15 @@ static int __init ipip_init(void)
+ err = register_pernet_gen_device(&ipip_net_id, &ipip_net_ops);
+ if (err)
+ xfrm4_tunnel_deregister(&ipip_handler, AF_INET);
++ else
++ register_netdev_rst(&ipip_netdev_rst);
+
+ return err;
+ }
+
+ static void __exit ipip_fini(void)
+ {
++ unregister_netdev_rst(&ipip_netdev_rst);
+ if (xfrm4_tunnel_deregister(&ipip_handler, AF_INET))
+ printk(KERN_INFO "ipip close: can't deregister tunnel\n");
+
diff --git a/net/ipv4/netfilter/ip_queue.c b/net/ipv4/netfilter/ip_queue.c
index c156db2..3d4e78f 100644
--- a/net/ipv4/netfilter/ip_queue.c
@@ -73634,14 +73784,14 @@
write_unlock_bh(&queue_lock);
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
-index 62aff31..2de35eb 100644
+index 62aff31..93e2fd1 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -321,6 +321,9 @@ ipt_do_table(struct sk_buff *skb,
struct xt_match_param mtpar;
struct xt_target_param tgpar;
-+ if (!table) /* VE is not allowed to have this xtable */
++ if (ve_xt_table_forbidden(table))
+ return NF_ACCEPT;
+
/* Initialization */
@@ -74346,6 +74496,30 @@
/* It's done. */
if (maniptype == IP_NAT_MANIP_DST)
+diff --git a/net/ipv4/netfilter/nf_nat_rule.c b/net/ipv4/netfilter/nf_nat_rule.c
+index 9e81e0d..883cd2e 100644
+--- a/net/ipv4/netfilter/nf_nat_rule.c
++++ b/net/ipv4/netfilter/nf_nat_rule.c
+@@ -186,6 +186,9 @@ static struct xt_target ipt_dnat_reg __read_mostly = {
+
+ static int __net_init nf_nat_rule_net_init(struct net *net)
+ {
++ if (!net_ipt_module_permitted(net, VE_IP_IPTABLE_NAT))
++ return 0;
++
+ net->ipv4.nat_table = ipt_register_table(net, &nat_table,
+ &nat_initial_table.repl);
+ if (IS_ERR(net->ipv4.nat_table))
+@@ -195,6 +198,9 @@ static int __net_init nf_nat_rule_net_init(struct net *net)
+
+ static void __net_exit nf_nat_rule_net_exit(struct net *net)
+ {
++ if (!net_is_ipt_module_set(net, VE_IP_IPTABLE_NAT))
++ return;
++
+ ipt_unregister_table(net->ipv4.nat_table);
+ }
+
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f25542c..777a7b5 100644
--- a/net/ipv4/proc.c
@@ -75950,14 +76124,14 @@
write_unlock_bh(&queue_lock);
}
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
-index 1de56fd..9a6634b 100644
+index 1de56fd..4260468 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -351,6 +351,9 @@ ip6t_do_table(struct sk_buff *skb,
struct xt_match_param mtpar;
struct xt_target_param tgpar;
-+ if (!table) /* VE is not allowed to have this xtable */
++ if (ve_xt_table_forbidden(table))
+ return NF_ACCEPT;
+
/* Initialization */
@@ -76343,7 +76517,7 @@
/* We have to remove fragment header from datagram and to relocate
diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
-index dbd19a7..d21d34d 100644
+index dbd19a7..9fb663a 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -32,6 +32,7 @@
@@ -76354,7 +76528,17 @@
#include <net/sock.h>
#include <net/snmp.h>
-@@ -87,6 +88,9 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
+@@ -53,6 +54,9 @@
+ #include <net/net_namespace.h>
+ #include <net/netns/generic.h>
+
++#include <linux/cpt_image.h>
++#include <linux/cpt_export.h>
++
+ /*
+ This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
+
+@@ -87,6 +91,9 @@ static struct ip_tunnel * ipip6_tunnel_lookup(struct net *net,
struct ip_tunnel *t;
struct sit_net *sitn = net_generic(net, sit_net_id);
@@ -76364,7 +76548,129 @@
for (t = sitn->tunnels_r_l[h0^h1]; t; t = t->next) {
if (local == t->parms.iph.saddr &&
remote == t->parms.iph.daddr &&
-@@ -1016,6 +1020,9 @@ static int sit_init_net(struct net *net)
+@@ -937,11 +944,14 @@ static int ipip6_tunnel_change_mtu(struct net_device *dev, int new_mtu)
+ return 0;
+ }
+
++static void sit_cpt(struct net_device *dev,
++ struct cpt_ops *ops, struct cpt_context *ctx);
+ static const struct net_device_ops ipip6_netdev_ops = {
+ .ndo_uninit = ipip6_tunnel_uninit,
+ .ndo_start_xmit = ipip6_tunnel_xmit,
+ .ndo_do_ioctl = ipip6_tunnel_ioctl,
+ .ndo_change_mtu = ipip6_tunnel_change_mtu,
++ .ndo_cpt = sit_cpt,
+ };
+
+ static void ipip6_tunnel_setup(struct net_device *dev)
+@@ -1011,11 +1021,116 @@ static void sit_destroy_tunnels(struct sit_net *sitn)
+ }
+ }
+
++static void sit_cpt(struct net_device *dev,
++ struct cpt_ops *ops, struct cpt_context *ctx)
++{
++ struct cpt_tunnel_image v;
++ struct ip_tunnel *t;
++ struct sit_net *sitn;
++
++ t = netdev_priv(dev);
++ sitn = net_generic(get_exec_env()->ve_netns, sit_net_id);
++ BUG_ON(sitn == NULL);
++
++ v.cpt_next = CPT_NULL;
++ v.cpt_object = CPT_OBJ_NET_IPIP_TUNNEL;
++ v.cpt_hdrlen = sizeof(v);
++ v.cpt_content = CPT_CONTENT_VOID;
++
++ /* mark fb dev */
++ v.cpt_tnl_flags = CPT_TUNNEL_SIT;
++ if (dev == sitn->fb_tunnel_dev)
++ v.cpt_tnl_flags |= CPT_TUNNEL_FBDEV;
++
++ v.cpt_i_flags = t->parms.i_flags;
++ v.cpt_o_flags = t->parms.o_flags;
++ v.cpt_i_key = t->parms.i_key;
++ v.cpt_o_key = t->parms.o_key;
++
++ BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
++ memcpy(&v.cpt_iphdr, &t->parms.iph, sizeof(t->parms.iph));
++
++ ops->write(&v, sizeof(v), ctx);
++}
++
++static int sit_rst(loff_t start, struct cpt_netdev_image *di,
++ struct rst_ops *ops, struct cpt_context *ctx)
++{
++ int err = -ENODEV;
++ struct cpt_tunnel_image v;
++ struct net_device *dev;
++ struct ip_tunnel *t;
++ loff_t pos;
++ int fbdev;
++ struct sit_net *sitn;
++
++ sitn = net_generic(get_exec_env()->ve_netns, sit_net_id);
++ if (sitn == NULL)
++ return -EOPNOTSUPP;
++
++ pos = start + di->cpt_hdrlen;
++ err = ops->get_object(CPT_OBJ_NET_IPIP_TUNNEL,
++ pos, &v, sizeof(v), ctx);
++ if (err)
++ return err;
++
++ /* some sanity */
++ if (v.cpt_content != CPT_CONTENT_VOID)
++ return -EINVAL;
++
++ if (!(v.cpt_tnl_flags & CPT_TUNNEL_SIT))
++ return 1;
++
++ if (v.cpt_tnl_flags & CPT_TUNNEL_FBDEV) {
++ fbdev = 1;
++ err = 0;
++ dev = sitn->fb_tunnel_dev;
++ } else {
++ fbdev = 0;
++ err = -ENOMEM;
++ dev = alloc_netdev(sizeof(struct ip_tunnel), di->cpt_name,
++ ipip6_tunnel_setup);
++ if (!dev)
++ goto out;
++ }
++
++ t = netdev_priv(dev);
++ t->parms.i_flags = v.cpt_i_flags;
++ t->parms.o_flags = v.cpt_o_flags;
++ t->parms.i_key = v.cpt_i_key;
++ t->parms.o_key = v.cpt_o_key;
++
++ BUILD_BUG_ON(sizeof(v.cpt_iphdr) != sizeof(t->parms.iph));
++ memcpy(&t->parms.iph, &v.cpt_iphdr, sizeof(t->parms.iph));
++
++ if (!fbdev) {
++ ipip6_tunnel_init(dev);
++ err = register_netdevice(dev);
++ if (err) {
++ free_netdev(dev);
++ goto out;
++ }
++
++ dev_hold(dev);
++ ipip6_tunnel_link(sitn, t);
++ }
++out:
++ return err;
++}
++
++static struct netdev_rst sit_netdev_rst = {
++ .cpt_object = CPT_OBJ_NET_IPIP_TUNNEL,
++ .ndo_rst = sit_rst,
++};
++
+ static int sit_init_net(struct net *net)
+ {
int err;
struct sit_net *sitn;
@@ -76374,7 +76680,7 @@
err = -ENOMEM;
sitn = kzalloc(sizeof(struct sit_net), GFP_KERNEL);
if (sitn == NULL)
-@@ -1061,6 +1068,9 @@ static void sit_exit_net(struct net *net)
+@@ -1061,6 +1176,9 @@ static void sit_exit_net(struct net *net)
struct sit_net *sitn;
sitn = net_generic(net, sit_net_id);
@@ -76384,6 +76690,23 @@
rtnl_lock();
sit_destroy_tunnels(sitn);
unregister_netdevice(sitn->fb_tunnel_dev);
+@@ -1075,6 +1193,7 @@ static struct pernet_operations sit_net_ops = {
+
+ static void __exit sit_cleanup(void)
+ {
++ unregister_netdev_rst(&sit_netdev_rst);
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
+
+ unregister_pernet_gen_device(sit_net_id, &sit_net_ops);
+@@ -1094,6 +1213,8 @@ static int __init sit_init(void)
+ err = register_pernet_gen_device(&sit_net_id, &sit_net_ops);
+ if (err < 0)
+ xfrm4_tunnel_deregister(&sit_handler, AF_INET6);
++ else
++ register_netdev_rst(&sit_netdev_rst);
+
+ return err;
+ }
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 21d100b..0ecd5b4 100644
--- a/net/ipv6/tcp_ipv6.c
More information about the Kernel-svn-changes
mailing list