r2487 - in trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian: . patches patches/series

Joshua Kwan joshk at costa.debian.org
Fri Oct 19 10:58:24 UTC 2007


Author: joshk
Date: 2005-02-15 01:36:04 +0100 (Tue, 15 Feb 2005)
New Revision: 2487

Added:
   trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/097_ipsec.diff
Modified:
   trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/changelog
   trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/series/2.4.29-1
Log:
add ipsec patch!


Modified: trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/changelog
===================================================================
--- trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/changelog	2005-02-14 17:51:54 UTC (rev 2486)
+++ trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/changelog	2005-02-15 00:36:04 UTC (rev 2487)
@@ -7,8 +7,10 @@
     - 077_isofs_ignore_volseqno.diff (irrelevant)
     - 093_tty_lockup.diff (backport)
     - 114-binfmt_aout-CAN-2004-1074.diff (backport)
+  * Patches added
+    - 097_ipsec.diff (Herbert's backport)
 
- -- Joshua Kwan <joshk at triplehelix.org>  Sat,  5 Feb 2005 16:28:05 -0800
+ -- Joshua Kwan <joshk at triplehelix.org>  Mon, 14 Feb 2005 16:35:49 -0800
 
 kernel-source-2.4.28 (2.4.28-1) unstable; urgency=low
 

Added: trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/097_ipsec.diff
===================================================================
--- trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/097_ipsec.diff	2005-02-14 17:51:54 UTC (rev 2486)
+++ trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/097_ipsec.diff	2005-02-15 00:36:04 UTC (rev 2487)
@@ -0,0 +1,33556 @@
+# origin: http://gondor.apana.org.au/~herbert/ipsec-2.4/files/ipsec-2.4.29-20050213-1.bz2
+# cset: n/a
+# description: Backport of 2.6 IPsec to 2.4.29
+# revision date: 2005-02-14
+
+diff -Nru a/Documentation/Configure.help b/Documentation/Configure.help
+--- a/Documentation/Configure.help	2005-02-13 21:25:10 +11:00
++++ b/Documentation/Configure.help	2005-02-13 21:25:10 +11:00
+@@ -5992,6 +5992,14 @@
+   and you should also say Y to "Kernel/User network link driver",
+   below. If unsure, say N.
+ 
++PF_KEY sockets
++CONFIG_NET_KEY
++  PF_KEYv2 socket family, compatible to KAME ones.
++  They are required if you are going to use IPsec tools ported
++  from KAME.
++
++  Say Y unless you know what you are doing.
++
+ TCP/IP networking
+ CONFIG_INET
+   These are the protocols used on the Internet and on most local
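
For reference, a minimal userspace sketch (not part of this patch) of what
CONFIG_NET_KEY provides: KAME-derived tools such as setkey and racoon talk
to the kernel through a PF_KEYv2 socket like this one. PF_KEY comes from
<sys/socket.h> and PF_KEY_V2 from <linux/pfkeyv2.h>.

    #include <stdio.h>
    #include <sys/socket.h>
    #include <linux/pfkeyv2.h>

    int main(void)
    {
            /* PF_KEY supports only SOCK_RAW; the protocol argument
             * selects the PF_KEY version (2, per RFC 2367). */
            int fd = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
            if (fd < 0)
                    perror("socket(PF_KEY)");  /* fails without CONFIG_NET_KEY */
            return 0;
    }
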
+@@ -6251,6 +6259,39 @@
+   gated-5). This routing protocol is not used widely, so say N unless
+   you want to play with it.
+ 
++IP: AH transformation
++CONFIG_INET_AH
++  Support for IPsec AH.
++
++  If unsure, say Y.
++
++IP: ESP transformation
++CONFIG_INET_ESP
++  Support for IPsec ESP.
++
++  If unsure, say Y.
++
++IP: IPComp transformation
++CONFIG_INET_IPCOMP
++  Support for IP Payload Compression (RFC3173), typically needed
++  for IPsec.
++
++  If unsure, say Y.
++
++IP: tunnel transformation
++CONFIG_INET_TUNNEL
++  Support for generic IP tunnel transformation, which is required by
++  the IP tunneling module as well as tunnel mode IPComp.
++	  
++  If unsure, say Y.
++
++IP: IPsec user configuration interface
++CONFIG_XFRM_USER
++  Support for IPsec user configuration interface used
++  by native Linux tools.
++
++  If unsure, say Y.
++
+ Unix domain sockets
+ CONFIG_UNIX
+   If you say Y here, you will include support for Unix domain sockets;
+@@ -6295,6 +6336,28 @@
+   as a module, say M here and read <file:Documentation/modules.txt>.
+ 
+   It is safe to say N here for now.
++
++IPv6: Privacy Extensions (RFC 3041) support
++CONFIG_IPV6_PRIVACY
++  Privacy Extensions for Stateless Address Autoconfiguration in IPv6
++  support.  With this option, additional periodically-altered
++  pseudo-random global-scope unicast address(es) will be assigned to
++  your interface(s).
++
++  By default, the kernel does not generate temporary addresses.
++  To use temporary addresses, do
++
++        echo 2 >/proc/sys/net/ipv6/conf/all/use_tempaddr 
++
++  See <file:Documentation/networking/ip-sysctl.txt> for details.
++
++IPv6: tunnel transformation
++CONFIG_INET6_TUNNEL
++  Support for generic IPv6-in-IPv6 tunnel transformation, which is
++  required by the IPv6-in-IPv6 tunneling module as well as tunnel mode
++  IPComp.
++
++  If unsure, say Y.
+ 
+ The SCTP Protocol (EXPERIMENTAL)
+ CONFIG_IP_SCTP
+diff -Nru a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
+--- a/Documentation/networking/ip-sysctl.txt	2005-02-13 21:25:09 +11:00
++++ b/Documentation/networking/ip-sysctl.txt	2005-02-13 21:25:09 +11:00
+@@ -708,6 +708,37 @@
+ 	0 to disable any limiting, otherwise the maximal rate in jiffies(1)
+ 	Default: 100
+ 
++use_tempaddr - INTEGER
++	Preference for Privacy Extensions (RFC3041).
++	  <= 0 : disable Privacy Extensions
++	  == 1 : enable Privacy Extensions, but prefer public
++	         addresses over temporary addresses.
++	  >  1 : enable Privacy Extensions and prefer temporary
++	         addresses over public addresses.
++	Default:  0 (for most devices)
++		 -1 (for point-to-point devices and loopback devices)
++
++temp_valid_lft - INTEGER
++	Valid lifetime (in seconds) for temporary addresses.
++	Default: 604800 (7 days)
++
++temp_prefered_lft - INTEGER
++	Preferred lifetime (in seconds) for temporary addresses.
++	Default: 86400 (1 day)
++
++max_desync_factor - INTEGER
++	Maximum value for DESYNC_FACTOR, which is a random value
++	that ensures that clients don't synchronize with each 
++	other and generage new addresses at exactly the same time.
++	value is in seconds.
++	Default: 600
++	
++regen_max_retry - INTEGER
++	Number of attempts before giving up on generating
++	valid temporary addresses.
++	Default: 5
++
++
+ IPv6 Update by:
+ Pekka Savola <pekkas at netcore.fi>
+ YOSHIFUJI Hideaki / USAGI Project <yoshfuji at linux-ipv6.org>
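
As a quick illustration (not part of the patch), the use_tempaddr knob
documented above can be set from C just as with the echo command shown in
Configure.help; "eth0" here is an assumed interface name.

    #include <stdio.h>

    int main(void)
    {
            /* Prefer RFC 3041 temporary addresses on eth0; equivalent to
             * echo 2 > /proc/sys/net/ipv6/conf/eth0/use_tempaddr */
            FILE *f = fopen("/proc/sys/net/ipv6/conf/eth0/use_tempaddr", "w");
            if (!f)
                    return 1;
            fputs("2\n", f);
            return fclose(f) ? 1 : 0;
    }
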
+diff -Nru a/arch/alpha/defconfig b/arch/alpha/defconfig
+--- a/arch/alpha/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/alpha/defconfig	2005-02-13 21:25:09 +11:00
+@@ -127,6 +127,7 @@
+ # CONFIG_NETFILTER_DEBUG is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/arm/defconfig b/arch/arm/defconfig
+--- a/arch/arm/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/arm/defconfig	2005-02-13 21:25:09 +11:00
+@@ -170,6 +170,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/cris/defconfig b/arch/cris/defconfig
+--- a/arch/cris/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/cris/defconfig	2005-02-13 21:25:09 +11:00
+@@ -214,6 +214,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/i386/defconfig b/arch/i386/defconfig
+--- a/arch/i386/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/i386/defconfig	2005-02-13 21:25:09 +11:00
+@@ -184,6 +184,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/ia64/defconfig b/arch/ia64/defconfig
+--- a/arch/ia64/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/ia64/defconfig	2005-02-13 21:25:09 +11:00
+@@ -101,6 +101,7 @@
+ # CONFIG_NETFILTER is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/m68k/defconfig b/arch/m68k/defconfig
+--- a/arch/m68k/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/m68k/defconfig	2005-02-13 21:25:09 +11:00
+@@ -82,6 +82,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/mips/defconfig b/arch/mips/defconfig
+--- a/arch/mips/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/mips/defconfig	2005-02-13 21:25:09 +11:00
+@@ -207,6 +207,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/mips64/defconfig b/arch/mips64/defconfig
+--- a/arch/mips64/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/mips64/defconfig	2005-02-13 21:25:09 +11:00
+@@ -212,6 +212,7 @@
+ # CONFIG_NETFILTER_DEBUG is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/parisc/defconfig b/arch/parisc/defconfig
+--- a/arch/parisc/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/parisc/defconfig	2005-02-13 21:25:09 +11:00
+@@ -116,6 +116,7 @@
+ # CONFIG_NETFILTER is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/ppc/defconfig b/arch/ppc/defconfig
+--- a/arch/ppc/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/ppc/defconfig	2005-02-13 21:25:09 +11:00
+@@ -134,6 +134,7 @@
+ # CONFIG_NETFILTER_DEBUG is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/ppc/kernel/head_8xx.S b/arch/ppc/kernel/head_8xx.S
+--- a/arch/ppc/kernel/head_8xx.S	2005-02-13 21:25:09 +11:00
++++ b/arch/ppc/kernel/head_8xx.S	2005-02-13 21:25:09 +11:00
+@@ -338,13 +338,13 @@
+ 3:
+ 	lwz	r21, 0(r20)	/* Get the level 1 entry */
+ 	rlwinm.	r20, r21,0,0,19	/* Extract page descriptor page address */
+-	beq	2f		/* If zero, don't try to find a pte */
+ 
+ 	/* We have a pte table, so load the MI_TWC with the attributes
+ 	 * for this "segment."
+ 	 */
+ 	tophys(r21,r21)
+ 	ori	r21,r21,1		/* Set valid bit */
++	beq-	2f			/* If zero, don't try to find a pte */
+ #ifdef CONFIG_8xx_CPU6
+ 	li	r3, 0x2b80
+ 	stw	r3, 12(r0)
+@@ -369,7 +369,7 @@
+ 	 * set.  All other Linux PTE bits control the behavior
+ 	 * of the MMU.
+ 	 */
+-	li	r21, 0x00f0
++2:	li	r21, 0x00f0
+ 	rlwimi	r20, r21, 0, 24, 28	/* Set 24-27, clear 28 */
+ 
+ #ifdef CONFIG_8xx_CPU6
+@@ -388,15 +388,6 @@
+ #endif
+ 	rfi
+ 
+-2:	mfspr	r20, M_TW	/* Restore registers */
+-	lwz	r21, 0(r0)
+-	mtcr	r21
+-	lwz	r21, 4(r0)
+-#ifdef CONFIG_8xx_CPU6
+-	lwz	r3, 8(r0)
+-#endif
+-	b	InstructionAccess
+-
+ 	. = 0x1200
+ DataStoreTLBMiss:
+ #ifdef CONFIG_8xx_CPU6
+@@ -422,12 +413,12 @@
+ 3:
+ 	lwz	r21, 0(r20)	/* Get the level 1 entry */
+ 	rlwinm.	r20, r21,0,0,19	/* Extract page descriptor page address */
+-	beq	2f		/* If zero, don't try to find a pte */
+ 
+ 	/* We have a pte table, so load fetch the pte from the table.
+ 	 */
+ 	tophys(r21, r21)
+ 	ori	r21, r21, 1	/* Set valid bit in physical L2 page */
++	beq-	2f		/* If zero, don't try to find a pte */
+ #ifdef CONFIG_8xx_CPU6
+ 	li	r3, 0x3b80
+ 	stw	r3, 12(r0)
+@@ -461,7 +452,7 @@
+ 	 * set.  All other Linux PTE bits control the behavior
+ 	 * of the MMU.
+ 	 */
+-	li	r21, 0x00f0
++2:	li	r21, 0x00f0
+ 	rlwimi	r20, r21, 0, 24, 28	/* Set 24-27, clear 28 */
+ 
+ #ifdef CONFIG_8xx_CPU6
+@@ -479,24 +470,6 @@
+ 	lwz	r3, 8(r0)
+ #endif
+ 	rfi
+-
+-2:
+-	/* Copy 20 msb from MD_EPN to DAR since the dcxx instructions fail
+-	 * to update DAR when they cause a DTLB miss.
+-	 */
+-	mfspr	r21, MD_EPN
+-	mfspr	r20, DAR
+-	rlwimi	r20, r21, 0, 0, 19
+-	mtspr	DAR, r20
+-
+-	mfspr	r20, M_TW	/* Restore registers */
+-	lwz	r21, 0(r0)
+-	mtcr	r21
+-	lwz	r21, 4(r0)
+-#ifdef CONFIG_8xx_CPU6
+-	lwz	r3, 8(r0)
+-#endif
+-	b	DataAccess
+ 
+ /* This is an instruction TLB error on the MPC8xx.  This could be due
+  * to many reasons, such as executing guarded memory or illegal instruction
+diff -Nru a/arch/ppc64/defconfig b/arch/ppc64/defconfig
+--- a/arch/ppc64/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/ppc64/defconfig	2005-02-13 21:25:09 +11:00
+@@ -110,6 +110,7 @@
+ # CONFIG_NETFILTER is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/s390/defconfig b/arch/s390/defconfig
+--- a/arch/s390/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/s390/defconfig	2005-02-13 21:25:09 +11:00
+@@ -150,6 +150,7 @@
+ # CONFIG_NETFILTER_DEBUG is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/s390x/defconfig b/arch/s390x/defconfig
+--- a/arch/s390x/defconfig	2005-02-13 21:25:08 +11:00
++++ b/arch/s390x/defconfig	2005-02-13 21:25:08 +11:00
+@@ -150,6 +150,7 @@
+ # CONFIG_NETFILTER_DEBUG is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/sh64/defconfig b/arch/sh64/defconfig
+--- a/arch/sh64/defconfig	2005-02-13 21:25:10 +11:00
++++ b/arch/sh64/defconfig	2005-02-13 21:25:10 +11:00
+@@ -113,6 +113,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/sparc/defconfig b/arch/sparc/defconfig
+--- a/arch/sparc/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/sparc/defconfig	2005-02-13 21:25:09 +11:00
+@@ -144,6 +144,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/sparc64/defconfig b/arch/sparc64/defconfig
+--- a/arch/sparc64/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/sparc64/defconfig	2005-02-13 21:25:09 +11:00
+@@ -203,6 +203,7 @@
+ # CONFIG_NETFILTER_DEBUG is not set
+ CONFIG_FILTER=y
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ CONFIG_IP_MULTICAST=y
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/arch/x86_64/defconfig b/arch/x86_64/defconfig
+--- a/arch/x86_64/defconfig	2005-02-13 21:25:09 +11:00
++++ b/arch/x86_64/defconfig	2005-02-13 21:25:09 +11:00
+@@ -144,6 +144,7 @@
+ # CONFIG_NETFILTER is not set
+ # CONFIG_FILTER is not set
+ CONFIG_UNIX=y
++CONFIG_NET_KEY=y
+ CONFIG_INET=y
+ # CONFIG_IP_MULTICAST is not set
+ # CONFIG_IP_ADVANCED_ROUTER is not set
+diff -Nru a/crypto/Config.in b/crypto/Config.in
+--- a/crypto/Config.in	2005-02-13 21:25:09 +11:00
++++ b/crypto/Config.in	2005-02-13 21:25:09 +11:00
+@@ -11,7 +11,8 @@
+      "$CONFIG_INET6_AH" = "y" -o \
+      "$CONFIG_INET6_AH" = "m" -o \
+      "$CONFIG_INET6_ESP" = "y" -o \
+-     "$CONFIG_INET6_ESP" = "m" ]; then
++     "$CONFIG_INET6_ESP" = "m" -o \
++     "$CONFIG_IPV6_PRIVACY" = "y" ]; then
+   define_bool CONFIG_CRYPTO y
+ else
+   bool 'Cryptographic API' CONFIG_CRYPTO
+@@ -25,7 +26,8 @@
+        "$CONFIG_INET6_AH" = "y" -o \
+        "$CONFIG_INET6_AH" = "m" -o \
+        "$CONFIG_INET6_ESP" = "y" -o \
+-       "$CONFIG_INET6_ESP" = "m" ]; then
++       "$CONFIG_INET6_ESP" = "m" -o \
++       "$CONFIG_IPV6_PRIVACY" = "y" ]; then
+     define_bool CONFIG_CRYPTO_HMAC y
+   else
+     bool           '  HMAC support' CONFIG_CRYPTO_HMAC
+@@ -33,39 +35,56 @@
+   tristate       '  NULL algorithms' CONFIG_CRYPTO_NULL
+   tristate       '  MD4 digest algorithm' CONFIG_CRYPTO_MD4
+   if [ "$CONFIG_INET_AH" = "y" -o \
+-       "$CONFIG_INET_AH" = "m" -o \
+        "$CONFIG_INET_ESP" = "y" -o \
+-       "$CONFIG_INET_ESP" = "m" -o \
+        "$CONFIG_INET6_AH" = "y" -o \
+-       "$CONFIG_INET6_AH" = "m" -o \
+-       "$CONFIG_INET6_ESP" = "y" -o \
+-       "$CONFIG_INET6_ESP" = "m" ]; then
+-    define_bool CONFIG_CRYPTO_MD5 y
++       "$CONFIG_INET6_ESP" = "y" ]; then
++    define_tristate CONFIG_CRYPTO_MD5 y
+   else
+-    tristate       '  MD5 digest algorithm' CONFIG_CRYPTO_MD5
++    if [ "$CONFIG_IPV6" = "y" -a "$CONFIG_IPV6_PRIVACY" = "y" ]; then
++      define_tristate CONFIG_CRYPTO_MD5 y
++    else
++      if [ "$CONFIG_INET_AH" = "m" -o \
++	   "$CONFIG_INET_ESP" = "m" -o \
++	   "$CONFIG_INET6_AH" = "m" -o \
++	   "$CONFIG_INET6_ESP" = "m" ]; then
++	define_tristate CONFIG_CRYPTO_MD5 m
++      else
++	if [ "$CONFIG_IPV6" = "m" -a "$CONFIG_IPV6_PRIVACY" = "y" ]; then
++	  define_tristate CONFIG_CRYPTO_MD5 m
++	else
++	  tristate       '  MD5 digest algorithm' CONFIG_CRYPTO_MD5
++	fi
++      fi
++    fi
+   fi
+   if [ "$CONFIG_INET_AH" = "y" -o \
+-       "$CONFIG_INET_AH" = "m" -o \
+        "$CONFIG_INET_ESP" = "y" -o \
+-       "$CONFIG_INET_ESP" = "m" -o \
+        "$CONFIG_INET6_AH" = "y" -o \
+-       "$CONFIG_INET6_AH" = "m" -o \
+-       "$CONFIG_INET6_ESP" = "y" -o \
+-       "$CONFIG_INET6_ESP" = "m" ]; then
+-    define_bool CONFIG_CRYPTO_SHA1 y
++       "$CONFIG_INET6_ESP" = "y" ]; then
++    define_tristate CONFIG_CRYPTO_SHA1 y
+   else
+-    tristate       '  SHA1 digest algorithm' CONFIG_CRYPTO_SHA1
++    if [ "$CONFIG_INET_AH" = "m" -o \
++	 "$CONFIG_INET_ESP" = "m" -o \
++	 "$CONFIG_INET6_AH" = "m" -o \
++	 "$CONFIG_INET6_ESP" = "m" ]; then
++      define_tristate CONFIG_CRYPTO_SHA1 m
++    else
++      tristate       '  SHA1 digest algorithm' CONFIG_CRYPTO_SHA1
++    fi
+   fi
+   tristate       '  SHA256 digest algorithm' CONFIG_CRYPTO_SHA256
+   tristate       '  SHA384 and SHA512 digest algorithms' CONFIG_CRYPTO_SHA512
+   tristate       '  Whirlpool digest algorithms' CONFIG_CRYPTO_WP512
+   if [ "$CONFIG_INET_ESP" = "y" -o \
+-       "$CONFIG_INET_ESP" = "m" -o \
+-       "$CONFIG_INET6_ESP" = "y" -o \
+-       "$CONFIG_INET6_ESP" = "m" ]; then
+-    define_bool CONFIG_CRYPTO_DES y
++       "$CONFIG_INET6_ESP" = "y" ]; then
++    define_tristate CONFIG_CRYPTO_DES y
+   else
+-    tristate       '  DES and Triple DES EDE cipher algorithms' CONFIG_CRYPTO_DES
++    if [ "$CONFIG_INET_ESP" = "m" -o \
++	 "$CONFIG_INET6_ESP" = "m" ]; then
++      define_tristate CONFIG_CRYPTO_DES m
++    else
++      tristate       '  DES and Triple DES EDE cipher algorithms' CONFIG_CRYPTO_DES
++    fi
+   fi
+   tristate       '  Blowfish cipher algorithm' CONFIG_CRYPTO_BLOWFISH
+   tristate       '  Twofish cipher algorithm' CONFIG_CRYPTO_TWOFISH
+@@ -78,12 +97,15 @@
+   tristate       '  Anubis cipher algorithm' CONFIG_CRYPTO_ANUBIS
+   tristate       '  ARC4 cipher algorithm' CONFIG_CRYPTO_ARC4
+   if [ "$CONFIG_INET_IPCOMP" = "y" -o \
+-       "$CONFIG_INET_IPCOMP" = "m" -o \
+-       "$CONFIG_INET6_IPCOMP" = "y" -o \
+-       "$CONFIG_INET6_IPCOMP" = "m" ]; then
+-    define_bool CONFIG_CRYPTO_DEFLATE y
++       "$CONFIG_INET6_IPCOMP" = "y" ]; then
++    define_tristate CONFIG_CRYPTO_DEFLATE y
+   else
+-    tristate       '  Deflate compression algorithm' CONFIG_CRYPTO_DEFLATE
++    if [ "$CONFIG_INET_IPCOMP" = "m" -o \
++	 "$CONFIG_INET6_IPCOMP" = "m" ]; then
++      define_tristate CONFIG_CRYPTO_DEFLATE m
++    else
++      tristate       '  Deflate compression algorithm' CONFIG_CRYPTO_DEFLATE
++    fi
+   fi
+   tristate       '  Michael MIC keyed digest algorithm' CONFIG_CRYPTO_MICHAEL_MIC
+   tristate       '  Testing module' CONFIG_CRYPTO_TEST
+diff -Nru a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
+--- a/drivers/net/ppp_generic.c	2005-02-13 21:25:10 +11:00
++++ b/drivers/net/ppp_generic.c	2005-02-13 21:25:10 +11:00
+@@ -57,7 +57,9 @@
+ #define NP_IPV6	1		/* Internet Protocol V6 */
+ #define NP_IPX	2		/* IPX protocol */
+ #define NP_AT	3		/* Appletalk protocol */
+-#define NUM_NP	4		/* Number of NPs. */
++#define NP_MPLS_UC 4		/* MPLS unicast */
++#define NP_MPLS_MC 5		/* MPLS multicast */
++#define NUM_NP	6		/* Number of NPs. */
+ 
+ #define MPHDRLEN	6	/* multilink protocol header length */
+ #define MPHDRLEN_SSN	4	/* ditto with short sequence numbers */
+@@ -281,6 +283,10 @@
+ 		return NP_IPX;
+ 	case PPP_AT:
+ 		return NP_AT;
++	case PPP_MPLS_UC:
++		return NP_MPLS_UC;
++	case PPP_MPLS_MC:
++		return NP_MPLS_MC;
+ 	}
+ 	return -EINVAL;
+ }
+@@ -291,6 +297,8 @@
+ 	PPP_IPV6,
+ 	PPP_IPX,
+ 	PPP_AT,
++	PPP_MPLS_UC,
++	PPP_MPLS_MC,
+ };
+ 	
+ /* Translates an ethertype into an NP index */
+@@ -306,6 +314,10 @@
+ 	case ETH_P_PPPTALK:
+ 	case ETH_P_ATALK:
+ 		return NP_AT;
++	case ETH_P_MPLS_UC:
++		return NP_MPLS_UC;
++	case ETH_P_MPLS_MC:
++		return NP_MPLS_MC;
+ 	}
+ 	return -1;
+ }
+@@ -316,6 +328,8 @@
+ 	ETH_P_IPV6,
+ 	ETH_P_IPX,
+ 	ETH_P_PPPTALK,
++	ETH_P_MPLS_UC,
++	ETH_P_MPLS_MC,
+ };
+ 
+ /*
+diff -Nru a/drivers/scsi/megaraid2.c b/drivers/scsi/megaraid2.c
+--- a/drivers/scsi/megaraid2.c	2005-02-13 21:25:09 +11:00
++++ b/drivers/scsi/megaraid2.c	2005-02-13 21:25:09 +11:00
+@@ -2819,7 +2819,7 @@
+ 		}
+ 
+ 		if( iter++ < MBOX_ABORT_SLEEP*1000 ) {
+-			mdelay(1);
++			msleep(1);
+ 		}
+ 		else {
+ 			printk(KERN_WARNING
+@@ -2899,7 +2899,7 @@
+ 		}
+ 
+ 		if( iter++ < MBOX_RESET_SLEEP*1000 ) {
+-			mdelay(1);
++			msleep(1);
+ 		}
+ 		else {
+ 			printk(KERN_WARNING
+@@ -4040,10 +4040,10 @@
+ 	printk(KERN_INFO "megaraid: cache flush delay:   ");
+ 	for( i = 9; i >= 0; i-- ) {
+ 		printk("\b\b\b[%d]", i);
+-		mdelay(1000);
++		msleep(1000);
+ 	}
+ 	printk("\b\b\b[done]\n");
+-	mdelay(1000);
++	msleep(1000);
+ 
+ 	return NOTIFY_DONE;
+ }
+diff -Nru a/include/asm-alpha/scatterlist.h b/include/asm-alpha/scatterlist.h
+--- a/include/asm-alpha/scatterlist.h	2005-02-13 21:25:09 +11:00
++++ b/include/asm-alpha/scatterlist.h	2005-02-13 21:25:09 +11:00
+@@ -2,6 +2,7 @@
+ #define _ALPHA_SCATTERLIST_H
+ 
+ #include <asm/page.h>
++#include <linux/types.h>
+   
+ struct scatterlist {
+ 	/* This will disappear in 2.5.x */
+diff -Nru a/include/linux/if_arp.h b/include/linux/if_arp.h
+--- a/include/linux/if_arp.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/if_arp.h	2005-02-13 21:25:09 +11:00
+@@ -60,7 +60,7 @@
+ #define ARPHRD_RAWHDLC	518		/* Raw HDLC			*/
+ 
+ #define ARPHRD_TUNNEL	768		/* IPIP tunnel			*/
+-#define ARPHRD_TUNNEL6	769		/* IPIP6 tunnel			*/
++#define ARPHRD_TUNNEL6	769		/* IP6IP6 tunnel       		*/
+ #define ARPHRD_FRAD	770             /* Frame Relay Access Device    */
+ #define ARPHRD_SKIP	771		/* SKIP vif			*/
+ #define ARPHRD_LOOPBACK	772		/* Loopback device		*/
+diff -Nru a/include/linux/in.h b/include/linux/in.h
+--- a/include/linux/in.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/in.h	2005-02-13 21:25:09 +11:00
+@@ -18,6 +18,7 @@
+ #ifndef _LINUX_IN_H
+ #define _LINUX_IN_H
+ 
++#include <linux/socket.h>
+ #include <linux/types.h>
+ #include <linux/socket.h>
+ 
+@@ -69,6 +70,8 @@
+ #define	IP_RECVTOS	13
+ #define IP_MTU		14
+ #define IP_FREEBIND	15
++#define IP_IPSEC_POLICY	16
++#define IP_XFRM_POLICY	17
+ 
+ /* BSD compatibility */
+ #define IP_RECVRETOPTS	IP_RETOPTS
+diff -Nru a/include/linux/in6.h b/include/linux/in6.h
+--- a/include/linux/in6.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/in6.h	2005-02-13 21:25:09 +11:00
+@@ -180,5 +180,8 @@
+ #define IPV6_FLOWLABEL_MGR	32
+ #define IPV6_FLOWINFO_SEND	33
+ 
++#define IPV6_IPSEC_POLICY	34
++#define IPV6_XFRM_POLICY	35
++
+ 
+ #endif
+diff -Nru a/include/linux/inetdevice.h b/include/linux/inetdevice.h
+--- a/include/linux/inetdevice.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/inetdevice.h	2005-02-13 21:25:09 +11:00
+@@ -21,6 +21,8 @@
+ 	int	arp_announce;
+ 	int	arp_ignore;
+ 	int	medium_id;
++	int	no_xfrm;
++	int	no_policy;
+ 	int	force_igmp_version;
+ 	void	*sysctl;
+ };
+diff -Nru a/include/linux/ip.h b/include/linux/ip.h
+--- a/include/linux/ip.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/ip.h	2005-02-13 21:25:09 +11:00
+@@ -18,8 +18,6 @@
+ #define _LINUX_IP_H
+ #include <asm/byteorder.h>
+ 
+-/* SOL_IP socket options */
+-
+ #define IPTOS_TOS_MASK		0x1E
+ #define IPTOS_TOS(tos)		((tos)&IPTOS_TOS_MASK)
+ #define	IPTOS_LOWDELAY		0x10
+@@ -67,14 +65,6 @@
+ #define MAXTTL		255
+ #define IPDEFTTL	64
+ 
+-/* struct timestamp, struct route and MAX_ROUTES are removed.
+-
+-   REASONS: it is clear that nobody used them because:
+-   - MAX_ROUTES value was wrong.
+-   - "struct route" was wrong.
+-   - "struct timestamp" had fatally misaligned bitfields and was completely unusable.
+- */
+-
+ #define IPOPT_OPTVAL 0
+ #define IPOPT_OLEN   1
+ #define IPOPT_OFFSET 2
+@@ -133,6 +123,27 @@
+ 	__u32	saddr;
+ 	__u32	daddr;
+ 	/*The options start here. */
++};
++
++struct ip_auth_hdr {
++	__u8  nexthdr;
++	__u8  hdrlen;		/* This one is measured in 32 bit units! */
++	__u16 reserved;
++	__u32 spi;
++	__u32 seq_no;		/* Sequence number */
++	__u8  auth_data[0];	/* Variable len but >=4. Mind the 64 bit alignment! */
++};
++
++struct ip_esp_hdr {
++	__u32 spi;
++	__u32 seq_no;		/* Sequence number */
++	__u8  enc_data[0];	/* Variable len but >=8. Mind the 64 bit alignment! */
++};
++
++struct ip_comp_hdr {
++	__u8 nexthdr;
++	__u8 flags;
++	__u16 cpi;
+ };
+ 
+ #endif	/* _LINUX_IP_H */
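
To make the new header layouts concrete, a hedged in-kernel sketch (not
from the patch) of pulling the SPI and sequence number out of an ESP
packet, assuming skb->h.raw already points at the ESP header:

    struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->h.raw;
    u32 spi = ntohl(esph->spi);     /* on-wire fields are big-endian */
    u32 seq = ntohl(esph->seq_no);
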
+diff -Nru a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/linux/ip6_tunnel.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,32 @@
++/*
++ * $Id$
++ */
++
++#ifndef _IP6_TUNNEL_H
++#define _IP6_TUNNEL_H
++
++#define IPV6_TLV_TNL_ENCAP_LIMIT 4
++#define IPV6_DEFAULT_TNL_ENCAP_LIMIT 4
++
++/* don't add encapsulation limit if one isn't present in inner packet */
++#define IP6_TNL_F_IGN_ENCAP_LIMIT 0x1
++/* copy the traffic class field from the inner packet */
++#define IP6_TNL_F_USE_ORIG_TCLASS 0x2
++/* copy the flowlabel from the inner packet */
++#define IP6_TNL_F_USE_ORIG_FLOWLABEL 0x4
++/* being used for Mobile IPv6 */
++#define IP6_TNL_F_MIP6_DEV 0x8
++
++struct ip6_tnl_parm {
++	char name[IFNAMSIZ];	/* name of tunnel device */
++	int link;		/* ifindex of underlying L2 interface */
++	__u8 proto;		/* tunnel protocol */
++	__u8 encap_limit;	/* encapsulation limit for tunnel */
++	__u8 hop_limit;		/* hop limit for tunnel */
++	__u32 flowinfo;		/* traffic class and flowlabel for tunnel */
++	__u32 flags;		/* tunnel flags */
++	struct in6_addr laddr;	/* local tunnel end-point address */
++	struct in6_addr raddr;	/* remote tunnel end-point address */
++};
++
++#endif
+diff -Nru a/include/linux/ipsec.h b/include/linux/ipsec.h
+--- a/include/linux/ipsec.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/ipsec.h	2005-02-13 21:25:09 +11:00
+@@ -1,69 +1,46 @@
+-/*
+- *	Definitions for the SECurity layer
+- *
+- *	Author:
+- *		Robert Muchsel <muchsel at acm.org>
+- *
+- *	This program is free software; you can redistribute it and/or
+- *	modify it under the terms of the GNU General Public License
+- *	as published by the Free Software Foundation; either version
+- *	2 of the License, or (at your option) any later version.
+- */
+- 
+ #ifndef _LINUX_IPSEC_H
+ #define _LINUX_IPSEC_H
+ 
+-#include <linux/config.h>
+-#include <linux/socket.h>
+-#include <net/sock.h>
+-#include <linux/skbuff.h>
+-
+-/* Values for the set/getsockopt calls */
+-
+-/* These defines are compatible with NRL IPv6, however their semantics
+-   is different */
+-
+-#define IPSEC_LEVEL_NONE	-1	/* send plaintext, accept any */
+-#define IPSEC_LEVEL_DEFAULT	0	/* encrypt/authenticate if possible */
+-					/* the default MUST be 0, because a */
+-					/* socket is initialized with 0's */
+-#define IPSEC_LEVEL_USE		1	/* use outbound, don't require inbound */
+-#define IPSEC_LEVEL_REQUIRE	2	/* require both directions */
+-#define IPSEC_LEVEL_UNIQUE	2	/* for compatibility only */
+-
+-#ifdef __KERNEL__
+-
+-/* skb bit flags set on packet input processing */
+-
+-#define RCV_SEC			0x0f	/* options on receive */
+-#define RCV_AUTH		0x01	/* was authenticated */
+-#define RCV_CRYPT		0x02	/* was encrypted */
+-#define RCV_TUNNEL		0x04	/* was tunneled */
+-#define SND_SEC			0xf0	/* options on send, these are */
+-#define SND_AUTH		0x10	/* currently unused */
+-#define SND_CRYPT		0x20
+-#define SND_TUNNEL		0x40
+-
+-/*
+- *	FIXME: ignores network encryption for now..
+- */
+- 
+-#ifdef CONFIG_NET_SECURITY
+-static __inline__ int ipsec_sk_policy(struct sock *sk, struct sk_buff *skb)
+-{
+-	return ((sk->authentication < IPSEC_LEVEL_REQUIRE) ||
+-		(skb->security & RCV_AUTH)) &&
+-		((sk->encryption < IPSEC_LEVEL_REQUIRE) ||
+-		(skb->security & RCV_CRYPT));
+-}
+-
+-#else
+-
+-static __inline__ int ipsec_sk_policy(struct sock *sk, struct sk_buff *skb)
+-{
+-	return 1;
+-}
+-#endif /* CONFIG */
++/* The definitions, required to talk to KAME racoon IKE. */
++
++#include <linux/pfkeyv2.h>
++
++#define IPSEC_PORT_ANY		0
++#define IPSEC_ULPROTO_ANY	255
++#define IPSEC_PROTO_ANY		255
++
++enum {
++	IPSEC_MODE_ANY		= 0,	/* We do not support this for SA */
++	IPSEC_MODE_TRANSPORT	= 1,
++	IPSEC_MODE_TUNNEL	= 2
++};
++
++enum {
++	IPSEC_DIR_ANY		= 0,
++	IPSEC_DIR_INBOUND	= 1,
++	IPSEC_DIR_OUTBOUND	= 2,
++	IPSEC_DIR_FWD		= 3,	/* It is our own */
++	IPSEC_DIR_MAX		= 4,
++	IPSEC_DIR_INVALID	= 5
++};
++
++enum {
++	IPSEC_POLICY_DISCARD	= 0,
++	IPSEC_POLICY_NONE	= 1,
++	IPSEC_POLICY_IPSEC	= 2,
++	IPSEC_POLICY_ENTRUST	= 3,
++	IPSEC_POLICY_BYPASS	= 4
++};
++
++enum {
++	IPSEC_LEVEL_DEFAULT	= 0,
++	IPSEC_LEVEL_USE		= 1,
++	IPSEC_LEVEL_REQUIRE	= 2,
++	IPSEC_LEVEL_UNIQUE	= 3
++};
++
++#define IPSEC_MANUAL_REQID_MAX	0x3fff
++
++#define IPSEC_REPLAYWSIZE  32
+ 
+-#endif	/* __KERNEL__ */
+ #endif	/* _LINUX_IPSEC_H */
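
A hedged sketch (not part of the patch) of how these constants pair with
the PF_KEY policy extension added later in this patch (linux/pfkeyv2.h),
e.g. when requesting IPsec for outbound traffic:

    struct sadb_x_policy pol;

    memset(&pol, 0, sizeof(pol));
    pol.sadb_x_policy_len     = sizeof(pol) / 8;  /* PF_KEY lengths are in
                                                     64-bit units */
    pol.sadb_x_policy_exttype = SADB_X_EXT_POLICY;
    pol.sadb_x_policy_type    = IPSEC_POLICY_IPSEC;
    pol.sadb_x_policy_dir     = IPSEC_DIR_OUTBOUND;
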
+diff -Nru a/include/linux/ipv6.h b/include/linux/ipv6.h
+--- a/include/linux/ipv6.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/ipv6.h	2005-02-13 21:25:09 +11:00
+@@ -73,6 +73,27 @@
+ #define rt0_type		rt_hdr.type
+ };
+ 
++struct ipv6_auth_hdr {
++	__u8  nexthdr;
++	__u8  hdrlen;           /* This one is measured in 32 bit units! */
++	__u16 reserved;
++	__u32 spi;
++	__u32 seq_no;           /* Sequence number */
++	__u8  auth_data[0];     /* Length variable but >=4. Mind the 64 bit alignment! */
++};
++
++struct ipv6_esp_hdr {
++	__u32 spi;
++	__u32 seq_no;           /* Sequence number */
++	__u8  enc_data[0];      /* Length variable but >=8. Mind the 64 bit alignment! */
++};
++
++struct ipv6_comp_hdr {
++	__u8 nexthdr;
++	__u8 flags;
++	__u16 cpi;
++};
++
+ /*
+  *	IPv6 fixed header
+  *
+diff -Nru a/include/linux/ipv6_route.h b/include/linux/ipv6_route.h
+--- a/include/linux/ipv6_route.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/ipv6_route.h	2005-02-13 21:25:09 +11:00
+@@ -13,15 +13,6 @@
+ #ifndef _LINUX_IPV6_ROUTE_H
+ #define _LINUX_IPV6_ROUTE_H
+ 
+-enum
+-{
+-	RTA_IPV6_UNSPEC,
+-	RTA_IPV6_HOPLIMIT,
+-};
+-
+-#define	RTA_IPV6_MAX RTA_IPV6_HOPLIMIT
+-
+-
+ #define RTF_DEFAULT	0x00010000	/* default - learned via ND	*/
+ #define RTF_ALLONLINK	0x00020000	/* fallback, no routers on link	*/
+ #define RTF_ADDRCONF	0x00040000	/* addrconf route - RA		*/
+@@ -33,6 +24,7 @@
+ #define RTF_CACHE	0x01000000	/* cache entry			*/
+ #define RTF_FLOW	0x02000000	/* flow significant route	*/
+ #define RTF_POLICY	0x04000000	/* policy route			*/
++#define RTF_NDISC	0x08000000	/* ndisc route			*/
+ 
+ #define RTF_LOCAL	0x80000000
+ 
+diff -Nru a/include/linux/kernel.h b/include/linux/kernel.h
+--- a/include/linux/kernel.h	2005-02-13 21:25:08 +11:00
++++ b/include/linux/kernel.h	2005-02-13 21:25:08 +11:00
+@@ -133,6 +133,16 @@
+ 	((unsigned char *)&addr)[2], \
+ 	((unsigned char *)&addr)[3]
+ 
++#define NIP6(addr) \
++	ntohs((addr).s6_addr16[0]), \
++	ntohs((addr).s6_addr16[1]), \
++	ntohs((addr).s6_addr16[2]), \
++	ntohs((addr).s6_addr16[3]), \
++	ntohs((addr).s6_addr16[4]), \
++	ntohs((addr).s6_addr16[5]), \
++	ntohs((addr).s6_addr16[6]), \
++	ntohs((addr).s6_addr16[7])
++
+ #if defined(__LITTLE_ENDIAN)
+ #define HIPQUAD(addr) \
+ 	((unsigned char *)&addr)[3], \
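
For reference (not from the patch), the new NIP6() macro expands an
in6_addr into eight host-order halfwords for printk-style formatting:

    /* hypothetical usage; assumes an ipv6hdr pointer is in scope */
    printk(KERN_DEBUG "saddr %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
           NIP6(ipv6h->saddr));
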
+diff -Nru a/include/linux/list.h b/include/linux/list.h
+--- a/include/linux/list.h	2005-02-13 21:25:08 +11:00
++++ b/include/linux/list.h	2005-02-13 21:25:08 +11:00
+@@ -3,6 +3,7 @@
+ 
+ #if defined(__KERNEL__) || defined(_LVM_H_INCLUDE)
+ 
++#include <linux/stddef.h>
+ #include <linux/prefetch.h>
+ 
+ /*
+@@ -254,6 +255,152 @@
+ 	     pos = list_entry(pos->member.next, typeof(*pos), member),	\
+ 		     prefetch(pos->member.next))
+ 
++/*
++ * Double linked lists with a single pointer list head.
++ * Mostly useful for hash tables where the two pointer list head is
++ * too wasteful.
++ * You lose the ability to access the tail in O(1).
++ */
++
++struct hlist_head {
++	struct hlist_node *first;
++};
++
++struct hlist_node {
++	struct hlist_node *next, **pprev;
++};
++
++#define HLIST_HEAD_INIT { .first = NULL }
++#define HLIST_HEAD(name) struct hlist_head name = {  .first = NULL }
++#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL)
++#define INIT_HLIST_NODE(ptr) ((ptr)->next = NULL, (ptr)->pprev = NULL)
++
++static inline int hlist_unhashed(const struct hlist_node *h)
++{
++	return !h->pprev;
++}
++
++static inline int hlist_empty(const struct hlist_head *h)
++{
++	return !h->first;
++}
++
++static inline void __hlist_del(struct hlist_node *n)
++{
++	struct hlist_node *next = n->next;
++	struct hlist_node **pprev = n->pprev;
++	*pprev = next;
++	if (next)
++		next->pprev = pprev;
++}
++
++static inline void hlist_del(struct hlist_node *n)
++{
++	__hlist_del(n);
++	n->next = NULL;
++	n->pprev = NULL;
++}
++
++static inline void hlist_del_init(struct hlist_node *n)
++{
++	if (n->pprev)  {
++		__hlist_del(n);
++		INIT_HLIST_NODE(n);
++	}
++}
++
++static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h)
++{
++	struct hlist_node *first = h->first;
++	n->next = first;
++	if (first)
++		first->pprev = &n->next;
++	h->first = n;
++	n->pprev = &h->first;
++}
++
++/* next must be != NULL */
++static inline void hlist_add_before(struct hlist_node *n,
++					struct hlist_node *next)
++{
++	n->pprev = next->pprev;
++	n->next = next;
++	next->pprev = &n->next;
++	*(n->pprev) = n;
++}
++
++static inline void hlist_add_after(struct hlist_node *n,
++					struct hlist_node *next)
++{
++	next->next = n->next;
++	n->next = next;
++	next->pprev = &n->next;
++
++	if(next->next)
++		next->next->pprev  = &next->next;
++}
++
++#define hlist_entry(ptr, type, member) \
++	((type *)((char *)(ptr)-(unsigned long)(&((type *)0)->member)))
++
++/* Cannot easily do prefetch unfortunately */
++#define hlist_for_each(pos, head) \
++	for (pos = (head)->first; pos && ({ prefetch(pos->next); 1; }); \
++	     pos = pos->next)
++
++#define hlist_for_each_safe(pos, n, head) \
++	for (pos = (head)->first; n = pos ? pos->next : 0, pos; \
++	     pos = n)
++
++/**
++ * hlist_for_each_entry	- iterate over list of given type
++ * @tpos:	the type * to use as a loop counter.
++ * @pos:	the &struct hlist_node to use as a loop counter.
++ * @head:	the head for your list.
++ * @member:	the name of the hlist_node within the struct.
++ */
++#define hlist_for_each_entry(tpos, pos, head, member)			 \
++	for (pos = (head)->first;					 \
++	     pos && ({ prefetch(pos->next); 1;}) &&			 \
++		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
++	     pos = pos->next)
++
++/**
++ * hlist_for_each_entry_continue - iterate over a hlist continuing after existing point
++ * @tpos:	the type * to use as a loop counter.
++ * @pos:	the &struct hlist_node to use as a loop counter.
++ * @member:	the name of the hlist_node within the struct.
++ */
++#define hlist_for_each_entry_continue(tpos, pos, member)		 \
++	for (pos = (pos)->next;						 \
++	     pos && ({ prefetch(pos->next); 1;}) &&			 \
++		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
++	     pos = pos->next)
++
++/**
++ * hlist_for_each_entry_from - iterate over a hlist continuing from existing point
++ * @tpos:	the type * to use as a loop counter.
++ * @pos:	the &struct hlist_node to use as a loop counter.
++ * @member:	the name of the hlist_node within the struct.
++ */
++#define hlist_for_each_entry_from(tpos, pos, member)			 \
++	for (; pos && ({ prefetch(pos->next); 1;}) &&			 \
++		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
++	     pos = pos->next)
++
++/**
++ * hlist_for_each_entry_safe - iterate over list of given type safe against removal of list entry
++ * @tpos:	the type * to use as a loop counter.
++ * @pos:	the &struct hlist_node to use as a loop counter.
++ * @n:		another &struct hlist_node to use as temporary storage
++ * @head:	the head for your list.
++ * @member:	the name of the hlist_node within the struct.
++ */
++#define hlist_for_each_entry_safe(tpos, pos, n, head, member) 		 \
++	for (pos = (head)->first;					 \
++	     pos && ({ n = pos->next; 1; }) && 				 \
++		({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \
++	     pos = n)
+ #endif /* __KERNEL__ || _LVM_H_INCLUDE */
+ 
+ #endif
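
A minimal sketch (not part of the patch) of the new hlist primitives as
used for a hash bucket; the flow structure and the 16-bucket table are
made up for illustration:

    struct flow {
            u32 daddr;
            struct hlist_node node;
    };

    static struct hlist_head buckets[16];

    static void flow_insert(struct flow *f)
    {
            hlist_add_head(&f->node, &buckets[f->daddr & 15]);
    }

    static struct flow *flow_lookup(u32 daddr)
    {
            struct flow *f;
            struct hlist_node *pos;

            hlist_for_each_entry(f, pos, &buckets[daddr & 15], node)
                    if (f->daddr == daddr)
                            return f;
            return NULL;
    }
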
+diff -Nru a/include/linux/netdevice.h b/include/linux/netdevice.h
+--- a/include/linux/netdevice.h	2005-02-13 21:25:10 +11:00
++++ b/include/linux/netdevice.h	2005-02-13 21:25:10 +11:00
+@@ -96,6 +96,11 @@
+ #define MAX_HEADER (LL_MAX_HEADER + 48)
+ #endif
+ 
++/* Reserve 16-byte aligned hard_header_len, but at least 16.
++ * Alternative is: dev->hard_header_len ? (dev->hard_header_len + 15)&~15 : 0
++ */
++#define LL_RESERVED_SPACE(dev) (((dev)->hard_header_len&~15) + 16)
++
+ /*
+  *	Network device statistics. Akin to the 2.0 ether stats but
+  *	with byte counters.
+@@ -499,6 +504,7 @@
+ extern int		dev_queue_xmit(struct sk_buff *skb);
+ extern int		register_netdevice(struct net_device *dev);
+ extern int		unregister_netdevice(struct net_device *dev);
++extern void		synchronize_net(void);
+ extern int 		register_netdevice_notifier(struct notifier_block *nb);
+ extern int		unregister_netdevice_notifier(struct notifier_block *nb);
+ extern int		dev_new_index(void);
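
As an aside (not from the patch), LL_RESERVED_SPACE() is intended for skb
allocation so the link-layer header lands in aligned headroom, roughly:

    /* hedged sketch: room for dev's hard header plus len bytes of payload */
    struct sk_buff *skb = alloc_skb(LL_RESERVED_SPACE(dev) + len, GFP_ATOMIC);
    if (skb)
            skb_reserve(skb, LL_RESERVED_SPACE(dev));
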
+diff -Nru a/include/linux/netlink.h b/include/linux/netlink.h
+--- a/include/linux/netlink.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/netlink.h	2005-02-13 21:25:09 +11:00
+@@ -7,6 +7,7 @@
+ #define NETLINK_FIREWALL	3	/* Firewalling hook				*/
+ #define NETLINK_TCPDIAG		4	/* TCP socket monitoring			*/
+ #define NETLINK_NFLOG		5	/* netfilter/iptables ULOG */
++#define NETLINK_XFRM		6	/* ipsec */
+ #define NETLINK_ARPD		8
+ #define NETLINK_ROUTE6		11	/* af_inet6 route comm channel */
+ #define NETLINK_IP6_FW		13
+@@ -87,6 +88,8 @@
+ 
+ #ifdef __KERNEL__
+ 
++#include <linux/capability.h>
++
+ struct netlink_skb_parms
+ {
+ 	struct ucred		creds;		/* Skb credentials	*/
+@@ -108,8 +111,8 @@
+ extern struct sock *netlink_kernel_create(int unit, void (*input)(struct sock *sk, int len));
+ extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err);
+ extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock);
+-extern void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
+-			      __u32 group, int allocation);
++extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid,
++			     __u32 group, int allocation);
+ extern void netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code);
+ extern int netlink_register_notifier(struct notifier_block *nb);
+ extern int netlink_unregister_notifier(struct notifier_block *nb);
+diff -Nru a/include/linux/pfkeyv2.h b/include/linux/pfkeyv2.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/linux/pfkeyv2.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,335 @@
++/* PF_KEY user interface, this is defined by rfc2367 so
++ * do not make arbitrary modifications or else this header
++ * file will not be compliant.
++ */
++
++#ifndef _LINUX_PFKEY2_H
++#define _LINUX_PFKEY2_H
++
++#include <linux/types.h>
++
++#define PF_KEY_V2		2
++#define PFKEYV2_REVISION	199806L
++
++struct sadb_msg {
++	uint8_t		sadb_msg_version;
++	uint8_t		sadb_msg_type;
++	uint8_t		sadb_msg_errno;
++	uint8_t		sadb_msg_satype;
++	uint16_t	sadb_msg_len;
++	uint16_t	sadb_msg_reserved;
++	uint32_t	sadb_msg_seq;
++	uint32_t	sadb_msg_pid;
++} __attribute__((packed));
++/* sizeof(struct sadb_msg) == 16 */
++
++struct sadb_ext {
++	uint16_t	sadb_ext_len;
++	uint16_t	sadb_ext_type;
++} __attribute__((packed));
++/* sizeof(struct sadb_ext) == 4 */
++
++struct sadb_sa {
++	uint16_t	sadb_sa_len;
++	uint16_t	sadb_sa_exttype;
++	uint32_t	sadb_sa_spi;
++	uint8_t		sadb_sa_replay;
++	uint8_t		sadb_sa_state;
++	uint8_t		sadb_sa_auth;
++	uint8_t		sadb_sa_encrypt;
++	uint32_t	sadb_sa_flags;
++} __attribute__((packed));
++/* sizeof(struct sadb_sa) == 16 */
++
++struct sadb_lifetime {
++	uint16_t	sadb_lifetime_len;
++	uint16_t	sadb_lifetime_exttype;
++	uint32_t	sadb_lifetime_allocations;
++	uint64_t	sadb_lifetime_bytes;
++	uint64_t	sadb_lifetime_addtime;
++	uint64_t	sadb_lifetime_usetime;
++} __attribute__((packed));
++/* sizeof(struct sadb_lifetime) == 32 */
++
++struct sadb_address {
++	uint16_t	sadb_address_len;
++	uint16_t	sadb_address_exttype;
++	uint8_t		sadb_address_proto;
++	uint8_t		sadb_address_prefixlen;
++	uint16_t	sadb_address_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_address) == 8 */
++
++struct sadb_key {
++	uint16_t	sadb_key_len;
++	uint16_t	sadb_key_exttype;
++	uint16_t	sadb_key_bits;
++	uint16_t	sadb_key_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_key) == 8 */
++
++struct sadb_ident {
++	uint16_t	sadb_ident_len;
++	uint16_t	sadb_ident_exttype;
++	uint16_t	sadb_ident_type;
++	uint16_t	sadb_ident_reserved;
++	uint64_t	sadb_ident_id;
++} __attribute__((packed));
++/* sizeof(struct sadb_ident) == 16 */
++
++struct sadb_sens {
++	uint16_t	sadb_sens_len;
++	uint16_t	sadb_sens_exttype;
++	uint32_t	sadb_sens_dpd;
++	uint8_t		sadb_sens_sens_level;
++	uint8_t		sadb_sens_sens_len;
++	uint8_t		sadb_sens_integ_level;
++	uint8_t		sadb_sens_integ_len;
++	uint32_t	sadb_sens_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_sens) == 16 */
++
++/* followed by:
++	uint64_t	sadb_sens_bitmap[sens_len];
++	uint64_t	sadb_integ_bitmap[integ_len];  */
++
++struct sadb_prop {
++	uint16_t	sadb_prop_len;
++	uint16_t	sadb_prop_exttype;
++	uint8_t		sadb_prop_replay;
++	uint8_t		sadb_prop_reserved[3];
++} __attribute__((packed));
++/* sizeof(struct sadb_prop) == 8 */
++
++/* followed by:
++	struct sadb_comb sadb_combs[(sadb_prop_len +
++		sizeof(uint64_t) - sizeof(struct sadb_prop)) /
++		sizeof(struct sadb_comb)]; */
++
++struct sadb_comb {
++	uint8_t		sadb_comb_auth;
++	uint8_t		sadb_comb_encrypt;
++	uint16_t	sadb_comb_flags;
++	uint16_t	sadb_comb_auth_minbits;
++	uint16_t	sadb_comb_auth_maxbits;
++	uint16_t	sadb_comb_encrypt_minbits;
++	uint16_t	sadb_comb_encrypt_maxbits;
++	uint32_t	sadb_comb_reserved;
++	uint32_t	sadb_comb_soft_allocations;
++	uint32_t	sadb_comb_hard_allocations;
++	uint64_t	sadb_comb_soft_bytes;
++	uint64_t	sadb_comb_hard_bytes;
++	uint64_t	sadb_comb_soft_addtime;
++	uint64_t	sadb_comb_hard_addtime;
++	uint64_t	sadb_comb_soft_usetime;
++	uint64_t	sadb_comb_hard_usetime;
++} __attribute__((packed));
++/* sizeof(struct sadb_comb) == 72 */
++
++struct sadb_supported {
++	uint16_t	sadb_supported_len;
++	uint16_t	sadb_supported_exttype;
++	uint32_t	sadb_supported_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_supported) == 8 */
++
++/* followed by:
++	struct sadb_alg sadb_algs[(sadb_supported_len +
++		sizeof(uint64_t) - sizeof(struct sadb_supported)) /
++		sizeof(struct sadb_alg)]; */
++
++struct sadb_alg {
++	uint8_t		sadb_alg_id;
++	uint8_t		sadb_alg_ivlen;
++	uint16_t	sadb_alg_minbits;
++	uint16_t	sadb_alg_maxbits;
++	uint16_t	sadb_alg_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_alg) == 8 */
++
++struct sadb_spirange {
++	uint16_t	sadb_spirange_len;
++	uint16_t	sadb_spirange_exttype;
++	uint32_t	sadb_spirange_min;
++	uint32_t	sadb_spirange_max;
++	uint32_t	sadb_spirange_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_spirange) == 16 */
++
++struct sadb_x_kmprivate {
++	uint16_t	sadb_x_kmprivate_len;
++	uint16_t	sadb_x_kmprivate_exttype;
++	u_int32_t	sadb_x_kmprivate_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_x_kmprivate) == 8 */
++
++struct sadb_x_sa2 {
++	uint16_t	sadb_x_sa2_len;
++	uint16_t	sadb_x_sa2_exttype;
++	uint8_t		sadb_x_sa2_mode;
++	uint8_t		sadb_x_sa2_reserved1;
++	uint16_t	sadb_x_sa2_reserved2;
++	uint32_t	sadb_x_sa2_sequence;
++	uint32_t	sadb_x_sa2_reqid;
++} __attribute__((packed));
++/* sizeof(struct sadb_x_sa2) == 16 */
++
++struct sadb_x_policy {
++	uint16_t	sadb_x_policy_len;
++	uint16_t	sadb_x_policy_exttype;
++	uint16_t	sadb_x_policy_type;
++	uint8_t		sadb_x_policy_dir;
++	uint8_t		sadb_x_policy_reserved;
++	uint32_t	sadb_x_policy_id;
++	uint32_t	sadb_x_policy_priority;
++} __attribute__((packed));
++/* sizeof(struct sadb_x_policy) == 16 */
++
++struct sadb_x_ipsecrequest {
++	uint16_t	sadb_x_ipsecrequest_len;
++	uint16_t	sadb_x_ipsecrequest_proto;
++	uint8_t		sadb_x_ipsecrequest_mode;
++	uint8_t		sadb_x_ipsecrequest_level;
++	uint16_t	sadb_x_ipsecrequest_reserved1;
++	uint32_t	sadb_x_ipsecrequest_reqid;
++	uint32_t	sadb_x_ipsecrequest_reserved2;
++} __attribute__((packed));
++/* sizeof(struct sadb_x_ipsecrequest) == 16 */
++
++/* This defines the TYPE of NAT Traversal in use.  Currently only one
++ * type of NAT-T is supported, draft-ietf-ipsec-udp-encaps-06
++ */
++struct sadb_x_nat_t_type {
++	uint16_t	sadb_x_nat_t_type_len;
++	uint16_t	sadb_x_nat_t_type_exttype;
++	uint8_t		sadb_x_nat_t_type_type;
++	uint8_t		sadb_x_nat_t_type_reserved[3];
++} __attribute__((packed));
++/* sizeof(struct sadb_x_nat_t_type) == 8 */
++
++/* Pass a NAT Traversal port (Source or Dest port) */
++struct sadb_x_nat_t_port {
++	uint16_t	sadb_x_nat_t_port_len;
++	uint16_t	sadb_x_nat_t_port_exttype;
++	uint16_t	sadb_x_nat_t_port_port;
++	uint16_t	sadb_x_nat_t_port_reserved;
++} __attribute__((packed));
++/* sizeof(struct sadb_x_nat_t_port) == 8 */
++
++/* Message types */
++#define SADB_RESERVED		0
++#define SADB_GETSPI		1
++#define SADB_UPDATE		2
++#define SADB_ADD		3
++#define SADB_DELETE		4
++#define SADB_GET		5
++#define SADB_ACQUIRE		6
++#define SADB_REGISTER		7
++#define SADB_EXPIRE		8
++#define SADB_FLUSH		9
++#define SADB_DUMP		10
++#define SADB_X_PROMISC		11
++#define SADB_X_PCHANGE		12
++#define SADB_X_SPDUPDATE	13
++#define SADB_X_SPDADD		14
++#define SADB_X_SPDDELETE	15
++#define SADB_X_SPDGET		16
++#define SADB_X_SPDACQUIRE	17
++#define SADB_X_SPDDUMP		18
++#define SADB_X_SPDFLUSH		19
++#define SADB_X_SPDSETIDX	20
++#define SADB_X_SPDEXPIRE	21
++#define SADB_X_SPDDELETE2	22
++#define SADB_X_NAT_T_NEW_MAPPING	23
++#define SADB_MAX		23
++
++/* Security Association flags */
++#define SADB_SAFLAGS_PFS	1
++#define SADB_SAFLAGS_NOECN	0x80000000
++
++/* Security Association states */
++#define SADB_SASTATE_LARVAL	0
++#define SADB_SASTATE_MATURE	1
++#define SADB_SASTATE_DYING	2
++#define SADB_SASTATE_DEAD	3
++#define SADB_SASTATE_MAX	3
++
++/* Security Association types */
++#define SADB_SATYPE_UNSPEC	0
++#define SADB_SATYPE_AH		2
++#define SADB_SATYPE_ESP		3
++#define SADB_SATYPE_RSVP	5
++#define SADB_SATYPE_OSPFV2	6
++#define SADB_SATYPE_RIPV2	7
++#define SADB_SATYPE_MIP		8
++#define SADB_X_SATYPE_IPCOMP	9
++#define SADB_SATYPE_MAX		9
++
++/* Authentication algorithms */
++#define SADB_AALG_NONE			0
++#define SADB_AALG_MD5HMAC		2
++#define SADB_AALG_SHA1HMAC		3
++#define SADB_X_AALG_SHA2_256HMAC	5
++#define SADB_X_AALG_SHA2_384HMAC	6
++#define SADB_X_AALG_SHA2_512HMAC	7
++#define SADB_X_AALG_RIPEMD160HMAC	8
++#define SADB_X_AALG_NULL		251	/* kame */
++#define SADB_AALG_MAX			251
++
++/* Encryption algorithms */
++#define SADB_EALG_NONE			0
++#define SADB_EALG_DESCBC		2
++#define SADB_EALG_3DESCBC		3
++#define SADB_X_EALG_CASTCBC		6
++#define SADB_X_EALG_BLOWFISHCBC		7
++#define SADB_EALG_NULL			11
++#define SADB_X_EALG_AESCBC		12
++#define SADB_EALG_MAX                   253 /* last EALG */
++/* private allocations should use 249-255 (RFC2407) */
++#define SADB_X_EALG_SERPENTCBC  252     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
++#define SADB_X_EALG_TWOFISHCBC  253     /* draft-ietf-ipsec-ciph-aes-cbc-00 */
++
++/* Compression algorithms */
++#define SADB_X_CALG_NONE		0
++#define SADB_X_CALG_OUI			1
++#define SADB_X_CALG_DEFLATE		2
++#define SADB_X_CALG_LZS			3
++#define SADB_X_CALG_LZJH		4
++#define SADB_X_CALG_MAX			4
++
++/* Extension Header values */
++#define SADB_EXT_RESERVED		0
++#define SADB_EXT_SA			1
++#define SADB_EXT_LIFETIME_CURRENT	2
++#define SADB_EXT_LIFETIME_HARD		3
++#define SADB_EXT_LIFETIME_SOFT		4
++#define SADB_EXT_ADDRESS_SRC		5
++#define SADB_EXT_ADDRESS_DST		6
++#define SADB_EXT_ADDRESS_PROXY		7
++#define SADB_EXT_KEY_AUTH		8
++#define SADB_EXT_KEY_ENCRYPT		9
++#define SADB_EXT_IDENTITY_SRC		10
++#define SADB_EXT_IDENTITY_DST		11
++#define SADB_EXT_SENSITIVITY		12
++#define SADB_EXT_PROPOSAL		13
++#define SADB_EXT_SUPPORTED_AUTH		14
++#define SADB_EXT_SUPPORTED_ENCRYPT	15
++#define SADB_EXT_SPIRANGE		16
++#define SADB_X_EXT_KMPRIVATE		17
++#define SADB_X_EXT_POLICY		18
++#define SADB_X_EXT_SA2			19
++/* The next four entries are for setting up NAT Traversal */
++#define SADB_X_EXT_NAT_T_TYPE		20
++#define SADB_X_EXT_NAT_T_SPORT		21
++#define SADB_X_EXT_NAT_T_DPORT		22
++#define SADB_X_EXT_NAT_T_OA		23
++#define SADB_EXT_MAX			23
++
++/* Identity Extension values */
++#define SADB_IDENTTYPE_RESERVED	0
++#define SADB_IDENTTYPE_PREFIX	1
++#define SADB_IDENTTYPE_FQDN	2
++#define SADB_IDENTTYPE_USERFQDN	3
++#define SADB_IDENTTYPE_MAX	3
++
++#endif /* !(_LINUX_PFKEY2_H) */
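
To make the message format above concrete, a hedged userspace sketch (not
part of the patch) of flushing the kernel SADB over a PF_KEY socket;
sadb_msg_len is counted in 64-bit words per RFC 2367:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <linux/pfkeyv2.h>

    static int sadb_flush(int fd)
    {
            struct sadb_msg msg;

            memset(&msg, 0, sizeof(msg));
            msg.sadb_msg_version = PF_KEY_V2;
            msg.sadb_msg_type    = SADB_FLUSH;
            msg.sadb_msg_satype  = SADB_SATYPE_UNSPEC;
            msg.sadb_msg_len     = sizeof(msg) / 8;  /* 64-bit units */
            msg.sadb_msg_pid     = getpid();
            msg.sadb_msg_seq     = 1;

            return write(fd, &msg, sizeof(msg)) == sizeof(msg) ? 0 : -1;
    }
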
+diff -Nru a/include/linux/ppp_defs.h b/include/linux/ppp_defs.h
+--- a/include/linux/ppp_defs.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/ppp_defs.h	2005-02-13 21:25:09 +11:00
+@@ -74,12 +74,15 @@
+ #define PPP_IPV6	0x57	/* Internet Protocol Version 6 */
+ #define PPP_COMPFRAG	0xfb	/* fragment compressed below bundle */
+ #define PPP_COMP	0xfd	/* compressed packet */
++#define PPP_MPLS_UC	0x0281	/* Multi Protocol Label Switching - Unicast */
++#define PPP_MPLS_MC	0x0283	/* Multi Protocol Label Switching - Multicast */
+ #define PPP_IPCP	0x8021	/* IP Control Protocol */
+ #define PPP_ATCP	0x8029	/* AppleTalk Control Protocol */
+ #define PPP_IPXCP	0x802b	/* IPX Control Protocol */
+ #define PPP_IPV6CP	0x8057	/* IPv6 Control Protocol */
+ #define PPP_CCPFRAG	0x80fb	/* CCP at link level (below MP bundle) */
+ #define PPP_CCP		0x80fd	/* Compression Control Protocol */
++#define PPP_MPLSCP	0x80fd	/* MPLS Control Protocol */
+ #define PPP_LCP		0xc021	/* Link Control Protocol */
+ #define PPP_PAP		0xc023	/* Password Authentication Protocol */
+ #define PPP_LQR		0xc025	/* Link Quality Report protocol */
+diff -Nru a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h
+--- a/include/linux/rtnetlink.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/rtnetlink.h	2005-02-13 21:25:09 +11:00
+@@ -204,6 +204,7 @@
+ 	RTA_PROTOINFO,
+ 	RTA_FLOW,
+ 	RTA_CACHEINFO,
++	RTA_SESSION,
+ 	__RTA_MAX
+ };
+ 
+@@ -286,11 +287,40 @@
+ #define RTAX_ADVMSS RTAX_ADVMSS
+ 	RTAX_REORDERING,
+ #define RTAX_REORDERING RTAX_REORDERING
++	RTAX_HOPLIMIT,
++#define RTAX_HOPLIMIT RTAX_HOPLIMIT
++	RTAX_INITCWND,
++#define RTAX_INITCWND RTAX_INITCWND
++	RTAX_FEATURES,
++#define RTAX_FEATURES RTAX_FEATURES
+ 	__RTAX_MAX
+ };
+ 
+ #define RTAX_MAX (__RTAX_MAX - 1)
+ 
++#define RTAX_FEATURE_ECN	0x00000001
++#define RTAX_FEATURE_SACK	0x00000002
++#define RTAX_FEATURE_TIMESTAMP	0x00000004
++
++struct rta_session
++{
++	__u8	proto;
++
++	union {
++		struct {
++			__u16	sport;
++			__u16	dport;
++		} ports;
++
++		struct {
++			__u8	type;
++			__u8	code;
++			__u16	ident;
++		} icmpt;
++
++		__u32		spi;
++	} u;
++};
+ 
+ 
+ /*********************************************************
+@@ -323,6 +353,7 @@
+ /* ifa_flags */
+ 
+ #define IFA_F_SECONDARY		0x01
++#define IFA_F_TEMPORARY		IFA_F_SECONDARY
+ 
+ #define IFA_F_DEPRECATED	0x20
+ #define IFA_F_TENTATIVE		0x40
+@@ -585,7 +616,7 @@
+ extern struct rtnetlink_link * rtnetlink_links[NPROTO];
+ extern int rtnetlink_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb);
+ extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo);
+-extern int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics);
++extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics);
+ 
+ extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data);
+ 
+diff -Nru a/include/linux/skbuff.h b/include/linux/skbuff.h
+--- a/include/linux/skbuff.h	2005-02-13 21:25:08 +11:00
++++ b/include/linux/skbuff.h	2005-02-13 21:25:08 +11:00
+@@ -148,6 +148,7 @@
+ 		struct icmphdr	*icmph;
+ 		struct igmphdr	*igmph;
+ 		struct iphdr	*ipiph;
++		struct ipv6hdr	*ipv6h;
+ 		struct spxhdr	*spxh;
+ 		unsigned char	*raw;
+ 	} h;
+@@ -169,7 +170,8 @@
+ 	  	unsigned char 	*raw;
+ 	} mac;
+ 
+-	struct  dst_entry *dst;
++	struct  dst_entry	*dst;
++	struct	sec_path	*sp;
+ 
+ 	/* 
+ 	 * This is the control buffer. It is free to use for every
+@@ -181,8 +183,9 @@
+ 
+ 	unsigned int 	len;			/* Length of actual data			*/
+  	unsigned int 	data_len;
++	unsigned int	mac_len;		/* Length of link layer header			*/
+ 	unsigned int	csum;			/* Checksum 					*/
+-	unsigned char 	__unused,		/* Dead field, may be reused			*/
++	unsigned char 	local_df,
+ 			cloned, 		/* head may be cloned (check refcnt to be sure). */
+   			pkt_type,		/* Packet class					*/
+   			ip_summed;		/* Driver fed us an IP checksum			*/
+@@ -756,6 +759,24 @@
+ static inline unsigned int skb_headlen(const struct sk_buff *skb)
+ {
+ 	return skb->len - skb->data_len;
++}
++
++static inline int skb_pagelen(const struct sk_buff *skb)
++{
++	int i, len = 0;
++
++	for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--)
++		len += skb_shinfo(skb)->frags[i].size;
++	return len + skb_headlen(skb);
++}
++
++static inline void skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size)
++{
++	skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
++	frag->page = page;
++	frag->page_offset = off;
++	frag->size = size;
++	skb_shinfo(skb)->nr_frags = i+1;
+ }
+ 
+ #define SKB_PAGE_ASSERT(skb) do { if (skb_shinfo(skb)->nr_frags) out_of_line_bug(); } while (0)
+diff -Nru a/include/linux/sysctl.h b/include/linux/sysctl.h
+--- a/include/linux/sysctl.h	2005-02-13 21:25:08 +11:00
++++ b/include/linux/sysctl.h	2005-02-13 21:25:08 +11:00
+@@ -373,6 +373,8 @@
+ 	NET_IPV4_CONF_TAG=12,
+ 	NET_IPV4_CONF_ARPFILTER=13,
+ 	NET_IPV4_CONF_MEDIUM_ID=14,
++	NET_IPV4_CONF_NOXFRM=15,
++	NET_IPV4_CONF_NOPOLICY=16,
+ 	NET_IPV4_CONF_FORCE_IGMP_VERSION=17,
+ 	NET_IPV4_CONF_ARP_ANNOUNCE=18,
+ 	NET_IPV4_CONF_ARP_IGNORE=19,
+@@ -429,7 +431,12 @@
+ 	NET_IPV6_DAD_TRANSMITS=7,
+ 	NET_IPV6_RTR_SOLICITS=8,
+ 	NET_IPV6_RTR_SOLICIT_INTERVAL=9,
+-	NET_IPV6_RTR_SOLICIT_DELAY=10
++	NET_IPV6_RTR_SOLICIT_DELAY=10,
++	NET_IPV6_USE_TEMPADDR=11,
++	NET_IPV6_TEMP_VALID_LFT=12,
++	NET_IPV6_TEMP_PREFERED_LFT=13,
++	NET_IPV6_REGEN_MAX_RETRY=14,
++	NET_IPV6_MAX_DESYNC_FACTOR=15
+ };
+ 
+ /* /proc/sys/net/ipv6/icmp */
+diff -Nru a/include/linux/timer.h b/include/linux/timer.h
+--- a/include/linux/timer.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/timer.h	2005-02-13 21:25:09 +11:00
+@@ -3,6 +3,7 @@
+ 
+ #include <linux/config.h>
+ #include <linux/list.h>
++#include <linux/stddef.h>
+ 
+ /*
+  * In Linux 2.4, static timers have been removed from the kernel.
+diff -Nru a/include/linux/udp.h b/include/linux/udp.h
+--- a/include/linux/udp.h	2005-02-13 21:25:09 +11:00
++++ b/include/linux/udp.h	2005-02-13 21:25:09 +11:00
+@@ -17,6 +17,7 @@
+ #ifndef _LINUX_UDP_H
+ #define _LINUX_UDP_H
+ 
++#include <linux/types.h>
+ 
+ struct udphdr {
+ 	__u16	source;
+@@ -25,5 +26,12 @@
+ 	__u16	check;
+ };
+ 
++/* UDP socket options */
++#define UDP_CORK	1	/* Never send partially complete segments */
++#define UDP_ENCAP	100	/* Set the socket to accept encapsulated packets */
++
++/* UDP encapsulation types */
++#define UDP_ENCAP_ESPINUDP_NON_IKE	1 /* draft-ietf-ipsec-nat-t-ike-00/01 */
++#define UDP_ENCAP_ESPINUDP	2 /* draft-ietf-ipsec-udp-encaps-06 */
+ 
+ #endif	/* _LINUX_UDP_H */
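
A short sketch (not in the patch) of how an IKE daemon would use the new
UDP_ENCAP option to have the kernel decapsulate ESP-in-UDP on the NAT-T
port; port 4500 and the elided error handling are assumptions:

    #include <netinet/in.h>
    #include <sys/socket.h>
    #include <linux/udp.h>   /* UDP_CORK, UDP_ENCAP, UDP_ENCAP_ESPINUDP */

    int open_natt_socket(void)
    {
            int fd = socket(AF_INET, SOCK_DGRAM, 0);
            int type = UDP_ENCAP_ESPINUDP;
            struct sockaddr_in sin = {
                    .sin_family = AF_INET,
                    .sin_port   = htons(4500),  /* IKE NAT-T port */
            };

            /* mark the socket so the kernel strips the ESP-in-UDP wrapper */
            setsockopt(fd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type));
            bind(fd, (struct sockaddr *)&sin, sizeof(sin));
            return fd;
    }
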
+diff -Nru a/include/linux/xfrm.h b/include/linux/xfrm.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/linux/xfrm.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,257 @@
++#ifndef _LINUX_XFRM_H
++#define _LINUX_XFRM_H
++
++#include <linux/types.h>
++
++/* All of the structures in this file must not change size as they are
++ * passed into the kernel from userspace via netlink sockets.
++ */
++
++/* Structure to encapsulate addresses. I do not want to use
++ * "standard" structure. My apologies.
++ */
++typedef union
++{
++	__u32		a4;
++	__u32		a6[4];
++} xfrm_address_t;
++
++/* Ident of a specific xfrm_state. It is used on input to lookup
++ * the state by (spi,daddr,ah/esp) or to store information about
++ * spi, protocol and tunnel address on output.
++ */
++struct xfrm_id
++{
++	xfrm_address_t	daddr;
++	__u32		spi;
++	__u8		proto;
++};
++
++/* Selector, used as selector both on policy rules (SPD) and SAs. */
++
++struct xfrm_selector
++{
++	xfrm_address_t	daddr;
++	xfrm_address_t	saddr;
++	__u16	dport;
++	__u16	dport_mask;
++	__u16	sport;
++	__u16	sport_mask;
++	__u16	family;
++	__u8	prefixlen_d;
++	__u8	prefixlen_s;
++	__u8	proto;
++	int	ifindex;
++	uid_t	user;
++};
++
++#define XFRM_INF (~(__u64)0)
++
++struct xfrm_lifetime_cfg
++{
++	__u64	soft_byte_limit;
++	__u64	hard_byte_limit;
++	__u64	soft_packet_limit;
++	__u64	hard_packet_limit;
++	__u64	soft_add_expires_seconds;
++	__u64	hard_add_expires_seconds;
++	__u64	soft_use_expires_seconds;
++	__u64	hard_use_expires_seconds;
++};
++
++struct xfrm_lifetime_cur
++{
++	__u64	bytes;
++	__u64	packets;
++	__u64	add_time;
++	__u64	use_time;
++};
++
++struct xfrm_replay_state
++{
++	__u32	oseq;
++	__u32	seq;
++	__u32	bitmap;
++};
++
++struct xfrm_algo {
++	char	alg_name[64];
++	int	alg_key_len;    /* in bits */
++	char	alg_key[0];
++};
++
++struct xfrm_stats {
++	__u32	replay_window;
++	__u32	replay;
++	__u32	integrity_failed;
++};
++
++enum
++{
++	XFRM_POLICY_IN	= 0,
++	XFRM_POLICY_OUT	= 1,
++	XFRM_POLICY_FWD	= 2,
++	XFRM_POLICY_MAX	= 3
++};
++
++enum
++{
++	XFRM_SHARE_ANY,		/* No limitations */
++	XFRM_SHARE_SESSION,	/* For this session only */
++	XFRM_SHARE_USER,	/* For this user only */
++	XFRM_SHARE_UNIQUE	/* Use once */
++};
++
++/* Netlink configuration messages.  */
++enum {
++	XFRM_MSG_BASE = 0x10,
++
++	XFRM_MSG_NEWSA = 0x10,
++#define XFRM_MSG_NEWSA XFRM_MSG_NEWSA
++	XFRM_MSG_DELSA,
++#define XFRM_MSG_DELSA XFRM_MSG_DELSA
++	XFRM_MSG_GETSA,
++#define XFRM_MSG_GETSA XFRM_MSG_GETSA
++
++	XFRM_MSG_NEWPOLICY,
++#define XFRM_MSG_NEWPOLICY XFRM_MSG_NEWPOLICY
++	XFRM_MSG_DELPOLICY,
++#define XFRM_MSG_DELPOLICY XFRM_MSG_DELPOLICY
++	XFRM_MSG_GETPOLICY,
++#define XFRM_MSG_GETPOLICY XFRM_MSG_GETPOLICY
++
++	XFRM_MSG_ALLOCSPI,
++#define XFRM_MSG_ALLOCSPI XFRM_MSG_ALLOCSPI
++	XFRM_MSG_ACQUIRE,
++#define XFRM_MSG_ACQUIRE XFRM_MSG_ACQUIRE
++	XFRM_MSG_EXPIRE,
++#define XFRM_MSG_EXPIRE XFRM_MSG_EXPIRE
++
++	XFRM_MSG_UPDPOLICY,
++#define XFRM_MSG_UPDPOLICY XFRM_MSG_UPDPOLICY
++	XFRM_MSG_UPDSA,
++#define XFRM_MSG_UPDSA XFRM_MSG_UPDSA
++
++	XFRM_MSG_POLEXPIRE,
++#define XFRM_MSG_POLEXPIRE XFRM_MSG_POLEXPIRE
++
++	XFRM_MSG_FLUSHSA,
++#define XFRM_MSG_FLUSHSA XFRM_MSG_FLUSHSA
++	XFRM_MSG_FLUSHPOLICY,
++#define XFRM_MSG_FLUSHPOLICY XFRM_MSG_FLUSHPOLICY
++
++	XFRM_MSG_MAX
++};
++
++struct xfrm_user_tmpl {
++	struct xfrm_id		id;
++	__u16			family;
++	xfrm_address_t		saddr;
++	__u32			reqid;
++	__u8			mode;
++	__u8			share;
++	__u8			optional;
++	__u32			aalgos;
++	__u32			ealgos;
++	__u32			calgos;
++};
++
++struct xfrm_encap_tmpl {
++	__u16		encap_type;
++	__u16		encap_sport;
++	__u16		encap_dport;
++	xfrm_address_t	encap_oa;
++};
++
++/* Netlink message attributes.  */
++enum xfrm_attr_type_t {
++	XFRMA_UNSPEC,
++	XFRMA_ALG_AUTH,		/* struct xfrm_algo */
++	XFRMA_ALG_CRYPT,	/* struct xfrm_algo */
++	XFRMA_ALG_COMP,		/* struct xfrm_algo */
++	XFRMA_ENCAP,		/* struct xfrm_algo + struct xfrm_encap_tmpl */
++	XFRMA_TMPL,		/* 1 or more struct xfrm_user_tmpl */
++	__XFRMA_MAX
++
++#define XFRMA_MAX (__XFRMA_MAX - 1)
++};
++
++struct xfrm_usersa_info {
++	struct xfrm_selector		sel;
++	struct xfrm_id			id;
++	xfrm_address_t			saddr;
++	struct xfrm_lifetime_cfg	lft;
++	struct xfrm_lifetime_cur	curlft;
++	struct xfrm_stats		stats;
++	__u32				seq;
++	__u32				reqid;
++	__u16				family;
++	__u8				mode; /* 0=transport,1=tunnel */
++	__u8				replay_window;
++	__u8				flags;
++#define XFRM_STATE_NOECN	1
++};
++
++struct xfrm_usersa_id {
++	xfrm_address_t			daddr;
++	__u32				spi;
++	__u16				family;
++	__u8				proto;
++};
++
++struct xfrm_userspi_info {
++	struct xfrm_usersa_info		info;
++	__u32				min;
++	__u32				max;
++};
++
++struct xfrm_userpolicy_info {
++	struct xfrm_selector		sel;
++	struct xfrm_lifetime_cfg	lft;
++	struct xfrm_lifetime_cur	curlft;
++	__u32				priority;
++	__u32				index;
++	__u8				dir;
++	__u8				action;
++#define XFRM_POLICY_ALLOW	0
++#define XFRM_POLICY_BLOCK	1
++	__u8				flags;
++#define XFRM_POLICY_LOCALOK	1	/* Allow user to override global policy */
++	__u8				share;
++};
++
++struct xfrm_userpolicy_id {
++	struct xfrm_selector		sel;
++	__u32				index;
++	__u8				dir;
++};
++
++struct xfrm_user_acquire {
++	struct xfrm_id			id;
++	xfrm_address_t			saddr;
++	struct xfrm_selector		sel;
++	struct xfrm_userpolicy_info	policy;
++	__u32				aalgos;
++	__u32				ealgos;
++	__u32				calgos;
++	__u32				seq;
++};
++
++struct xfrm_user_expire {
++	struct xfrm_usersa_info		state;
++	__u8				hard;
++};
++
++struct xfrm_user_polexpire {
++	struct xfrm_userpolicy_info	pol;
++	__u8				hard;
++};
++
++struct xfrm_usersa_flush {
++	__u8				proto;
++};
++
++#define XFRMGRP_ACQUIRE		1
++#define XFRMGRP_EXPIRE		2
++
++#endif /* _LINUX_XFRM_H */
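
These fixed-layout structures are the payloads of the netlink messages enumerated above; the kernel code that parses them arrives later in this patch as net/xfrm/xfrm_user.c. As a rough userspace sketch, the smallest useful request is a full SA flush, assuming the NETLINK_XFRM protocol number this backport registers and a proto of 0 meaning "any protocol":

	/* Userspace sketch: flush all SAs via XFRM_MSG_FLUSHSA.
	 * Error handling omitted. */
	struct {
		struct nlmsghdr		 n;
		struct xfrm_usersa_flush f;
	} req;
	int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);

	memset(&req, 0, sizeof(req));
	req.n.nlmsg_len   = NLMSG_LENGTH(sizeof(req.f));
	req.n.nlmsg_type  = XFRM_MSG_FLUSHSA;
	req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	req.f.proto       = 0;		/* 0 = every SA, any protocol */

	send(fd, &req, req.n.nlmsg_len, 0);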
+diff -Nru a/include/net/addrconf.h b/include/net/addrconf.h
+--- a/include/net/addrconf.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/addrconf.h	2005-02-13 21:25:09 +11:00
+@@ -6,6 +6,13 @@
+ #define MAX_RTR_SOLICITATIONS		3
+ #define RTR_SOLICITATION_INTERVAL	(4*HZ)
+ 
++#define MIN_VALID_LIFETIME		(2*3600)	/* 2 hours */
++
++#define TEMP_VALID_LIFETIME		(7*86400)
++#define TEMP_PREFERRED_LIFETIME		(86400)
++#define REGEN_MAX_RETRY			(5)
++#define MAX_DESYNC_FACTOR		(600)
++
+ #define ADDR_CHECK_FREQUENCY		(120*HZ)
+ 
+ struct prefix_info {
+diff -Nru a/include/net/ah.h b/include/net/ah.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/net/ah.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,35 @@
++#ifndef _NET_AH_H
++#define _NET_AH_H
++
++#include <net/xfrm.h>
++
++/* This is the maximum truncated ICV length that we know of. */
++#define MAX_AH_AUTH_LEN	12
++
++struct ah_data
++{
++	u8			*key;
++	int			key_len;
++	u8			*work_icv;
++	int			icv_full_len;
++	int			icv_trunc_len;
++
++	void			(*icv)(struct ah_data*,
++	                               struct sk_buff *skb, u8 *icv);
++
++	struct crypto_tfm	*tfm;
++};
++
++static inline void
++ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data)
++{
++	struct crypto_tfm *tfm = ahp->tfm;
++
++	memset(auth_data, 0, ahp->icv_trunc_len);
++	crypto_hmac_init(tfm, ahp->key, &ahp->key_len);
++	skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update);
++	crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv);
++	memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len);
++}
++
++#endif
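
ah_hmac_digest() computes the full HMAC over the packet and copies out only icv_trunc_len bytes; the truncation is what RFC 2403/2404 (HMAC-MD5-96, HMAC-SHA-1-96) prescribe. Typical sizing, for illustration:

	/* Sketch: ICV sizing for AH with HMAC-SHA-1-96 (RFC 2404). */
	ahp->icv_full_len  = 20;	/* full SHA-1 digest, in bytes */
	ahp->icv_trunc_len = 12;	/* 96 bits carried in the AH header;
					 * MAX_AH_AUTH_LEN is exactly this */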
+diff -Nru a/include/net/dn_fib.h b/include/net/dn_fib.h
+--- a/include/net/dn_fib.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/dn_fib.h	2005-02-13 21:25:09 +11:00
+@@ -7,6 +7,9 @@
+ 
+ #include <linux/rtnetlink.h>
+ 
++/* WARNING: The ordering of these elements must match ordering
++ *          of RTA_* rtnetlink attribute numbers.
++ */
+ struct dn_kern_rta
+ {
+         void            *rta_dst;
+@@ -19,8 +22,9 @@
+         struct rtattr   *rta_mx;
+         struct rtattr   *rta_mp;
+         unsigned char   *rta_protoinfo;
+-        unsigned char   *rta_flow;
++        u32             *rta_flow;
+         struct rta_cacheinfo *rta_ci;
++	struct rta_session *rta_sess;
+ };
+ 
+ struct dn_fib_key {
+diff -Nru a/include/net/dn_route.h b/include/net/dn_route.h
+--- a/include/net/dn_route.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/dn_route.h	2005-02-13 21:25:09 +11:00
+@@ -122,7 +122,7 @@
+ 	if ((dst = sk->dst_cache) && !dst->obsolete) {
+ try_again:
+ 		skb->dst = dst_clone(dst);
+-		dst->output(skb);
++		dst_output(skb);
+ 		return;
+ 	}
+ 
+diff -Nru a/include/net/dst.h b/include/net/dst.h
+--- a/include/net/dst.h	2005-02-13 21:25:10 +11:00
++++ b/include/net/dst.h	2005-02-13 21:25:10 +11:00
+@@ -9,6 +9,8 @@
+ #define _NET_DST_H
+ 
+ #include <linux/config.h>
++#include <linux/rtnetlink.h>
++#include <linux/netdevice.h>
+ #include <net/neighbour.h>
+ 
+ /*
+@@ -22,6 +24,13 @@
+ #define DST_GC_INC	(HZ/2)
+ #define DST_GC_MAX	(120*HZ)
+ 
++/* Each dst_entry has reference count and sits in some parent list(s).
++ * When it is removed from parent list, it is "freed" (dst_free).
++ * After this it enters dead state (dst->obsolete > 0) and if its refcnt
++ * is zero, it can be destroyed immediately, otherwise it is added
++ * to gc list and garbage collector periodically checks the refcnt.
++ */
++
+ struct sk_buff;
+ 
+ struct dst_entry
+@@ -29,22 +38,22 @@
+ 	struct dst_entry        *next;
+ 	atomic_t		__refcnt;	/* client references	*/
+ 	int			__use;
++	struct dst_entry	*child;
+ 	struct net_device       *dev;
+ 	int			obsolete;
+ 	int			flags;
+ #define DST_HOST		1
++#define DST_NOXFRM		2
++#define DST_NOPOLICY		4
++#define DST_NOHASH		8
+ 	unsigned long		lastuse;
+ 	unsigned long		expires;
+ 
+-	unsigned		mxlock;
+-	unsigned		pmtu;
+-	unsigned		window;
+-	unsigned		rtt;
+-	unsigned		rttvar;
+-	unsigned		ssthresh;
+-	unsigned		cwnd;
+-	unsigned		advmss;
+-	unsigned		reordering;
++	unsigned short		header_len;	/* more space at head required */
++	unsigned short		trailer_len;	/* space to reserve at tail */
++
++	u32			metrics[RTAX_MAX];
++	struct dst_entry	*path;
+ 
+ 	unsigned long		rate_last;	/* rate limiting for ICMP */
+ 	unsigned long		rate_tokens;
+@@ -53,6 +62,7 @@
+ 
+ 	struct neighbour	*neighbour;
+ 	struct hh_cache		*hh;
++	struct xfrm_state	*xfrm;
+ 
+ 	int			(*input)(struct sk_buff*);
+ 	int			(*output)(struct sk_buff*);
+@@ -75,11 +85,11 @@
+ 
+ 	int			(*gc)(void);
+ 	struct dst_entry *	(*check)(struct dst_entry *, __u32 cookie);
+-	struct dst_entry *	(*reroute)(struct dst_entry *,
+-					   struct sk_buff *);
+ 	void			(*destroy)(struct dst_entry *);
+ 	struct dst_entry *	(*negative_advice)(struct dst_entry *);
+ 	void			(*link_failure)(struct sk_buff *);
++	void			(*update_pmtu)(struct dst_entry *dst, u32 mtu);
++	int			(*get_mss)(struct dst_entry *dst, u32 mtu);
+ 	int			entry_size;
+ 
+ 	atomic_t		entries;
+@@ -88,6 +98,33 @@
+ 
+ #ifdef __KERNEL__
+ 
++static inline u32
++dst_metric(struct dst_entry *dst, int metric)
++{
++	return dst->metrics[metric-1];
++}
++
++static inline u32
++dst_path_metric(struct dst_entry *dst, int metric)
++{
++	return dst->path->metrics[metric-1];
++}
++
++static inline u32
++dst_pmtu(struct dst_entry *dst)
++{
++	u32 mtu = dst_path_metric(dst, RTAX_MTU);
++	/* Yes, _exactly_. This is paranoia. */
++	barrier();
++	return mtu;
++}
++
++static inline int
++dst_metric_locked(struct dst_entry *dst, int metric)
++{
++	return dst_metric(dst, RTAX_LOCK) & (1<<metric);
++}
++
+ static inline void dst_hold(struct dst_entry * dst)
+ {
+ 	atomic_inc(&dst->__refcnt);
+@@ -104,22 +141,40 @@
+ static inline
+ void dst_release(struct dst_entry * dst)
+ {
+-	if (dst)
++	if (dst) {
++		if (atomic_read(&dst->__refcnt) < 1) {
++			printk("BUG: dst underflow %d: %p\n",
++			       atomic_read(&dst->__refcnt),
++			       current_text_addr());
++		}
+ 		atomic_dec(&dst->__refcnt);
++	}
++}
++
++/* Children define the path of the packet through the
++ * Linux networking.  Thus, destinations are stackable.
++ */
++
++static inline struct dst_entry *dst_pop(struct dst_entry *dst)
++{
++	struct dst_entry *child = dst_clone(dst->child);
++
++	dst_release(dst);
++	return child;
+ }
+ 
+ extern void * dst_alloc(struct dst_ops * ops);
+ extern void __dst_free(struct dst_entry * dst);
+-extern void dst_destroy(struct dst_entry * dst);
++extern struct dst_entry *dst_destroy(struct dst_entry * dst);
+ 
+-static inline
+-void dst_free(struct dst_entry * dst)
++static inline void dst_free(struct dst_entry * dst)
+ {
+ 	if (dst->obsolete > 1)
+ 		return;
+ 	if (!atomic_read(&dst->__refcnt)) {
+-		dst_destroy(dst);
+-		return;
++		dst = dst_destroy(dst);
++		if (!dst)
++			return;
+ 	}
+ 	__dst_free(dst);
+ }
+@@ -155,8 +210,50 @@
+ 		dst->expires = expires;
+ }
+ 
++/* Output packet to network from transport.  */
++static inline int dst_output(struct sk_buff *skb)
++{
++	int err;
++
++	for (;;) {
++		err = skb->dst->output(skb);
++
++		if (likely(err == 0))
++			return err;
++		if (unlikely(err != NET_XMIT_BYPASS))
++			return err;
++	}
++}
++
++/* Input packet from network to transport.  */
++static inline int dst_input(struct sk_buff *skb)
++{
++	int err;
++
++	for (;;) {
++		err = skb->dst->input(skb);
++
++		if (likely(err == 0))
++			return err;
++		/* Oh, Jamal... Seems, I will not forgive you this mess. :-) */
++		if (unlikely(err != NET_XMIT_BYPASS))
++			return err;
++	}
++}
++
+ extern void		dst_init(void);
+ 
++struct flowi;
++#ifndef CONFIG_XFRM
++static inline int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
++		       struct sock *sk, int flags)
++{
++	return 0;
++} 
++#else
++extern int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
++		       struct sock *sk, int flags);
++#endif
+ #endif
+ 
+ #endif /* _NET_DST_H */
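
The new child and path members make destinations stackable, which is how an IPsec bundle is represented: each level's ->xfrm is the SA applied at that step and ->path short-circuits to the plain route at the bottom, while dst_pop() releases one level and returns the next. A sketch of walking a bundle, assuming the xfrm_state layout added later in this patch:

	/* Sketch: walk an xfrm bundle from the outermost dst down to the
	 * plain route (xfrm == NULL, equal to dst->path). */
	static void dump_bundle(struct dst_entry *dst)
	{
		struct dst_entry *d;

		for (d = dst; d; d = d->child) {
			if (d->xfrm)
				printk("xfrm: proto %u\n", d->xfrm->id.proto);
			else
				printk("route: dev %s\n",
				       d->dev ? d->dev->name : "?");
		}
	}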
+diff -Nru a/include/net/esp.h b/include/net/esp.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/net/esp.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,59 @@
++#ifndef _NET_ESP_H
++#define _NET_ESP_H
++
++#include <net/xfrm.h>
++#include <asm/scatterlist.h>
++
++#define ESP_NUM_FAST_SG		4
++
++struct esp_data
++{
++	struct scatterlist		sgbuf[ESP_NUM_FAST_SG];
++
++	/* Confidentiality */
++	struct {
++		u8			*key;		/* Key */
++		int			key_len;	/* Key length */
++		u8			*ivec;		/* ivec buffer */
++		/* ivlen is the offset from enc_data at which the encrypted data starts.
++		 * It is logically distinct from crypto_tfm_alg_ivsize(tfm).
++		 * We assume that it is either zero (no ivec), or
++		 * >= crypto_tfm_alg_ivsize(tfm). */
++		int			ivlen;
++		int			padlen;		/* 0..255 */
++		struct crypto_tfm	*tfm;		/* crypto handle */
++	} conf;
++
++	/* Integrity. It is active when icv_full_len != 0 */
++	struct {
++		u8			*key;		/* Key */
++		int			key_len;	/* Length of the key */
++		u8			*work_icv;
++		int			icv_full_len;
++		int			icv_trunc_len;
++		void			(*icv)(struct esp_data*,
++		                               struct sk_buff *skb,
++		                               int offset, int len, u8 *icv);
++		struct crypto_tfm	*tfm;
++	} auth;
++};
++
++extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len);
++extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer);
++extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len);
++
++static inline void
++esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset,
++                int len, u8 *auth_data)
++{
++	struct crypto_tfm *tfm = esp->auth.tfm;
++	char *icv = esp->auth.work_icv;
++
++	memset(auth_data, 0, esp->auth.icv_trunc_len);
++	crypto_hmac_init(tfm, esp->auth.key, &esp->auth.key_len);
++	skb_icv_walk(skb, tfm, offset, len, crypto_hmac_update);
++	crypto_hmac_final(tfm, esp->auth.key, &esp->auth.key_len, icv);
++	memcpy(auth_data, icv, esp->auth.icv_trunc_len);
++}
++
++#endif
+diff -Nru a/include/net/flow.h b/include/net/flow.h
+--- a/include/net/flow.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/flow.h	2005-02-13 21:25:09 +11:00
+@@ -1,24 +1,31 @@
+ /*
+  *
+- *	Flow based forwarding rules (usage: firewalling, etc)
++ *	Generic internet FLOW.
+  *
+  */
+ 
+ #ifndef _NET_FLOW_H
+ #define _NET_FLOW_H
+ 
++#include <linux/in6.h>
++#include <asm/atomic.h>
++
+ struct flowi {
+-	int	proto;		/*	{TCP, UDP, ICMP}	*/
++	int	oif;
++	int	iif;
+ 
+ 	union {
+ 		struct {
+ 			__u32			daddr;
+ 			__u32			saddr;
++			__u32			fwmark;
++			__u8			tos;
++			__u8			scope;
+ 		} ip4_u;
+ 		
+ 		struct {
+-			struct in6_addr *	daddr;
+-			struct in6_addr *	saddr;
++			struct in6_addr		daddr;
++			struct in6_addr		saddr;
+ 			__u32			flowlabel;
+ 		} ip6_u;
+ 	} nl_u;
+@@ -27,9 +34,12 @@
+ #define fl6_flowlabel	nl_u.ip6_u.flowlabel
+ #define fl4_dst		nl_u.ip4_u.daddr
+ #define fl4_src		nl_u.ip4_u.saddr
++#define fl4_fwmark	nl_u.ip4_u.fwmark
++#define fl4_tos		nl_u.ip4_u.tos
++#define fl4_scope	nl_u.ip4_u.scope
+ 
+-	int	oif;
+-
++	__u8	proto;
++	__u8	flags;
+ 	union {
+ 		struct {
+ 			__u16	sport;
+@@ -41,61 +51,27 @@
+ 			__u8	code;
+ 		} icmpt;
+ 
+-		unsigned long	data;
++		__u32		spi;
+ 	} uli_u;
+-};
+-
+-#define FLOWR_NODECISION	0	/* rule not appliable to flow	*/
+-#define FLOWR_SELECT		1	/* flow must follow this rule	*/
+-#define FLOWR_CLEAR		2	/* priority level clears flow	*/
+-#define FLOWR_ERROR		3
+-
+-struct fl_acc_args {
+-	int	type;
+-
+-
+-#define FL_ARG_FORWARD	1
+-#define FL_ARG_ORIGIN	2
+-
+-	union {
+-		struct sk_buff		*skb;
+-		struct {
+-			struct sock	*sk;
+-			struct flowi	*flow;
+-		} fl_o;
+-	} fl_u;
+-};
+-
+-
+-struct pkt_filter {
+-	atomic_t		refcnt;
+-	unsigned int		offset;
+-	__u32			value;
+-	__u32			mask;
+-	struct pkt_filter	*next;
+-};
+-
+-#define FLR_INPUT		1
+-#define FLR_OUTPUT		2
+-
+-struct flow_filter {
+-	int				type;
+-	union {
+-		struct pkt_filter	*filter;
+-		struct sock		*sk;
+-	} u;
+-};
+-
+-struct flow_rule {
+-	struct flow_rule_ops		*ops;
+-	unsigned char			private[0];
+-};
+-
+-struct flow_rule_ops {
+-	int			(*accept)(struct rt6_info *rt,
+-					  struct rt6_info *rule,
+-					  struct fl_acc_args *args,
+-					  struct rt6_info **nrt);
+-};
++#define fl_ip_sport	uli_u.ports.sport
++#define fl_ip_dport	uli_u.ports.dport
++#define fl_icmp_type	uli_u.icmpt.type
++#define fl_icmp_code	uli_u.icmpt.code
++#define fl_ipsec_spi	uli_u.spi
++
++	u32 __pad;
++} __attribute__((__aligned__(BITS_PER_LONG/8)));
++
++#define FLOW_DIR_IN	0
++#define FLOW_DIR_OUT	1
++#define FLOW_DIR_FWD	2
++
++typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir,
++			       void **objp, atomic_t **obj_refp);
++
++extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
++			       flow_resolve_t resolver);
++extern void flow_cache_flush(void);
++extern atomic_t flow_cache_genid;
+ 
+ #endif
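
struct flowi thus collapses routing keys, firewall marks and per-protocol selectors into one generic flow descriptor. Filling one in for an IPv4 lookup looks like this (designated initializers, the same idiom the ip_vs.h hunks below switch to; the addresses are examples):

	/* Sketch: describe an outbound IPv4 TCP flow for a route lookup. */
	struct flowi fl = {
		.oif   = 0,				/* any output device */
		.nl_u  = { .ip4_u = { .daddr = htonl(0xc0a80001),
				      .saddr = 0,	/* let routing choose */
				      .tos   = 0 } },
		.proto = IPPROTO_TCP,
		.uli_u = { .ports = { .sport = htons(1024),
				      .dport = htons(80) } },
	};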
+diff -Nru a/include/net/if_inet6.h b/include/net/if_inet6.h
+--- a/include/net/if_inet6.h	2005-02-13 21:25:10 +11:00
++++ b/include/net/if_inet6.h	2005-02-13 21:25:10 +11:00
+@@ -47,6 +47,12 @@
+ 	struct inet6_ifaddr	*lst_next;      /* next addr in addr_lst */
+ 	struct inet6_ifaddr	*if_next;       /* next addr in inet6_dev */
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++	struct inet6_ifaddr	*tmp_next;	/* next addr in tempaddr_lst */
++	struct inet6_ifaddr	*ifpub;
++	int			regen_count;
++#endif
++
+ 	int			dead;
+ };
+ 
+@@ -150,6 +156,15 @@
+ 	atomic_t		refcnt;
+ 	__u32			if_flags;
+ 	int			dead;
++
++#ifdef CONFIG_IPV6_PRIVACY
++	u8			rndid[8];
++	u8			entropy[8];
++	struct timer_list	regen_timer;
++	struct inet6_ifaddr	*tempaddr_list;
++	__u8			work_eui64[8];
++	__u8			work_digest[16];
++#endif
+ 
+ 	struct neigh_parms	*nd_parms;
+ 	struct inet6_dev	*next;
+diff -Nru a/include/net/inet_ecn.h b/include/net/inet_ecn.h
+--- a/include/net/inet_ecn.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/inet_ecn.h	2005-02-13 21:25:09 +11:00
+@@ -1,6 +1,8 @@
+ #ifndef _INET_ECN_H_
+ #define _INET_ECN_H_
+ 
++#include <linux/ip.h>
++
+ enum {
+ 	INET_ECN_NOT_ECT = 0,
+ 	INET_ECN_ECT_1 = 1,
+@@ -52,11 +54,21 @@
+ 	iph->tos |= 1;
+ }
+ 
++static inline void IP_ECN_clear(struct iphdr *iph)
++{
++	iph->tos &= ~3;
++}
++
+ struct ipv6hdr;
+ 
+ static inline void IP6_ECN_set_ce(struct ipv6hdr *iph)
+ {
+ 	*(u32*)iph |= htonl(1<<20);
++}
++
++static inline void IP6_ECN_clear(struct ipv6hdr *iph)
++{
++	*(u32*)iph &= ~htonl(3<<20);
+ }
+ 
+ #define ip6_get_dsfield(iph) ((ntohs(*(u16*)(iph)) >> 4) & 0xFF)
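
The new helpers keep manipulating the two low bits of the IPv4 TOS byte (IPv6 carries the same pair at bits 20-21 of the flow word), so the codepoint transitions are easy to trace:

	/* Sketch: ECN codepoints in the low two bits of the TOS byte. */
	iph->tos = 0x02;	/* ECT(0): ECN-capable transport */
	IP_ECN_set_ce(iph);	/* tos |= 1  -> 0x03, Congestion Experienced */
	IP_ECN_clear(iph);	/* tos &= ~3 -> 0x00, not ECN-capable */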
+diff -Nru a/include/net/ip.h b/include/net/ip.h
+--- a/include/net/ip.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/ip.h	2005-02-13 21:25:09 +11:00
+@@ -29,6 +29,7 @@
+ #include <linux/netdevice.h>
+ #include <linux/inetdevice.h>
+ #include <linux/in_route.h>
++#include <linux/sysctl.h>
+ #include <net/route.h>
+ #include <net/arp.h>
+ 
+@@ -46,6 +47,7 @@
+ #define IPSKB_MASQUERADED	1
+ #define IPSKB_TRANSLATED	2
+ #define IPSKB_FORWARDED		4
++#define IPSKB_XFRM_TUNNEL_SIZE	8
+ };
+ 
+ struct ipcm_cookie
+@@ -98,17 +100,19 @@
+ extern void		ip_send_check(struct iphdr *ip);
+ extern int		ip_queue_xmit(struct sk_buff *skb, int ipfragok);
+ extern void		ip_init(void);
+-extern int		ip_build_xmit(struct sock *sk,
+-				      int getfrag (const void *,
+-						   char *,
+-						   unsigned int,
+-						   unsigned int,
+-						   struct sk_buff *),
+-				      const void *frag,
+-				      unsigned length,
+-				      struct ipcm_cookie *ipc,
+-				      struct rtable *rt,
+-				      int flags);
++extern int		ip_append_data(struct sock *sk,
++				       int getfrag(void *from, char *to, int offset, int len,
++						   int odd, struct sk_buff *skb),
++				void *from, int len, int protolen,
++				struct ipcm_cookie *ipc,
++				struct rtable *rt,
++				unsigned int flags);
++extern int		ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb);
++extern ssize_t		ip_append_page(struct sock *sk, struct page *page,
++				int offset, size_t size, int flags);
++extern int		ip_push_pending_frames(struct sock *sk);
++extern void		ip_flush_pending_frames(struct sock *sk);
++
+ 
+ /*
+  *	Map a multicast IP onto multicast MAC for type Token Ring.
+@@ -128,8 +132,7 @@
+ }
+ 
+ struct ip_reply_arg {
+-	struct iovec iov[2];   
+-	int          n_iov;    /* redundant */
++	struct iovec iov[1];   
+ 	u32 	     csum; 
+ 	int	     csumoffset; /* u16 offset of csum in iov[0].iov_base */
+ 				 /* -1 if not needed */ 
+@@ -161,14 +164,6 @@
+ extern int sysctl_ip_default_ttl;
+ 
+ #ifdef CONFIG_INET
+-static inline int ip_send(struct sk_buff *skb)
+-{
+-	if (skb->len > skb->dst->pmtu)
+-		return ip_fragment(skb, ip_finish_output);
+-	else
+-		return ip_finish_output(skb);
+-}
+-
+ /* The function in 2.2 was invalid, producing wrong result for
+  * check=0xFEFF. It was noticed by Arthur Skawina _year_ ago. --ANK(000625) */
+ static inline
+@@ -185,7 +180,7 @@
+ {
+ 	return (sk->protinfo.af_inet.pmtudisc == IP_PMTUDISC_DO ||
+ 		(sk->protinfo.af_inet.pmtudisc == IP_PMTUDISC_WANT &&
+-		 !(dst->mxlock&(1<<RTAX_MTU))));
++		 !(dst_metric(dst, RTAX_LOCK)&(1<<RTAX_MTU))));
+ }
+ 
+ extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst);
+@@ -268,5 +263,16 @@
+ 			      u16 port, u32 info, u8 *payload);
+ extern void	ip_local_error(struct sock *sk, int err, u32 daddr, u16 dport,
+ 			       u32 info);
++
++/* sysctl helpers - any sysctl which holds a value that ends up being
++ * fed into the routing cache should use these handlers.
++ */
++int ipv4_doint_and_flush(ctl_table *ctl, int write,
++			 struct file* filp, void *buffer,
++			 size_t *lenp);
++int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,
++				  void *oldval, size_t *oldlenp,
++				  void *newval, size_t newlen, 
++				  void **context);
+ 
+ #endif	/* _IP_H */
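
This replaces ip_build_xmit() with the cork/append output model: callers queue data with ip_append_data() (or ip_append_page() for zero-copy), and ip_push_pending_frames() finally stamps the IP header(s) and transmits. A sketch of the calling pattern, assuming ip_generic_getfrag() reads from a struct iovec as its 2.6 counterpart does:

	/* Sketch: corked transmit of one datagram through the append API.
	 * `ipc` and `rt` come from the usual control-message and routing
	 * plumbing; error handling is abbreviated. */
	static int send_dgram(struct sock *sk, struct iovec *iov, int len,
			      struct ipcm_cookie *ipc, struct rtable *rt)
	{
		int err;

		lock_sock(sk);
		err = ip_append_data(sk, ip_generic_getfrag, iov, len,
				     0 /* no transport header */, ipc, rt,
				     MSG_DONTWAIT);
		if (err)
			ip_flush_pending_frames(sk);	/* drop queued frames */
		else
			err = ip_push_pending_frames(sk);
		release_sock(sk);
		return err;
	}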
+diff -Nru a/include/net/ip6_fib.h b/include/net/ip6_fib.h
+--- a/include/net/ip6_fib.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/ip6_fib.h	2005-02-13 21:25:09 +11:00
+@@ -67,17 +67,8 @@
+ 	
+ 	u32				rt6i_flags;
+ 	u32				rt6i_metric;
+-	u8				rt6i_hoplimit;
+ 	atomic_t			rt6i_ref;
+ 
+-	union {
+-		struct flow_rule	*rt6iu_flowr;
+-		struct flow_filter	*rt6iu_filter;
+-	} flow_u;
+-
+-#define rt6i_flowr			flow_u.rt6iu_flowr
+-#define rt6i_filter			flow_u.rt6iu_filter
+-
+ 	struct rt6key			rt6i_dst;
+ 	struct rt6key			rt6i_src;
+ 
+@@ -171,10 +162,12 @@
+ 
+ extern int			fib6_add(struct fib6_node *root,
+ 					 struct rt6_info *rt,
+-					 struct nlmsghdr *nlh);
++					 struct nlmsghdr *nlh,
++					 void *rtattr);
+ 
+ extern int			fib6_del(struct rt6_info *rt,
+-					 struct nlmsghdr *nlh);
++					 struct nlmsghdr *nlh,
++					 void *rtattr);
+ 
+ extern void			inet6_rt_notify(int event, struct rt6_info *rt,
+ 						struct nlmsghdr *nlh);
+diff -Nru a/include/net/ip6_fw.h b/include/net/ip6_fw.h
+--- a/include/net/ip6_fw.h	2005-02-13 21:25:09 +11:00
++++ /dev/null	Wed Dec 31 16:00:00 196900
+@@ -1,54 +0,0 @@
+-#ifndef __NET_IP6_FW_H
+-#define __NET_IP6_FW_H
+-
+-#define IP6_FW_LISTHEAD		0x1000
+-#define IP6_FW_ACCEPT		0x0001
+-#define IP6_FW_REJECT		0x0002
+-
+-#define IP6_FW_DEBUG	2
+-
+-#define IP6_FW_MSG_ADD		1
+-#define IP6_FW_MSG_DEL		2
+-#define IP6_FW_MSG_REPORT	3
+-
+-
+-/*
+- *	Fast "hack" user interface
+- */
+-struct ip6_fw_msg {
+-	struct in6_addr		dst;
+-	struct in6_addr		src;
+-	int			dst_len;
+-	int			src_len;
+-	int			action;
+-	int			policy;
+-	int			proto;
+-	union {
+-		struct {
+-			__u16	sport;
+-			__u16	dport;
+-		} transp;
+-
+-		unsigned long	data;
+-
+-		int		icmp_type;
+-	} u;
+-
+-	int			msg_len;
+-};
+-
+-#ifdef __KERNEL__
+-
+-#include <net/flow.h>
+-
+-struct ip6_fw_rule {
+-	struct flow_rule	flowr;
+-	struct ip6_fw_rule	*next;
+-	struct ip6_fw_rule	*prev;
+-	struct flowi		info;
+-	unsigned long		policy;
+-};
+-
+-#endif
+-
+-#endif
+diff -Nru a/include/net/ip6_route.h b/include/net/ip6_route.h
+--- a/include/net/ip6_route.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/ip6_route.h	2005-02-13 21:25:09 +11:00
+@@ -39,12 +39,15 @@
+ extern int			ipv6_route_ioctl(unsigned int cmd, void *arg);
+ 
+ extern int			ip6_route_add(struct in6_rtmsg *rtmsg,
+-					      struct nlmsghdr *);
++					      struct nlmsghdr *,
++					      void *rtattr);
+ extern int			ip6_del_rt(struct rt6_info *,
+-					   struct nlmsghdr *);
++					   struct nlmsghdr *,
++					   void *rtattr);
+ 
+ extern int			ip6_rt_addr_add(struct in6_addr *addr,
+-						struct net_device *dev);
++						struct net_device *dev,
++						int anycast);
+ 
+ extern int			ip6_rt_addr_del(struct in6_addr *addr,
+ 						struct net_device *dev);
+@@ -60,6 +63,12 @@
+ 					    struct in6_addr *saddr,
+ 					    int oif, int flags);
+ 
++extern struct dst_entry *ndisc_dst_alloc(struct net_device *dev,
++					 struct neighbour *neigh,
++					 int (*output)(struct sk_buff *));
++extern int ndisc_dst_gc(int *more);
++extern void fib6_force_start_gc(void);
++
+ /*
+  *	support functions for ND
+  *
+@@ -109,6 +118,13 @@
+ 	np->daddr_cache = daddr;
+ 	np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
+ 	write_unlock(&sk->dst_lock);
++}
++
++static inline int ipv6_unicast_destination(struct sk_buff *skb)
++{
++	struct rt6_info *rt = (struct rt6_info *) skb->dst;
++
++	return rt->rt6i_flags & RTF_LOCAL;
+ }
+ 
+ #endif
+diff -Nru a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/net/ip6_tunnel.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,46 @@
++/*
++ * $Id$
++ */
++
++#ifndef _NET_IP6_TUNNEL_H
++#define _NET_IP6_TUNNEL_H
++
++#include <linux/ipv6.h>
++#include <linux/netdevice.h>
++#include <linux/ip6_tunnel.h>
++
++/* capable of sending packets */
++#define IP6_TNL_F_CAP_XMIT 0x10000
++/* capable of receiving packets */
++#define IP6_TNL_F_CAP_RCV 0x20000
++
++#define IP6_TNL_MAX 128
++
++/* IPv6 tunnel */
++
++struct ip6_tnl {
++	struct ip6_tnl *next;	/* next tunnel in list */
++	struct net_device *dev;	/* virtual device associated with tunnel */
++	struct net_device_stats stat;	/* statistics for tunnel device */
++	int recursion;		/* depth of hard_start_xmit recursion */
++	struct ip6_tnl_parm parms;	/* tunnel configuration parameters */
++	struct flowi fl;	/* flowi template for xmit */
++	struct dst_entry *dst_cache;    /* cached dst */
++	u32 dst_cookie;
++};
++
++/* Tunnel encapsulation limit destination sub-option */
++
++struct ipv6_tlv_tnl_enc_lim {
++	__u8 type;		/* type-code for option         */
++	__u8 length;		/* option length                */
++	__u8 encap_limit;	/* tunnel encapsulation limit   */
++} __attribute__ ((packed));
++
++#ifdef __KERNEL__
++#ifdef CONFIG_IPV6_TUNNEL
++extern int __init ip6_tunnel_init(void);
++extern void ip6_tunnel_cleanup(void);
++#endif
++#endif
++#endif
+diff -Nru a/include/net/ip_fib.h b/include/net/ip_fib.h
+--- a/include/net/ip_fib.h	2005-02-13 21:25:10 +11:00
++++ b/include/net/ip_fib.h	2005-02-13 21:25:10 +11:00
+@@ -17,7 +17,11 @@
+ #define _NET_IP_FIB_H
+ 
+ #include <linux/config.h>
++#include <net/flow.h>
+ 
++/* WARNING: The ordering of these elements must match ordering
++ *          of RTA_* rtnetlink attribute numbers.
++ */
+ struct kern_rta
+ {
+ 	void		*rta_dst;
+@@ -30,8 +34,9 @@
+ 	struct rtattr	*rta_mx;
+ 	struct rtattr	*rta_mp;
+ 	unsigned char	*rta_protoinfo;
+-	unsigned char	*rta_flow;
++	u32		*rta_flow;
+ 	struct rta_cacheinfo *rta_ci;
++	struct rta_session *rta_sess;
+ };
+ 
+ struct fib_nh
+@@ -65,7 +70,7 @@
+ 	int			fib_protocol;
+ 	u32			fib_prefsrc;
+ 	u32			fib_priority;
+-	unsigned		fib_metrics[RTAX_MAX];
++	u32			fib_metrics[RTAX_MAX];
+ #define fib_mtu fib_metrics[RTAX_MTU-1]
+ #define fib_window fib_metrics[RTAX_WINDOW-1]
+ #define fib_rtt fib_metrics[RTAX_RTT-1]
+@@ -117,7 +122,7 @@
+ {
+ 	unsigned char	tb_id;
+ 	unsigned	tb_stamp;
+-	int		(*tb_lookup)(struct fib_table *tb, const struct rt_key *key, struct fib_result *res);
++	int		(*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res);
+ 	int		(*tb_insert)(struct fib_table *table, struct rtmsg *r,
+ 				     struct kern_rta *rta, struct nlmsghdr *n,
+ 				     struct netlink_skb_parms *req);
+@@ -130,7 +135,7 @@
+ 	int		(*tb_get_info)(struct fib_table *table, char *buf,
+ 				       int first, int count);
+ 	void		(*tb_select_default)(struct fib_table *table,
+-					     const struct rt_key *key, struct fib_result *res);
++					     const struct flowi *flp, struct fib_result *res);
+ 
+ 	unsigned char	tb_data[0];
+ };
+@@ -152,18 +157,18 @@
+ 	return fib_get_table(id);
+ }
+ 
+-static inline int fib_lookup(const struct rt_key *key, struct fib_result *res)
++static inline int fib_lookup(const struct flowi *flp, struct fib_result *res)
+ {
+-	if (local_table->tb_lookup(local_table, key, res) &&
+-	    main_table->tb_lookup(main_table, key, res))
++	if (local_table->tb_lookup(local_table, flp, res) &&
++	    main_table->tb_lookup(main_table, flp, res))
+ 		return -ENETUNREACH;
+ 	return 0;
+ }
+ 
+-static inline void fib_select_default(const struct rt_key *key, struct fib_result *res)
++static inline void fib_select_default(const struct flowi *flp, struct fib_result *res)
+ {
+ 	if (FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+-		main_table->tb_select_default(main_table, key, res);
++		main_table->tb_select_default(main_table, flp, res);
+ }
+ 
+ #else /* CONFIG_IP_MULTIPLE_TABLES */
+@@ -171,7 +176,7 @@
+ #define main_table (fib_tables[RT_TABLE_MAIN])
+ 
+ extern struct fib_table * fib_tables[RT_TABLE_MAX+1];
+-extern int fib_lookup(const struct rt_key *key, struct fib_result *res);
++extern int fib_lookup(const struct flowi *flp, struct fib_result *res);
+ extern struct fib_table *__fib_new_table(int id);
+ extern void fib_rule_put(struct fib_rule *r);
+ 
+@@ -191,7 +196,7 @@
+ 	return fib_tables[id] ? : __fib_new_table(id);
+ }
+ 
+-extern void fib_select_default(const struct rt_key *key, struct fib_result *res);
++extern void fib_select_default(const struct flowi *flp, struct fib_result *res);
+ 
+ #endif /* CONFIG_IP_MULTIPLE_TABLES */
+ 
+@@ -204,13 +209,13 @@
+ extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb);
+ extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif,
+ 			       struct net_device *dev, u32 *spec_dst, u32 *itag);
+-extern void fib_select_multipath(const struct rt_key *key, struct fib_result *res);
++extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res);
+ 
+ /* Exported by fib_semantics.c */
+ extern int 		ip_fib_check_default(u32 gw, struct net_device *dev);
+ extern void		fib_release_info(struct fib_info *);
+ extern int		fib_semantic_match(int type, struct fib_info *,
+-					   const struct rt_key *, struct fib_result*);
++					   const struct flowi *, struct fib_result*);
+ extern struct fib_info	*fib_create_info(const struct rtmsg *r, struct kern_rta *rta,
+ 					 const struct nlmsghdr *, int *err);
+ extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, struct kern_rta *rta, struct fib_info *fi);
+diff -Nru a/include/net/ip_vs.h b/include/net/ip_vs.h
+--- a/include/net/ip_vs.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/ip_vs.h	2005-02-13 21:25:09 +11:00
+@@ -281,6 +281,13 @@
+ #define LeaveFunction(level)   do {} while (0)
+ #endif
+ 
++#define IP_VS_XMIT(skb, rt)				\
++do {							\
++	skb->nfcache |= NFC_IPVS_PROPERTY;		\
++	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
++		(rt)->u.dst.dev, dst_output);		\
++} while (0)
++
+ 
+ /*
+  *      The port number of FTP service (in network order).
+@@ -864,7 +871,16 @@
+ 		spin_lock(&dest->dst_lock);
+ 		if (!(rt = (struct rtable *)
+ 		      __ip_vs_dst_check(dest, rtos, 0))) {
+-			if (ip_route_output(&rt, dest->addr, 0, rtos, 0)) {
++			struct flowi fl = {
++				.oif = 0,
++				.nl_u = {
++					.ip4_u = {
++						.daddr = dest->addr,
++						.saddr = 0,
++						.tos = rtos, } },
++			};
++
++			if (ip_route_output_key(&rt, &fl)) {
+ 				spin_unlock(&dest->dst_lock);
+ 				IP_VS_DBG_RL("ip_route_output error, "
+ 					     "dest: %u.%u.%u.%u\n",
+@@ -878,7 +894,16 @@
+ 		}
+ 		spin_unlock(&dest->dst_lock);
+ 	} else {
+-		if (ip_route_output(&rt, cp->daddr, 0, rtos, 0)) {
++		struct flowi fl = {
++			.oif = 0,
++			.nl_u = {
++				.ip4_u = {
++					.daddr = cp->daddr,
++					.saddr = 0,
++					.tos = rtos, } },
++		};
++
++		if (ip_route_output_key(&rt, &fl)) {
+ 			IP_VS_DBG_RL("ip_route_output error, dest: "
+ 				     "%u.%u.%u.%u\n", NIPQUAD(cp->daddr));
+ 			return NULL;
+diff -Nru a/include/net/ipcomp.h b/include/net/ipcomp.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/net/ipcomp.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,12 @@
++#ifndef _NET_IPCOMP_H
++#define _NET_IPCOMP_H
++
++#define IPCOMP_SCRATCH_SIZE     65400
++
++struct ipcomp_data {
++	u16 threshold;
++	u8 *scratch;
++	struct crypto_tfm *tfm;
++};
++
++#endif
+diff -Nru a/include/net/ipip.h b/include/net/ipip.h
+--- a/include/net/ipip.h	2005-02-13 21:25:10 +11:00
++++ b/include/net/ipip.h	2005-02-13 21:25:10 +11:00
+@@ -34,7 +34,7 @@
+ 	ip_select_ident(iph, &rt->u.dst, NULL);				\
+ 	ip_send_check(iph);						\
+ 									\
+-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, do_ip_send); \
++	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev, dst_output);\
+ 	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {		\
+ 		stats->tx_bytes += pkt_len;				\
+ 		stats->tx_packets++;					\
+diff -Nru a/include/net/ipv6.h b/include/net/ipv6.h
+--- a/include/net/ipv6.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/ipv6.h	2005-02-13 21:25:09 +11:00
+@@ -22,6 +22,8 @@
+ 
+ #define SIN6_LEN_RFC2133	24
+ 
++#define IPV6_MAXPLEN		65535
++
+ /*
+  *	NextHeader field of IPv6 header
+  */
+@@ -48,7 +50,7 @@
+ /*
+  *	Addr type
+  *	
+- *	type	-	unicast | multicast | anycast
++ *	type	-	unicast | multicast
+  *	scope	-	local	| site	    | global
+  *	v4	-	compat
+  *	v4mapped
+@@ -60,7 +62,6 @@
+ 
+ #define IPV6_ADDR_UNICAST      	0x0001U	
+ #define IPV6_ADDR_MULTICAST    	0x0002U	
+-#define IPV6_ADDR_ANYCAST	0x0004U
+ 
+ #define IPV6_ADDR_LOOPBACK	0x0010U
+ #define IPV6_ADDR_LINKLOCAL	0x0020U
+@@ -98,6 +99,8 @@
+ 	__u32		identification;
+ };
+ 
++#define	IP6_MF	0x0001
++
+ #ifdef __KERNEL__
+ 
+ #include <net/sock.h>
+@@ -199,12 +202,8 @@
+ 
+ extern int			ip6_call_ra_chain(struct sk_buff *skb, int sel);
+ 
+-extern int			ipv6_reassembly(struct sk_buff **skb, int);
+-
+ extern int			ipv6_parse_hopopts(struct sk_buff *skb, int);
+ 
+-extern int			ipv6_parse_exthdrs(struct sk_buff **skb, int);
+-
+ extern struct ipv6_txoptions *  ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt);
+ 
+ extern int ip6_frag_nqueues;
+@@ -239,6 +238,23 @@
+ 	memcpy((void *) a1, (const void *) a2, sizeof(struct in6_addr));
+ }
+ 
++static inline void ipv6_addr_prefix(struct in6_addr *pfx, 
++				    const struct in6_addr *addr,
++				    int plen)
++{
++	/* caller must guarantee 0 <= plen <= 128 */
++	int o = plen >> 3,
++	    b = plen & 0x7;
++
++	memcpy(pfx->s6_addr, addr, o);
++	if (b != 0) {
++		pfx->s6_addr[o] = addr->s6_addr[o] & (0xff00 >> b);
++		o++;
++	}
++	if (o < 16)
++		memset(pfx->s6_addr + o, 0, 16 - o);
++}
++
+ #ifndef __HAVE_ARCH_ADDR_SET
+ static inline void ipv6_addr_set(struct in6_addr *addr, 
+ 				     __u32 w1, __u32 w2,
+@@ -291,6 +307,26 @@
+ 					       unsigned length,
+ 					       struct ipv6_txoptions *opt,
+ 					       int hlimit, int flags);
++extern int			ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr);
++
++extern int			ip6_append_data(struct sock *sk,
++						int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
++		    				void *from,
++						int length,
++						int transhdrlen,
++		      				int hlimit,
++						struct ipv6_txoptions *opt,
++						struct flowi *fl,
++						struct rt6_info *rt,
++						unsigned int flags);
++
++extern int			ip6_push_pending_frames(struct sock *sk);
++
++extern void			ip6_flush_pending_frames(struct sock *sk);
++
++extern int			ip6_dst_lookup(struct sock *sk,
++					       struct dst_entry **dst,
++					       struct flowi *fl);
+ 
+ /*
+  *	skb processing functions
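
ipv6_addr_prefix() copies the leading plen bits, masks the one partial byte and zeroes the tail. A worked case, assuming addr already holds the full address:

	/* Sketch: the /60 prefix of 2001:db8:0:1234::1.  o = 60 >> 3 = 7
	 * whole bytes are copied, byte 7 (0x34) is masked with 0xf0, and
	 * the rest is cleared: the result is 2001:db8:0:1230::. */
	struct in6_addr pfx;
	ipv6_addr_prefix(&pfx, &addr, 60);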
+diff -Nru a/include/net/ndisc.h b/include/net/ndisc.h
+--- a/include/net/ndisc.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/ndisc.h	2005-02-13 21:25:09 +11:00
+@@ -56,20 +56,6 @@
+ 	__u8		nd_opt_len;
+ } __attribute__((__packed__));
+ 
+-struct ndisc_options {
+-	struct nd_opt_hdr *nd_opt_array[7];
+-	struct nd_opt_hdr *nd_opt_piend;
+-};
+-
+-#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
+-#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
+-#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
+-#define nd_opts_pi_end		nd_opt_piend
+-#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
+-#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
+-
+-extern struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, struct nd_opt_hdr *end);
+-extern struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, struct ndisc_options *ndopts);
+ 
+ extern int			ndisc_init(struct net_proto_family *ops);
+ 
+diff -Nru a/include/net/protocol.h b/include/net/protocol.h
+--- a/include/net/protocol.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/protocol.h	2005-02-13 21:25:09 +11:00
+@@ -30,7 +30,7 @@
+ #include <linux/ipv6.h>
+ #endif
+ 
+-#define MAX_INET_PROTOS	32		/* Must be a power of 2		*/
++#define MAX_INET_PROTOS	256		/* Must be a power of 2		*/
+ 
+ 
+ /* This is used to register protocols. */
+@@ -38,29 +38,23 @@
+ {
+ 	int			(*handler)(struct sk_buff *skb);
+ 	void			(*err_handler)(struct sk_buff *skb, u32 info);
+-	struct inet_protocol	*next;
+-	unsigned char		protocol;
+-	unsigned char		copy:1;
+-	void			*data;
+-	const char		*name;
++	int			no_policy;
+ };
+ 
+ #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+ struct inet6_protocol 
+ {
+-	int	(*handler)(struct sk_buff *skb);
++	int	(*handler)(struct sk_buff **skb, unsigned int *nhoffp);
+ 
+ 	void	(*err_handler)(struct sk_buff *skb,
+ 			       struct inet6_skb_parm *opt,
+ 			       int type, int code, int offset,
+ 			       __u32 info);
+-	struct inet6_protocol *next;
+-	unsigned char	protocol;
+-	unsigned char	copy:1;
+-	void		*data;
+-	const char	*name;
++	unsigned int	flags;	/* INET6_PROTO_xxx */
+ };
+ 
++#define INET6_PROTO_NOPOLICY	0x1
++#define INET6_PROTO_FINAL	0x2
+ #endif
+ 
+ /* This is used to register socket interfaces for IP protocols.  */
+@@ -93,14 +87,14 @@
+ extern struct list_head inetsw6[SOCK_MAX];
+ #endif
+ 
+-extern void	inet_add_protocol(struct inet_protocol *prot);
+-extern int	inet_del_protocol(struct inet_protocol *prot);
++extern int	inet_add_protocol(struct inet_protocol *prot, unsigned char num);
++extern int	inet_del_protocol(struct inet_protocol *prot, unsigned char num);
+ extern void	inet_register_protosw(struct inet_protosw *p);
+ extern void	inet_unregister_protosw(struct inet_protosw *p);
+ 
+ #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
+-extern void	inet6_add_protocol(struct inet6_protocol *prot);
+-extern int	inet6_del_protocol(struct inet6_protocol *prot);
++extern int	inet6_add_protocol(struct inet6_protocol *prot, unsigned char num);
++extern int	inet6_del_protocol(struct inet6_protocol *prot, unsigned char num);
+ extern void	inet6_register_protosw(struct inet_protosw *p);
+ extern void	inet6_unregister_protosw(struct inet_protosw *p);
+ #endif
+diff -Nru a/include/net/raw.h b/include/net/raw.h
+--- a/include/net/raw.h	2005-02-13 21:25:10 +11:00
++++ b/include/net/raw.h	2005-02-13 21:25:10 +11:00
+@@ -37,6 +37,6 @@
+ 				    unsigned long raddr, unsigned long laddr,
+ 				    int dif);
+ 
+-extern struct sock *raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
++extern void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash);
+ 
+ #endif	/* _RAW_H */
+diff -Nru a/include/net/rawv6.h b/include/net/rawv6.h
+--- a/include/net/rawv6.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/rawv6.h	2005-02-13 21:25:09 +11:00
+@@ -7,9 +7,7 @@
+ extern struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
+ extern rwlock_t raw_v6_lock;
+ 
+-extern struct sock * ipv6_raw_deliver(struct sk_buff *skb,
+-				      int nexthdr);
+-
++extern void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr);
+ 
+ extern struct sock *__raw_v6_lookup(struct sock *sk, unsigned short num,
+ 				    struct in6_addr *loc_addr, struct in6_addr *rmt_addr);
+diff -Nru a/include/net/route.h b/include/net/route.h
+--- a/include/net/route.h	2005-02-13 21:25:08 +11:00
++++ b/include/net/route.h	2005-02-13 21:25:08 +11:00
+@@ -27,6 +27,7 @@
+ #include <linux/config.h>
+ #include <net/dst.h>
+ #include <net/inetpeer.h>
++#include <net/flow.h>
+ #include <linux/in_route.h>
+ #include <linux/rtnetlink.h>
+ #include <linux/route.h>
+@@ -45,19 +46,6 @@
+ 
+ #define RT_CONN_FLAGS(sk)   (RT_TOS(sk->protinfo.af_inet.tos) | sk->localroute)
+ 
+-struct rt_key
+-{
+-	__u32			dst;
+-	__u32			src;
+-	int			iif;
+-	int			oif;
+-#ifdef CONFIG_IP_ROUTE_FWMARK
+-	__u32			fwmark;
+-#endif
+-	__u8			tos;
+-	__u8			scope;
+-};
+-
+ struct inet_peer;
+ struct rtable
+ {
+@@ -78,7 +66,7 @@
+ 	__u32			rt_gateway;
+ 
+ 	/* Cache lookup keys */
+-	struct rt_key		key;
++	struct flowi		fl;
+ 
+ 	/* Miscellaneous cached information */
+ 	__u32			rt_spec_dst; /* RFC1122 specific destination */
+@@ -126,10 +114,11 @@
+ 				       u32 src, u8 tos, struct net_device *dev);
+ extern void		ip_rt_advice(struct rtable **rp, int advice);
+ extern void		rt_cache_flush(int how);
+-extern int		ip_route_output_key(struct rtable **, const struct rt_key *key);
++extern int		__ip_route_output_key(struct rtable **, const struct flowi *flp);
++extern int		ip_route_output_key(struct rtable **, struct flowi *flp);
++extern int		ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags);
+ extern int		ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin);
+ extern unsigned short	ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu);
+-extern void		ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu);
+ extern void		ip_rt_send_redirect(struct sk_buff *skb);
+ 
+ extern unsigned		inet_addr_type(u32 addr);
+@@ -138,16 +127,6 @@
+ extern void		ip_rt_get_source(u8 *src, struct rtable *rt);
+ extern int		ip_rt_dump(struct sk_buff *skb,  struct netlink_callback *cb);
+ 
+-/* Deprecated: use ip_route_output_key directly */
+-static inline int ip_route_output(struct rtable **rp,
+-				      u32 daddr, u32 saddr, u32 tos, int oif)
+-{
+-	struct rt_key key = { dst:daddr, src:saddr, oif:oif, tos:tos };
+-
+-	return ip_route_output_key(rp, &key);
+-}
+-
+-
+ static inline void ip_rt_put(struct rtable * rt)
+ {
+ 	if (rt)
+@@ -163,17 +142,47 @@
+ 	return ip_tos2prio[IPTOS_TOS(tos)>>1];
+ }
+ 
+-static inline int ip_route_connect(struct rtable **rp, u32 dst, u32 src, u32 tos, int oif)
+-{
++static inline int ip_route_connect(struct rtable **rp, u32 dst,
++				   u32 src, u32 tos, int oif, u8 protocol,
++				   u16 sport, u16 dport, struct sock *sk)
++{
++	struct flowi fl = { .oif = oif,
++			    .nl_u = { .ip4_u = { .daddr = dst,
++						 .saddr = src,
++						 .tos   = tos } },
++			    .proto = protocol,
++			    .uli_u = { .ports =
++				       { .sport = sport,
++					 .dport = dport } } };
++
+ 	int err;
+-	err = ip_route_output(rp, dst, src, tos, oif);
+-	if (err || (dst && src))
+-		return err;
+-	dst = (*rp)->rt_dst;
+-	src = (*rp)->rt_src;
+-	ip_rt_put(*rp);
+-	*rp = NULL;
+-	return ip_route_output(rp, dst, src, tos, oif);
++	if (!dst || !src) {
++		err = __ip_route_output_key(rp, &fl);
++		if (err)
++			return err;
++		fl.fl4_dst = (*rp)->rt_dst;
++		fl.fl4_src = (*rp)->rt_src;
++		ip_rt_put(*rp);
++		*rp = NULL;
++	}
++	return ip_route_output_flow(rp, &fl, sk, 0);
++}
++
++static inline int ip_route_newports(struct rtable **rp, u16 sport, u16 dport,
++				    struct sock *sk)
++{
++	if (sport != (*rp)->fl.fl_ip_sport ||
++	    dport != (*rp)->fl.fl_ip_dport) {
++		struct flowi fl;
++
++		memcpy(&fl, &(*rp)->fl, sizeof(fl));
++		fl.fl_ip_sport = sport;
++		fl.fl_ip_dport = dport;
++		ip_rt_put(*rp);
++		*rp = NULL;
++		return ip_route_output_flow(rp, &fl, sk, 0);
++	}
++	return 0;
+ }
+ 
+ extern void rt_bind_peer(struct rtable *rt, int create);
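
ip_route_connect() may now route twice: while source or destination is still wildcarded it resolves them with __ip_route_output_key(), then repeats the lookup through ip_route_output_flow() so that xfrm policy sees the complete flow, ports and protocol included. ip_route_newports() re-keys a cached route when only the ports changed. A sketch of the pattern a connect() handler would follow (socket field names abbreviated):

	/* Sketch: typical use of the two helpers above. */
	struct rtable *rt;
	int err;

	err = ip_route_connect(&rt, daddr, saddr, RT_CONN_FLAGS(sk),
			       sk->bound_dev_if, IPPROTO_TCP,
			       sport, dport, sk);
	if (err)
		return err;

	/* later, once a local port has finally been allocated: */
	err = ip_route_newports(&rt, new_sport, dport, sk);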
+diff -Nru a/include/net/sctp/compat.h b/include/net/sctp/compat.h
+--- a/include/net/sctp/compat.h	2005-02-13 21:25:08 +11:00
++++ b/include/net/sctp/compat.h	2005-02-13 21:25:08 +11:00
+@@ -55,14 +55,10 @@
+ 	extern type name[]
+ #define SNMP_DEC_STATS(mib, field) ((mib)[2*smp_processor_id()+!in_softirq()].field--)
+ 
+-#define inet_sk(__sk) (&(((struct sock *)__sk)->protinfo.af_inet))
+-#define inet6_sk(__sk) (&(((struct sock *)__sk)->net_pinfo.af_inet6))
+-
+ #define virt_addr_valid(x)	VALID_PAGE(virt_to_page((x)))
+ #define sock_owned_by_user(sk)  ((sk)->lock.users)
+ #define sk_set_owner(x, y)
+ #define __unsafe(x)
+-#define dst_pmtu(x) ((x)->pmtu)
+ 
+ /*
+  * find last bit set.
+diff -Nru a/include/net/sock.h b/include/net/sock.h
+--- a/include/net/sock.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/sock.h	2005-02-13 21:25:09 +11:00
+@@ -45,6 +45,8 @@
+ #include <net/if_inet6.h>	/* struct ipv6_mc_socklist */
+ #endif
+ 
++#include <net/flow.h>
++
+ #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
+ #include <linux/icmp.h>
+ #endif
+@@ -184,6 +186,12 @@
+ 
+ 	struct ipv6_txoptions	*opt;
+ 	struct sk_buff		*pktoptions;
++	struct {
++		struct ipv6_txoptions *opt;
++		struct rt6_info	*rt;
++		struct flowi fl;
++		int hop_limit;
++	} cork;
+ };
+ 
+ struct raw6_opt {
+@@ -210,7 +218,7 @@
+ #if defined(CONFIG_INET) || defined (CONFIG_INET_MODULE)
+ struct inet_opt
+ {
+-	int			ttl;			/* TTL setting */
++	int			uc_ttl;			/* Unicast TTL */
+ 	int			tos;			/* TOS */
+ 	unsigned	   	cmsg_flags;
+ 	struct ip_options	*opt;
+@@ -224,7 +232,24 @@
+ 	int			mc_index;		/* Multicast device index */
+ 	__u32			mc_addr;
+ 	struct ip_mc_socklist	*mc_list;		/* Group array */
++	struct page		*sndmsg_page;	/* Cached page for sendmsg */
++	u32			sndmsg_off;	/* Cached offset for sendmsg */
++	/*
++	 * The following members retain the information needed to build
++	 * an IP header for each fragment while the socket is corked.
++	 */
++	struct {
++		unsigned int		flags;
++		unsigned int		fragsize;
++		struct ip_options	*opt;
++		struct rtable		*rt;
++		int			length; /* Total length of all frames */
++		u32			addr;
++	} cork;
+ };
++
++#define IPCORK_OPT	1	/* ip-options has been held in ipcork.opt */
++
+ #endif
+ 
+ #if defined(CONFIG_PPPOE) || defined (CONFIG_PPPOE_MODULE)
+@@ -250,6 +275,14 @@
+ #define pppoe_relay	proto.pppoe.relay
+ #endif
+ 
++#if defined(CONFIG_NET_KEY) || defined(CONFIG_NET_KEY_MODULE)
++struct pfkey_opt {
++	int	registered;
++	int	promisc;
++};
++#define pfkey_sk(__sk) ((__sk)->protinfo.pf_key)
++#endif
++
+ /* This defines a selective acknowledgement block. */
+ struct tcp_sack_block {
+ 	__u32	start_seq;
+@@ -314,6 +347,7 @@
+ 	__u16	mss_cache;	/* Cached effective mss, not including SACKS */
+ 	__u16	mss_clamp;	/* Maximal mss, negotiated at connection setup */
+ 	__u16	ext_header_len;	/* Network protocol overhead (IP/IPv6 options) */
++	__u16	ext2_header_len;/* Options depending on route */
+ 	__u8	ca_state;	/* State of fast-retransmit machine 	*/
+ 	__u8	retransmits;	/* Number of unrecovered RTO timeouts.	*/
+ 
+@@ -354,8 +388,6 @@
+ 
+ 	struct tcp_func		*af_specific;	/* Operations which are AF_INET{4,6} specific	*/
+ 	struct sk_buff		*send_head;	/* Front of stuff to transmit			*/
+-	struct page		*sndmsg_page;	/* Cached page for sendmsg			*/
+-	u32			sndmsg_off;	/* Cached offset for sendmsg			*/
+ 
+  	__u32	rcv_wnd;	/* Current receiver window		*/
+ 	__u32	rcv_wup;	/* rcv_nxt on last window update sent	*/
+@@ -488,6 +520,20 @@
+ 	} bictcp;
+ };
+ 
++struct udp_opt {
++	int		pending;	/* Any pending frames ? */
++	unsigned int	corkflag;	/* Cork is required */
++	__u16		encap_type;	/* Is this an Encapsulation socket? */
++	/*
++	 * The following members retain the information needed to create a
++	 * UDP header when the socket is uncorked.
++	 */
++	u32		saddr;		/* source address */
++	u32		daddr;		/* destination address */
++	__u16		sport;		/* source port */
++	__u16		dport;		/* destination port */
++	__u16		len;		/* total length of pending frames */
++};
+  	
+ /*
+  * This structure really needs to be cleaned up.
+@@ -583,6 +629,7 @@
+ 	wait_queue_head_t	*sleep;		/* Sock wait queue			*/
+ 	struct dst_entry	*dst_cache;	/* Destination cache			*/
+ 	rwlock_t		dst_lock;
++	struct xfrm_policy	*policy[2];
+ 	atomic_t		rmem_alloc;	/* Receive queue bytes committed	*/
+ 	struct sk_buff_head	receive_queue;	/* Incoming packets			*/
+ 	atomic_t		wmem_alloc;	/* Transmit queue bytes committed	*/
+@@ -639,10 +686,12 @@
+ 	union {
+ 		struct ipv6_pinfo	af_inet6;
+ 	} net_pinfo;
++#define inet6_sk(sk)	(&(sk)->net_pinfo.af_inet6)
+ #endif
+ 
+ 	union {
+ 		struct tcp_opt		af_tcp;
++		struct udp_opt		af_udp;
+ #if defined(CONFIG_IP_SCTP) || defined (CONFIG_IP_SCTP_MODULE)
+ 		struct sctp_opt		af_sctp;
+ #endif
+@@ -657,6 +706,10 @@
+ #endif /* CONFIG_SPX */
+ 
+ 	} tp_pinfo;
++#define tcp_sk(sk)		(&(sk)->tp_pinfo.af_tcp)
++#define udp_sk(sk) 		(&(sk)->tp_pinfo.af_udp)
++#define raw_sk(sk)		(&(sk)->tp_pinfo.tp_raw4)
++#define raw6_sk(sk)		(&(sk)->tp_pinfo.tp_raw)
+ 
+ 	int			err, err_soft;	/* Soft holds errors that don't
+ 						   cause failure but are the cause
+@@ -727,8 +780,11 @@
+ #if defined(CONFIG_WAN_ROUTER) || defined(CONFIG_WAN_ROUTER_MODULE)
+                struct wanpipe_opt      *af_wanpipe;
+ #endif
++#if defined(CONFIG_NET_KEY) || defined(CONFIG_NET_KEY_MODULE)
++		struct pfkey_opt	*pf_key;
++#endif
+ 	} protinfo;  		
+-
++#define inet_sk(sk)	(&(sk)->protinfo.af_inet)
+ 
+ 	/* This part is used for the timeout functions. */
+ 	struct timer_list	timer;		/* This is the sock cleanup timer. */
+@@ -792,6 +848,8 @@
+ 	int			(*recvmsg)(struct sock *sk, struct msghdr *msg,
+ 					int len, int noblock, int flags, 
+ 					int *addr_len);
++	int			(*sendpage)(struct sock *sk, struct page *page,
++					int offset, size_t size, int flags);
+ 	int			(*bind)(struct sock *sk, 
+ 					struct sockaddr *uaddr, int addr_len);
+ 
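
The pending and corkflag members of udp_opt back the new UDP_CORK semantics: while corked, every send just appends through ip_append_data() using the addresses and ports cached here, and a single datagram goes out when the cork is released. From userspace that is simply:

	/* Userspace sketch: build one UDP datagram from several writes.
	 * Error handling omitted. */
	int on = 1, off = 0;

	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
	send(fd, part1, len1, 0);	/* queued, not yet transmitted */
	send(fd, part2, len2, 0);	/* appended to the same datagram */
	setsockopt(fd, IPPROTO_UDP, UDP_CORK, &off, sizeof(off));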
+diff -Nru a/include/net/tcp.h b/include/net/tcp.h
+--- a/include/net/tcp.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/tcp.h	2005-02-13 21:25:09 +11:00
+@@ -575,13 +575,6 @@
+ /*
+  *	Pointers to address related TCP functions
+  *	(i.e. things that depend on the address family)
+- *
+- * 	BUGGG_FUTURE: all the idea behind this struct is wrong.
+- *	It mixes socket frontend with transport function.
+- *	With port sharing between IPv6/v4 it gives the only advantage,
+- *	only poor IPv6 needs to permanently recheck, that it
+- *	is still IPv6 8)8) It must be cleaned up as soon as possible.
+- *						--ANK (980802)
+  */
+ 
+ struct tcp_func {
+@@ -940,9 +933,12 @@
+ 	struct dst_entry *dst = __sk_dst_get(sk);
+ 	int mss_now = tp->mss_cache; 
+ 
+-	if (dst && dst->pmtu != tp->pmtu_cookie)
+-		mss_now = tcp_sync_mss(sk, dst->pmtu);
+-
++	if (dst) {
++		u32 mtu = dst_pmtu(dst);
++		if (mtu != tp->pmtu_cookie ||
++		    tp->ext2_header_len != dst->header_len)
++			mss_now = tcp_sync_mss(sk, mtu);
++	}
+ 	if (tp->eff_sacks)
+ 		mss_now -= (TCPOLEN_SACK_BASE_ALIGNED +
+ 			    (tp->eff_sacks * TCPOLEN_SACK_PERBLOCK));
+@@ -1257,7 +1253,7 @@
+ 	}
+ }
+ 
+-extern __u32 tcp_init_cwnd(struct tcp_opt *tp);
++extern __u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst);
+ 
+ /* Slow start with delack produces 3 packets of burst, so that
+  * it is safe "de facto".
+diff -Nru a/include/net/transp_v6.h b/include/net/transp_v6.h
+--- a/include/net/transp_v6.h	2005-02-13 21:25:09 +11:00
++++ b/include/net/transp_v6.h	2005-02-13 21:25:09 +11:00
+@@ -17,6 +17,13 @@
+ 
+ extern void				ipv6_frag_init(void);
+ 
++/* extension headers */
++extern void				ipv6_rthdr_init(void);
++extern void				ipv6_frag_init(void);
++extern void				ipv6_nodata_init(void);
++extern void				ipv6_destopt_init(void);
++
++/* transport protocols */
+ extern void				rawv6_init(void);
+ extern void				udpv6_init(void);
+ extern void				tcpv6_init(void);
+diff -Nru a/include/net/xfrm.h b/include/net/xfrm.h
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/include/net/xfrm.h	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,905 @@
++#ifndef _NET_XFRM_H
++#define _NET_XFRM_H
++
++#include <linux/xfrm.h>
++#include <linux/spinlock.h>
++#include <linux/list.h>
++#include <linux/skbuff.h>
++#include <linux/netdevice.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <linux/in6.h>
++
++#include <net/sock.h>
++#include <net/dst.h>
++#include <net/route.h>
++#include <net/ipv6.h>
++#include <net/ip6_fib.h>
++
++#define XFRM_ALIGN8(len)	(((len) + 7) & ~7)
++
++extern struct semaphore xfrm_cfg_sem;
++
++/* Organization of SPD aka "XFRM rules"
++   ------------------------------------
++
++   Basic objects:
++   - policy rule, struct xfrm_policy (=SPD entry)
++   - bundle of transformations, struct dst_entry == struct xfrm_dst (=SA bundle)
++   - instance of a transformer, struct xfrm_state (=SA)
++   - template to clone xfrm_state, struct xfrm_tmpl
++
++   SPD is plain linear list of xfrm_policy rules, ordered by priority.
++   (To be compatible with existing pfkeyv2 implementations,
++   many rules with priority of 0x7fffffff are allowed to exist and
++   such rules are ordered in an unpredictable way, thanks to bsd folks.)
++
++   Lookup is plain linear search until the first match with selector.
++
++   If "action" is "block", then we prohibit the flow, otherwise:
++   if "xfrms_nr" is zero, the flow passes untransformed. Otherwise,
++   policy entry has list of up to XFRM_MAX_DEPTH transformations,
++   described by templates xfrm_tmpl. Each template is resolved
++   to a complete xfrm_state (see below) and we pack bundle of transformations
++   to a dst_entry returned to requestor.
++
++   dst -. xfrm  .-> xfrm_state #1
++    |---. child .-> dst -. xfrm .-> xfrm_state #2
++                     |---. child .-> dst -. xfrm .-> xfrm_state #3
++                                      |---. child .-> NULL
++
++   Bundles are cached in the xfrm_policy struct (field ->bundles).
++
++
++   Resolution of xfrm_tmpl
++   -----------------------
++   Template contains:
++   1. ->mode		Mode: transport or tunnel
++   2. ->id.proto	Protocol: AH/ESP/IPCOMP
++   3. ->id.daddr	Remote tunnel endpoint, ignored for transport mode.
++      Q: allow to resolve security gateway?
++   4. ->id.spi          If not zero, static SPI.
++   5. ->saddr		Local tunnel endpoint, ignored for transport mode.
++   6. ->algos		List of allowed algos. Plain bitmask now.
++      Q: ealgos, aalgos, calgos. What a mess...
++   7. ->share		Sharing mode.
++      Q: how to implement private sharing mode? To add struct sock* to
++      flow id?
++
++   Having this template we search through SAD searching for entries
++   with appropriate mode/proto/algo, permitted by selector.
++   If no appropriate entry found, it is requested from key manager.
++
++   PROBLEMS:
++   Q: How to find all the bundles referring to a physical path for
++      PMTU discovery? Seems, dst should contain list of all parents...
++      and enter to infinite locking hierarchy disaster.
++      No! It is easier, we will not search for them, let them find us.
++      We add genid to each dst plus pointer to genid of raw IP route,
++      pmtu disc will update pmtu on raw IP route and increase its genid.
++      dst_check() will see this for top level and trigger resyncing
++      metrics. Plus, it will be made via sk->dst_cache. Solved.
++ */
++
++/* Full description of state of transformer. */
++struct xfrm_state
++{
++	/* Note: bydst is re-used during gc */
++	struct list_head	bydst;
++	struct list_head	byspi;
++
++	atomic_t		refcnt;
++	spinlock_t		lock;
++
++	struct xfrm_id		id;
++	struct xfrm_selector	sel;
++
++	/* Key manager bits */
++	struct {
++		u8		state;
++		u8		dying;
++		u32		seq;
++	} km;
++
++	/* Parameters of this state. */
++	struct {
++		u32		reqid;
++		u8		mode;
++		u8		replay_window;
++		u8		aalgo, ealgo, calgo;
++		u8		flags;
++		u16		family;
++		xfrm_address_t	saddr;
++		int		header_len;
++		int		trailer_len;
++	} props;
++
++	struct xfrm_lifetime_cfg lft;
++
++	/* Data for transformer */
++	struct xfrm_algo	*aalg;
++	struct xfrm_algo	*ealg;
++	struct xfrm_algo	*calg;
++
++	/* Data for encapsulator */
++	struct xfrm_encap_tmpl	*encap;
++
++	/* IPComp needs an IPIP tunnel for handling uncompressed packets */
++	struct xfrm_state	*tunnel;
++
++	/* If a tunnel, number of users + 1 */
++	atomic_t		tunnel_users;
++
++	/* State for replay detection */
++	struct xfrm_replay_state replay;
++
++	/* Statistics */
++	struct xfrm_stats	stats;
++
++	struct xfrm_lifetime_cur curlft;
++	struct timer_list	timer;
++
++	/* Reference to data common to all the instances of this
++	 * transformer. */
++	struct xfrm_type	*type;
++
++	/* Private data of this transformer, format is opaque,
++	 * interpreted by xfrm_type methods. */
++	void			*data;
++};
++
++enum {
++	XFRM_STATE_VOID,
++	XFRM_STATE_ACQ,
++	XFRM_STATE_VALID,
++	XFRM_STATE_ERROR,
++	XFRM_STATE_EXPIRED,
++	XFRM_STATE_DEAD
++};
++
++struct xfrm_type;
++struct xfrm_dst;
++struct xfrm_policy_afinfo {
++	unsigned short		family;
++	rwlock_t		lock;
++	struct xfrm_type_map	*type_map;
++	struct dst_ops		*dst_ops;
++	void			(*garbage_collect)(void);
++	int			(*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl);
++	struct dst_entry	*(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy);
++	int			(*bundle_create)(struct xfrm_policy *policy, 
++						 struct xfrm_state **xfrm, 
++						 int nx,
++						 struct flowi *fl, 
++						 struct dst_entry **dst_p);
++	void			(*decode_session)(struct sk_buff *skb,
++						  struct flowi *fl);
++};
++
++extern int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo);
++extern int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo);
++
++#define XFRM_ACQ_EXPIRES	30
++
++struct xfrm_tmpl;
++struct xfrm_state_afinfo {
++	unsigned short		family;
++	rwlock_t		lock;
++	struct list_head	*state_bydst;
++	struct list_head	*state_byspi;
++	void			(*init_tempsel)(struct xfrm_state *x, struct flowi *fl,
++						struct xfrm_tmpl *tmpl,
++						xfrm_address_t *daddr, xfrm_address_t *saddr);
++	struct xfrm_state	*(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto);
++	struct xfrm_state	*(*find_acq)(u8 mode, u32 reqid, u8 proto, 
++					     xfrm_address_t *daddr, xfrm_address_t *saddr, 
++					     int create);
++};
++
++extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo);
++extern int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo);
++
++extern void xfrm_state_delete_tunnel(struct xfrm_state *x);
++
++struct xfrm_decap_state;
++struct xfrm_type
++{
++	char			*description;
++	struct module		*owner;
++	__u8			proto;
++
++	int			(*init_state)(struct xfrm_state *x, void *args);
++	void			(*destructor)(struct xfrm_state *);
++	int			(*input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb);
++	int			(*post_input)(struct xfrm_state *, struct xfrm_decap_state *, struct sk_buff *skb);
++	int			(*output)(struct sk_buff *skb);
++	/* Estimate the maximal size of the result of transforming a dgram */
++	u32			(*get_max_size)(struct xfrm_state *, int size);
++};
++
++struct xfrm_type_map {
++	rwlock_t		lock;
++	struct xfrm_type	*map[256];
++};
++
++extern int xfrm_register_type(struct xfrm_type *type, unsigned short family);
++extern int xfrm_unregister_type(struct xfrm_type *type, unsigned short family);
++extern struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family);
++extern void xfrm_put_type(struct xfrm_type *type);
++
++struct xfrm_tmpl
++{
++/* id in template is interpreted as:
++ * daddr - destination of tunnel, may be zero for transport mode.
++ * spi   - zero to acquire spi. Not zero if spi is static, then
++ *	   daddr must be fixed too.
++ * proto - AH/ESP/IPCOMP
++ */
++	struct xfrm_id		id;
++
++/* Source address of tunnel. Ignored if it is not a tunnel. */
++	xfrm_address_t		saddr;
++
++	__u32			reqid;
++
++/* Mode: transport/tunnel */
++	__u8			mode;
++
++/* Sharing mode: unique, this session only, this user only etc. */
++	__u8			share;
++
++/* May skip this transformation if no SA is found */
++	__u8			optional;
++
++/* Bit mask of algos allowed for acquisition */
++	__u32			aalgos;
++	__u32			ealgos;
++	__u32			calgos;
++};
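
[Editorial note: a hypothetical illustration of the fields above. This
sketch fills a tunnel-mode ESP template the way a key manager might;
the addresses are made up, and it assumes the 0 = transport / 1 = tunnel
encoding of ->mode used elsewhere in this patch.]

#include <linux/in.h>
#include <linux/string.h>
#include <net/xfrm.h>

static void example_fill_tmpl(struct xfrm_tmpl *t)
{
	memset(t, 0, sizeof(*t));	/* id.spi stays 0: acquire an SPI */
	t->id.proto	= IPPROTO_ESP;
	t->id.daddr.a4	= htonl(0xc0a80201);	/* remote tunnel endpoint */
	t->saddr.a4	= htonl(0xc0a80101);	/* local tunnel endpoint */
	t->mode		= 1;			/* tunnel mode */
	t->reqid	= 1;
	t->aalgos	= ~0;			/* any auth algo */
	t->ealgos	= ~0;			/* any encryption algo */
	t->calgos	= ~0;			/* any compression algo */
}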
++
++#define XFRM_MAX_DEPTH		4
++
++struct xfrm_policy
++{
++	struct xfrm_policy	*next;
++	struct list_head	list;
++
++	/* This lock only affects elements except for entry. */
++	rwlock_t		lock;
++	atomic_t		refcnt;
++	struct timer_list	timer;
++
++	u32			priority;
++	u32			index;
++	struct xfrm_selector	selector;
++	struct xfrm_lifetime_cfg lft;
++	struct xfrm_lifetime_cur curlft;
++	struct dst_entry       *bundles;
++	__u16			family;
++	__u8			action;
++	__u8			flags;
++	__u8			dead;
++	__u8			xfrm_nr;
++	struct xfrm_tmpl       	xfrm_vec[XFRM_MAX_DEPTH];
++};
++
++#define XFRM_KM_TIMEOUT		30
++
++struct xfrm_mgr
++{
++	struct list_head	list;
++	char			*id;
++	int			(*notify)(struct xfrm_state *x, int event);
++	int			(*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir);
++	struct xfrm_policy	*(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir);
++	int			(*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
++	int			(*notify_policy)(struct xfrm_policy *x, int dir, int event);
++};
++
++extern int xfrm_register_km(struct xfrm_mgr *km);
++extern int xfrm_unregister_km(struct xfrm_mgr *km);
++
++
++extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
++
++static inline void xfrm_pol_hold(struct xfrm_policy *policy)
++{
++	if (likely(policy != NULL))
++		atomic_inc(&policy->refcnt);
++}
++
++extern void __xfrm_policy_destroy(struct xfrm_policy *policy);
++
++static inline void xfrm_pol_put(struct xfrm_policy *policy)
++{
++	if (atomic_dec_and_test(&policy->refcnt))
++		__xfrm_policy_destroy(policy);
++}
++
++#define XFRM_DST_HSIZE		1024
++
++static __inline__
++unsigned __xfrm4_dst_hash(xfrm_address_t *addr)
++{
++	unsigned h;
++	h = ntohl(addr->a4);
++	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
++	return h;
++}
++
++static __inline__
++unsigned __xfrm6_dst_hash(xfrm_address_t *addr)
++{
++	unsigned h;
++	h = ntohl(addr->a6[2]^addr->a6[3]);
++	h = (h ^ (h>>16)) % XFRM_DST_HSIZE;
++	return h;
++}
++
++static __inline__
++unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family)
++{
++	switch (family) {
++	case AF_INET:
++		return __xfrm4_dst_hash(addr);
++	case AF_INET6:
++		return __xfrm6_dst_hash(addr);
++	}
++	return 0;
++}
++
++static __inline__
++unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
++{
++	unsigned h;
++	h = ntohl(addr->a4^spi^proto);
++	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
++	return h;
++}
++
++static __inline__
++unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto)
++{
++	unsigned h;
++	h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto);
++	h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE;
++	return h;
++}
++
++static __inline__
++unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family)
++{
++	switch (family) {
++	case AF_INET:
++		return __xfrm4_spi_hash(addr, spi, proto);
++	case AF_INET6:
++		return __xfrm6_spi_hash(addr, spi, proto);
++	}
++	return 0;	/*XXX*/
++}
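
[Editorial note: a quick illustration of the bucket selection above,
with made-up values.]

#include <linux/in.h>
#include <net/xfrm.h>

static unsigned example_sa_bucket(void)
{
	xfrm_address_t daddr;

	daddr.a4 = htonl(0xc0a80101);	/* 192.168.1.1 */
	/* Bucket (e.g. into the state_byspi table) for an IPv4 ESP SA
	 * with SPI 0x100. */
	return xfrm_spi_hash(&daddr, htonl(0x100), IPPROTO_ESP, AF_INET);
}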
++
++extern void __xfrm_state_destroy(struct xfrm_state *);
++
++static inline void xfrm_state_put(struct xfrm_state *x)
++{
++	if (atomic_dec_and_test(&x->refcnt))
++		__xfrm_state_destroy(x);
++}
++
++static inline void xfrm_state_hold(struct xfrm_state *x)
++{
++	atomic_inc(&x->refcnt);
++}
++
++static __inline__ int addr_match(void *token1, void *token2, int prefixlen)
++{
++	__u32 *a1 = token1;
++	__u32 *a2 = token2;
++	int pdw;
++	int pbi;
++
++	pdw = prefixlen >> 5;	  /* num of whole __u32 in prefix */
++	pbi = prefixlen &  0x1f;  /* num of bits in incomplete u32 in prefix */
++
++	if (pdw)
++		if (memcmp(a1, a2, pdw << 2))
++			return 0;
++
++	if (pbi) {
++		__u32 mask;
++
++		mask = htonl((0xffffffff) << (32 - pbi));
++
++		if ((a1[pdw] ^ a2[pdw]) & mask)
++			return 0;
++	}
++
++	return 1;
++}
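
[Editorial note: a self-contained example. addr_match() reads its
arguments as __u32 words in network byte order, so a 24-bit prefix
covers the first three octets of an IPv4 address.]

#include <linux/in.h>

static int example_prefix_match(void)
{
	__u32 a = htonl(0xc0a80101);	/* 192.168.1.1  */
	__u32 b = htonl(0xc0a80142);	/* 192.168.1.66 */

	/* First call returns 1 (the 24 leading bits agree),
	 * second returns 0 (the full addresses differ). */
	return addr_match(&a, &b, 24) && !addr_match(&a, &b, 32);
}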
++
++static __inline__
++u16 xfrm_flowi_sport(struct flowi *fl)
++{
++	u16 port;
++	switch(fl->proto) {
++	case IPPROTO_TCP:
++	case IPPROTO_UDP:
++	case IPPROTO_SCTP:
++		port = fl->fl_ip_sport;
++		break;
++	case IPPROTO_ICMP:
++	case IPPROTO_ICMPV6:
++		port = htons(fl->fl_icmp_type);
++		break;
++	default:
++		port = 0;	/*XXX*/
++	}
++	return port;
++}
++
++static __inline__
++u16 xfrm_flowi_dport(struct flowi *fl)
++{
++	u16 port;
++	switch(fl->proto) {
++	case IPPROTO_TCP:
++	case IPPROTO_UDP:
++	case IPPROTO_SCTP:
++		port = fl->fl_ip_dport;
++		break;
++	case IPPROTO_ICMP:
++	case IPPROTO_ICMPV6:
++		port = htons(fl->fl_icmp_code);
++		break;
++	default:
++		port = 0;	/*XXX*/
++	}
++	return port;
++}
++
++static inline int
++__xfrm4_selector_match(struct xfrm_selector *sel, struct flowi *fl)
++{
++	return  addr_match(&fl->fl4_dst, &sel->daddr, sel->prefixlen_d) &&
++		addr_match(&fl->fl4_src, &sel->saddr, sel->prefixlen_s) &&
++		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
++		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
++		(fl->proto == sel->proto || !sel->proto) &&
++		(fl->oif == sel->ifindex || !sel->ifindex);
++}
++
++static inline int
++__xfrm6_selector_match(struct xfrm_selector *sel, struct flowi *fl)
++{
++	return  addr_match(&fl->fl6_dst, &sel->daddr, sel->prefixlen_d) &&
++		addr_match(&fl->fl6_src, &sel->saddr, sel->prefixlen_s) &&
++		!((xfrm_flowi_dport(fl) ^ sel->dport) & sel->dport_mask) &&
++		!((xfrm_flowi_sport(fl) ^ sel->sport) & sel->sport_mask) &&
++		(fl->proto == sel->proto || !sel->proto) &&
++		(fl->oif == sel->ifindex || !sel->ifindex);
++}
++
++static inline int
++xfrm_selector_match(struct xfrm_selector *sel, struct flowi *fl,
++		    unsigned short family)
++{
++	switch (family) {
++	case AF_INET:
++		return __xfrm4_selector_match(sel, fl);
++	case AF_INET6:
++		return __xfrm6_selector_match(sel, fl);
++	}
++	return 0;
++}
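
[Editorial note: a sketch of how a selector is built and matched; the
values are illustrative. Zeroed fields (saddr prefix, sport mask,
ifindex) act as wildcards, as the match functions above show.]

#include <linux/in.h>
#include <linux/string.h>
#include <net/xfrm.h>

static int example_selector_match(struct flowi *fl)
{
	struct xfrm_selector sel;

	memset(&sel, 0, sizeof(sel));
	sel.daddr.a4	= htonl(0x0a000000);	/* 10.0.0.0 */
	sel.prefixlen_d	= 8;			/* match 10/8 */
	sel.proto	= IPPROTO_TCP;
	sel.dport	= htons(80);
	sel.dport_mask	= 0xffff;		/* exact port */

	return xfrm_selector_match(&sel, fl, AF_INET);
}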
++
++/* A struct encoding a bundle of transformations to apply to some set
++ * of flows.
++ *
++ * dst->child points to the next element of the bundle.
++ * dst->xfrm  points to an instance of a transformer.
++ *
++ * Due to unfortunate limitations of the current routing cache, which
++ * we have no time to fix, it mirrors struct rtable and is bound to the
++ * same routing key, including saddr,daddr. However, we can have many
++ * bundles differing by session id. All the bundles grow from a parent
++ * policy rule.
++ */
++struct xfrm_dst
++{
++	union {
++		struct xfrm_dst		*next;
++		struct dst_entry	dst;
++		struct rtable		rt;
++		struct rt6_info		rt6;
++	} u;
++};
++
++/* Decapsulation state, used by the input path to store data during
++ * the decapsulation procedure, to be used later (during the policy
++ * check).
++ */
++struct xfrm_decap_state {
++	char	decap_data[20];
++	__u16	decap_type;
++};   
++
++struct sec_decap_state {
++	struct xfrm_state	*xvec;
++	struct xfrm_decap_state decap;
++};
++
++struct sec_path
++{
++	atomic_t		refcnt;
++	int			len;
++	struct sec_decap_state	x[XFRM_MAX_DEPTH];
++};
++
++static inline struct sec_path *
++secpath_get(struct sec_path *sp)
++{
++	if (sp)
++		atomic_inc(&sp->refcnt);
++	return sp;
++}
++
++extern void __secpath_destroy(struct sec_path *sp);
++
++static inline void
++secpath_put(struct sec_path *sp)
++{
++	if (sp && atomic_dec_and_test(&sp->refcnt))
++		__secpath_destroy(sp);
++}
++
++extern struct sec_path *secpath_dup(struct sec_path *src);
++
++static inline void
++secpath_reset(struct sk_buff *skb)
++{
++#ifdef CONFIG_XFRM
++	secpath_put(skb->sp);
++	skb->sp = NULL;
++#endif
++}
++
++static inline int
++__xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x)
++{
++	return	(tmpl->saddr.a4 &&
++		 tmpl->saddr.a4 != x->props.saddr.a4);
++}
++
++static inline int
++__xfrm6_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x)
++{
++	return	(!ipv6_addr_any((struct in6_addr*)&tmpl->saddr) &&
++		 ipv6_addr_cmp((struct in6_addr *)&tmpl->saddr, (struct in6_addr*)&x->props.saddr));
++}
++
++static inline int
++xfrm_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x, unsigned short family)
++{
++	switch (family) {
++	case AF_INET:
++		return __xfrm4_state_addr_cmp(tmpl, x);
++	case AF_INET6:
++		return __xfrm6_state_addr_cmp(tmpl, x);
++	}
++	return !0;
++}
++
++#ifdef CONFIG_XFRM
++
++extern int __xfrm_policy_check(struct sock *, int dir, struct sk_buff *skb, unsigned short family);
++
++static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
++{
++	if (sk && sk->policy[XFRM_POLICY_IN])
++		return __xfrm_policy_check(sk, dir, skb, family);
++		
++	return	(!xfrm_policy_list[dir] && !skb->sp) ||
++		(skb->dst->flags & DST_NOPOLICY) ||
++		__xfrm_policy_check(sk, dir, skb, family);
++}
++
++static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
++{
++	return xfrm_policy_check(sk, dir, skb, AF_INET);
++}
++
++static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
++{
++	return xfrm_policy_check(sk, dir, skb, AF_INET6);
++}
++
++
++extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family);
++
++static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family)
++{
++	return	!xfrm_policy_list[XFRM_POLICY_OUT] ||
++		(skb->dst->flags & DST_NOXFRM) ||
++		__xfrm_route_forward(skb, family);
++}
++
++static inline int xfrm4_route_forward(struct sk_buff *skb)
++{
++	return xfrm_route_forward(skb, AF_INET);
++}
++
++static inline int xfrm6_route_forward(struct sk_buff *skb)
++{
++	return xfrm_route_forward(skb, AF_INET6);
++}
++
++extern int __xfrm_sk_clone_policy(struct sock *sk);
++
++static inline int xfrm_sk_clone_policy(struct sock *sk)
++{
++	if (unlikely(sk->policy[0] || sk->policy[1]))
++		return __xfrm_sk_clone_policy(sk);
++	return 0;
++}
++
++extern void xfrm_policy_delete(struct xfrm_policy *pol, int dir);
++
++static inline void xfrm_sk_free_policy(struct sock *sk)
++{
++	if (unlikely(sk->policy[0] != NULL)) {
++		xfrm_policy_delete(sk->policy[0], XFRM_POLICY_MAX);
++		sk->policy[0] = NULL;
++	}
++	if (unlikely(sk->policy[1] != NULL)) {
++		xfrm_policy_delete(sk->policy[1], XFRM_POLICY_MAX+1);
++		sk->policy[1] = NULL;
++	}
++}
++
++#else
++
++static inline void xfrm_sk_free_policy(struct sock *sk) {}
++static inline int xfrm_sk_clone_policy(struct sock *sk) { return 0; }
++static inline int xfrm6_route_forward(struct sk_buff *skb) { return 1; }  
++static inline int xfrm4_route_forward(struct sk_buff *skb) { return 1; } 
++static inline int xfrm6_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
++{ 
++	return 1; 
++} 
++static inline int xfrm4_policy_check(struct sock *sk, int dir, struct sk_buff *skb)
++{
++	return 1;
++}
++static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family)
++{
++	return 1;
++}
++#endif
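
[Editorial note: the intended usage pattern for the checks above, as
the protocol input paths elsewhere in this patch apply it; a sketch,
and the handler name is hypothetical.]

#include <linux/skbuff.h>
#include <net/xfrm.h>

static int example_rcv(struct sk_buff *skb)
{
	/* Refuse packets that inbound IPsec policy forbids before
	 * doing any protocol work. */
	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		kfree_skb(skb);
		return 0;
	}
	/* ... normal protocol processing ... */
	return 0;
}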
++
++static __inline__
++xfrm_address_t *xfrm_flowi_daddr(struct flowi *fl, unsigned short family)
++{
++	switch (family){
++	case AF_INET:
++		return (xfrm_address_t *)&fl->fl4_dst;
++	case AF_INET6:
++		return (xfrm_address_t *)&fl->fl6_dst;
++	}
++	return NULL;
++}
++
++static __inline__
++xfrm_address_t *xfrm_flowi_saddr(struct flowi *fl, unsigned short family)
++{
++	switch (family){
++	case AF_INET:
++		return (xfrm_address_t *)&fl->fl4_src;
++	case AF_INET6:
++		return (xfrm_address_t *)&fl->fl6_src;
++	}
++	return NULL;
++}
++
++static __inline__ int
++__xfrm4_state_addr_check(struct xfrm_state *x,
++			 xfrm_address_t *daddr, xfrm_address_t *saddr)
++{
++	if (daddr->a4 == x->id.daddr.a4 &&
++	    (saddr->a4 == x->props.saddr.a4 || !saddr->a4 || !x->props.saddr.a4))
++		return 1;
++	return 0;
++}
++
++static __inline__ int
++__xfrm6_state_addr_check(struct xfrm_state *x,
++			 xfrm_address_t *daddr, xfrm_address_t *saddr)
++{
++	if (!ipv6_addr_cmp((struct in6_addr *)daddr, (struct in6_addr *)&x->id.daddr) &&
++	    (!ipv6_addr_cmp((struct in6_addr *)saddr, (struct in6_addr *)&x->props.saddr)|| 
++	     ipv6_addr_any((struct in6_addr *)saddr) || 
++	     ipv6_addr_any((struct in6_addr *)&x->props.saddr)))
++		return 1;
++	return 0;
++}
++
++static __inline__ int
++xfrm_state_addr_check(struct xfrm_state *x,
++		      xfrm_address_t *daddr, xfrm_address_t *saddr,
++		      unsigned short family)
++{
++	switch (family) {
++	case AF_INET:
++		return __xfrm4_state_addr_check(x, daddr, saddr);
++	case AF_INET6:
++		return __xfrm6_state_addr_check(x, daddr, saddr);
++	}
++	return 0;
++}
++
++static inline int xfrm_state_kern(struct xfrm_state *x)
++{
++	return atomic_read(&x->tunnel_users);
++}
++
++/*
++ * xfrm algorithm information
++ */
++struct xfrm_algo_auth_info {
++	u16 icv_truncbits;
++	u16 icv_fullbits;
++};
++
++struct xfrm_algo_encr_info {
++	u16 blockbits;
++	u16 defkeybits;
++};
++
++struct xfrm_algo_comp_info {
++	u16 threshold;
++};
++
++struct xfrm_algo_desc {
++	char *name;
++	u8 available:1;
++	union {
++		struct xfrm_algo_auth_info auth;
++		struct xfrm_algo_encr_info encr;
++		struct xfrm_algo_comp_info comp;
++	} uinfo;
++	struct sadb_alg desc;
++};
++
++/* XFRM tunnel handlers.  */
++struct xfrm_tunnel {
++	int (*handler)(struct sk_buff *skb);
++	void (*err_handler)(struct sk_buff *skb, void *info);
++};
++
++struct xfrm6_tunnel {
++	int (*handler)(struct sk_buff **pskb, unsigned int *nhoffp);
++	void (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
++			    int type, int code, int offset, __u32 info);
++};
++
++extern void xfrm_init(void);
++extern void xfrm4_init(void);
++extern void xfrm4_fini(void);
++extern void xfrm6_init(void);
++extern void xfrm6_fini(void);
++extern void xfrm_state_init(void);
++extern void xfrm4_state_init(void);
++extern void xfrm4_state_fini(void);
++extern void xfrm6_state_init(void);
++extern void xfrm6_state_fini(void);
++
++extern int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), void *);
++extern struct xfrm_state *xfrm_state_alloc(void);
++extern struct xfrm_state *xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
++					  struct flowi *fl, struct xfrm_tmpl *tmpl,
++					  struct xfrm_policy *pol, int *err,
++					  unsigned short family);
++extern int xfrm_state_check_expire(struct xfrm_state *x);
++extern void xfrm_state_insert(struct xfrm_state *x);
++extern int xfrm_state_add(struct xfrm_state *x);
++extern int xfrm_state_update(struct xfrm_state *x);
++extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family);
++extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
++extern void xfrm_state_delete(struct xfrm_state *x);
++extern void xfrm_state_flush(u8 proto);
++extern int xfrm_replay_check(struct xfrm_state *x, u32 seq);
++extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq);
++extern int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb);
++extern int xfrm4_rcv(struct sk_buff *skb);
++extern int xfrm4_output(struct sk_buff *skb);
++extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler);
++extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler);
++extern int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi);
++extern int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
++extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler);
++extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler);
++extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr);
++extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr);
++extern u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr);
++extern int xfrm6_output(struct sk_buff *skb);
++
++#ifdef CONFIG_XFRM
++extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type);
++extern int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen);
++extern int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family);
++#else
++static inline int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
++{
++ 	return -ENOPROTOOPT;
++} 
++
++static inline int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
++{
++ 	/* should not happen */
++ 	kfree_skb(skb);
++	return 0;
++}
++static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsigned short family)
++{
++	return -EINVAL;
++} 
++#endif
++
++void xfrm_policy_init(void);
++struct xfrm_policy *xfrm_policy_alloc(int gfp);
++extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *);
++int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl);
++struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
++				      int delete);
++struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete);
++void xfrm_policy_flush(void);
++u32 xfrm_get_acqseq(void);
++void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi);
++struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
++				  xfrm_address_t *daddr, xfrm_address_t *saddr, 
++				  int create, unsigned short family);
++extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol);
++extern struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl);
++extern int xfrm_flush_bundles(void);
++
++extern wait_queue_head_t km_waitq;
++extern void km_state_expired(struct xfrm_state *x, int hard);
++extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *pol);
++extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport);
++extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard);
++
++extern void xfrm_input_init(void);
++extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq);
++
++extern void xfrm_probe_algs(void);
++extern int xfrm_count_auth_supported(void);
++extern int xfrm_count_enc_supported(void);
++extern struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx);
++extern struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx);
++extern struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx);
++extern struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id);
++extern struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id);
++extern struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id);
++extern struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name);
++extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name);
++extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name);
++
++struct crypto_tfm;
++typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int);
++
++extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
++			 int offset, int len, icv_update_fn_t icv_update);
++
++static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b,
++				int family)
++{
++	switch (family) {
++	default:
++	case AF_INET:
++		return a->a4 - b->a4;
++	case AF_INET6:
++		return ipv6_addr_cmp((struct in6_addr *)a,
++				     (struct in6_addr *)b);
++	}
++}
++
++#endif	/* _NET_XFRM_H */
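
[Editorial note: to tie the API together, a hedged sketch of creating a
minimal SA with the declarations above; a real caller would also attach
algorithms and keys so that ->type->init_state() can succeed. Values
are illustrative.]

#include <linux/errno.h>
#include <linux/in.h>
#include <net/xfrm.h>

static int example_add_sa(void)
{
	struct xfrm_state *x = xfrm_state_alloc();

	if (!x)
		return -ENOMEM;
	x->id.proto	= IPPROTO_ESP;
	x->id.spi	= htonl(0x1000);
	x->id.daddr.a4	= htonl(0xc0a80201);
	x->props.family	= AF_INET;
	x->props.mode	= 1;		/* tunnel */
	if (xfrm_state_add(x)) {
		xfrm_state_put(x);	/* rejected (e.g. duplicate SPI) */
		return -EEXIST;
	}
	return 0;
}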
+diff -Nru a/net/Config.in b/net/Config.in
+--- a/net/Config.in	2005-02-13 21:25:09 +11:00
++++ b/net/Config.in	2005-02-13 21:25:09 +11:00
+@@ -16,6 +16,7 @@
+ fi
+ bool 'Socket Filtering'  CONFIG_FILTER
+ tristate 'Unix domain sockets' CONFIG_UNIX
++tristate 'PF_KEY sockets' CONFIG_NET_KEY
+ bool 'TCP/IP networking' CONFIG_INET
+ if [ "$CONFIG_INET" = "y" ]; then
+    source net/ipv4/Config.in
+@@ -25,6 +26,28 @@
+       if [ "$CONFIG_IPV6" != "n" ]; then
+ 	 source net/ipv6/Config.in
+       fi
++   fi
++   if [ "$CONFIG_NET_KEY" != "n" -o \
++	"$CONFIG_NET_IPGRE" != "n" -o \
++	"$CONFIG_INET_AH" != "n" -o \
++	"$CONFIG_INET_ESP" != "n" -o \
++	"$CONFIG_INET_TUNNEL" != "n" ]; then
++      define_bool CONFIG_XFRM y
++   else
++      if [ "$CONFIG_IPV6" != "n" ]; then
++	 if [ "$CONFIG_INET6_AH" != "n" -o \
++	      "$CONFIG_INET6_ESP" != "n" -o \
++	      "$CONFIG_INET6_TUNNEL" != "n" ]; then
++	    define_bool CONFIG_XFRM y
++	 else
++	    bool '  XFRM support' CONFIG_XFRM
++	 fi
++      else
++	 bool '  XFRM support' CONFIG_XFRM
++      fi
++   fi
++   if [ "$CONFIG_XFRM" = "y" ]; then
++      source net/xfrm/Config.in
+    fi
+    if [ "$CONFIG_EXPERIMENTAL" = "y" ]; then
+       source net/khttpd/Config.in
+diff -Nru a/net/Makefile b/net/Makefile
+--- a/net/Makefile	2005-02-13 21:25:09 +11:00
++++ b/net/Makefile	2005-02-13 21:25:09 +11:00
+@@ -7,28 +7,24 @@
+ 
+ O_TARGET :=	network.o
+ 
+-mod-subdirs :=	ipv4/netfilter ipv6/netfilter ipx irda bluetooth atm netlink sched core sctp 802
++mod-subdirs :=	ipv4/netfilter ipv6 ipx irda bluetooth atm netlink sched core sctp 802 xfrm
+ export-objs :=	netsyms.o
+ 
+ subdir-y :=	core ethernet
+-subdir-m :=	ipv4 # hum?
++subdir-m :=	ipv4 xfrm # hum?
+ 
+ 
+ subdir-$(CONFIG_NET)		+= 802 sched netlink
+ subdir-$(CONFIG_IPV6)		+= ipv6
+ subdir-$(CONFIG_INET)		+= ipv4
++subdir-$(CONFIG_XFRM)		+= xfrm
+ subdir-$(CONFIG_NETFILTER)	+= ipv4/netfilter
+ subdir-$(CONFIG_UNIX)		+= unix
+ subdir-$(CONFIG_IP_SCTP)	+= sctp
+ 
+-ifneq ($(CONFIG_IPV6),n)
+-ifneq ($(CONFIG_IPV6),)
+-subdir-$(CONFIG_NETFILTER)	+= ipv6/netfilter
+-endif
+-endif
+-
+ subdir-$(CONFIG_KHTTPD)		+= khttpd
+ subdir-$(CONFIG_PACKET)		+= packet
++subdir-$(CONFIG_NET_KEY)	+= key
+ subdir-$(CONFIG_NET_SCHED)	+= sched
+ subdir-$(CONFIG_BRIDGE)		+= bridge
+ subdir-$(CONFIG_IPX)		+= ipx
+diff -Nru a/net/atm/clip.c b/net/atm/clip.c
+--- a/net/atm/clip.c	2005-02-13 21:25:09 +11:00
++++ b/net/atm/clip.c	2005-02-13 21:25:09 +11:00
+@@ -503,6 +503,7 @@
+ 	struct atmarp_entry *entry;
+ 	int error;
+ 	struct clip_vcc *clip_vcc;
++	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip, .tos = 1 } } };
+ 	struct rtable *rt;
+ 
+ 	if (vcc->push != clip_push) {
+@@ -519,7 +520,7 @@
+ 		unlink_clip_vcc(clip_vcc);
+ 		return 0;
+ 	}
+-	error = ip_route_output(&rt,ip,0,1,0);
++	error = ip_route_output_key(&rt,&fl);
+ 	if (error) return error;
+ 	neigh = __neigh_lookup(&clip_tbl,&ip,rt->u.dst.dev,1);
+ 	ip_rt_put(rt);
+diff -Nru a/net/core/Makefile b/net/core/Makefile
+--- a/net/core/Makefile	2005-02-13 21:25:10 +11:00
++++ b/net/core/Makefile	2005-02-13 21:25:10 +11:00
+@@ -21,8 +21,8 @@
+ 
+ obj-$(CONFIG_FILTER) += filter.o
+ 
+-obj-$(CONFIG_NET) +=	dev.o ethtool.o dev_mcast.o dst.o neighbour.o \
+-			rtnetlink.o utils.o
++obj-$(CONFIG_NET) +=	flow.o dev.o ethtool.o dev_mcast.o dst.o \
++			neighbour.o rtnetlink.o utils.o
+ 
+ obj-$(CONFIG_NETFILTER) += netfilter.o
+ obj-$(CONFIG_NET_DIVERT) += dv.o
+diff -Nru a/net/core/dev.c b/net/core/dev.c
+--- a/net/core/dev.c	2005-02-13 21:25:09 +11:00
++++ b/net/core/dev.c	2005-02-13 21:25:09 +11:00
+@@ -912,6 +912,13 @@
+ 	return notifier_chain_register(&netdev_chain, nb);
+ }
+ 
++/* Synchronize with packet receive processing. */
++void synchronize_net(void) 
++{
++	br_write_lock_bh(BR_NETPROTO_LOCK);
++	br_write_unlock_bh(BR_NETPROTO_LOCK);
++}
++
+ /**
+  *	unregister_netdevice_notifier - unregister a network notifier block
+  *	@nb: notifier
+@@ -1479,6 +1486,7 @@
+ #endif
+ 
+ 	skb->h.raw = skb->nh.raw = skb->data;
++	skb->mac_len = skb->nh.raw - skb->mac.raw;
+ 
+ 	pt_prev = NULL;
+ 	for (ptype = ptype_all; ptype; ptype = ptype->next) {
+diff -Nru a/net/core/dst.c b/net/core/dst.c
+--- a/net/core/dst.c	2005-02-13 21:25:09 +11:00
++++ b/net/core/dst.c	2005-02-13 21:25:09 +11:00
+@@ -36,11 +36,11 @@
+ static unsigned long dst_gc_timer_expires;
+ static unsigned long dst_gc_timer_inc = DST_GC_MAX;
+ static void dst_run_gc(unsigned long);
++static void ___dst_free(struct dst_entry * dst);
+ 
+ static struct timer_list dst_gc_timer =
+ 	{ data: DST_GC_MIN, function: dst_run_gc };
+ 
+-
+ static void dst_run_gc(unsigned long dummy)
+ {
+ 	int    delayed = 0;
+@@ -61,7 +61,25 @@
+ 			continue;
+ 		}
+ 		*dstp = dst->next;
+-		dst_destroy(dst);
++
++		dst = dst_destroy(dst);
++		if (dst) {
++			/* NOHASH and still referenced. Unless it is already
++			 * on gc list, invalidate it and add to gc list.
++			 *
++			 * Note: this is temporary. Actually, NOHASH dst's
++			 * must be obsoleted when parent is obsoleted.
++			 * But we do not have state "obsoleted, but
++			 * referenced by parent", so it is right.
++			 */
++			if (dst->obsolete > 1)
++				continue;
++
++			___dst_free(dst);
++			dst->next = *dstp;
++			*dstp = dst;
++			dstp = &dst->next;
++		}
+ 	}
+ 	if (!dst_garbage_list) {
+ 		dst_gc_timer_inc = DST_GC_MAX;
+@@ -108,6 +126,7 @@
+ 	atomic_set(&dst->__refcnt, 0);
+ 	dst->ops = ops;
+ 	dst->lastuse = jiffies;
++	dst->path = dst;
+ 	dst->input = dst_discard;
+ 	dst->output = dst_blackhole;
+ #if RT_CACHE_DEBUG >= 2 
+@@ -117,10 +136,8 @@
+ 	return dst;
+ }
+ 
+-void __dst_free(struct dst_entry * dst)
++static void ___dst_free(struct dst_entry * dst)
+ {
+-	spin_lock_bh(&dst_lock);
+-
+ 	/* The first case (dev==NULL) is required, when
+ 	   protocol module is unloaded.
+ 	 */
+@@ -129,6 +146,12 @@
+ 		dst->output = dst_blackhole;
+ 	}
+ 	dst->obsolete = 2;
++}
++
++void __dst_free(struct dst_entry * dst)
++{
++	spin_lock_bh(&dst_lock);
++	___dst_free(dst);
+ 	dst->next = dst_garbage_list;
+ 	dst_garbage_list = dst;
+ 	if (dst_gc_timer_inc > DST_GC_INC) {
+@@ -136,14 +159,19 @@
+ 		dst_gc_timer_expires = DST_GC_MIN;
+ 		mod_timer(&dst_gc_timer, jiffies + dst_gc_timer_expires);
+ 	}
+-
+ 	spin_unlock_bh(&dst_lock);
+ }
+ 
+-void dst_destroy(struct dst_entry * dst)
++struct dst_entry *dst_destroy(struct dst_entry * dst)
+ {
+-	struct neighbour *neigh = dst->neighbour;
+-	struct hh_cache *hh = dst->hh;
++	struct dst_entry *child;
++	struct neighbour *neigh;
++	struct hh_cache *hh;
++
++again:
++	neigh = dst->neighbour;
++	hh = dst->hh;
++	child = dst->child;
+ 
+ 	dst->hh = NULL;
+ 	if (hh && atomic_dec_and_test(&hh->hh_refcnt))
+@@ -164,6 +192,21 @@
+ 	atomic_dec(&dst_total);
+ #endif
+ 	kmem_cache_free(dst->ops->kmem_cachep, dst);
++
++	dst = child;
++	if (dst) {
++		if (atomic_dec_and_test(&dst->__refcnt)) {
++			/* We were real parent of this dst, so kill child. */
++			if (dst->flags&DST_NOHASH)
++				goto again;
++		} else {
++			/* Child is still referenced, return it for freeing. */
++			if (dst->flags&DST_NOHASH)
++				return dst;
++			/* Child is still in its hash table */
++		}
++	}
++	return NULL;
+ }
+ 
+ static int dst_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
+diff -Nru a/net/core/flow.c b/net/core/flow.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/core/flow.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,322 @@
++/* flow.c: Generic flow cache.
++ *
++ * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet at ms2.inr.ac.ru)
++ * Copyright (C) 2003 David S. Miller (davem at redhat.com)
++ */
++
++#include <linux/kernel.h>
++#include <linux/sched.h>
++#include <linux/list.h>
++#include <linux/jhash.h>
++#include <linux/interrupt.h>
++#include <linux/mm.h>
++#include <linux/random.h>
++#include <linux/init.h>
++#include <linux/slab.h>
++#include <linux/smp.h>
++#include <linux/completion.h>
++#include <net/flow.h>
++#include <asm/atomic.h>
++#include <asm/semaphore.h>
++
++struct flow_cache_entry {
++	struct flow_cache_entry	*next;
++	u16			family;
++	u8			dir;
++	struct flowi		key;
++	u32			genid;
++	void			*object;
++	atomic_t		*object_ref;
++};
++
++atomic_t flow_cache_genid = ATOMIC_INIT(0);
++
++static u32 flow_hash_shift;
++#define flow_hash_size	(1 << flow_hash_shift)
++static struct flow_cache_entry **flow_table;
++static kmem_cache_t *flow_cachep;
++
++static int flow_lwm, flow_hwm;
++
++struct flow_percpu_info {
++	int hash_rnd_recalc;
++	u32 hash_rnd;
++	int count;
++} ____cacheline_aligned;
++static struct flow_percpu_info flow_hash_info[NR_CPUS];
++
++#define flow_hash_rnd_recalc(cpu)	(flow_hash_info[cpu].hash_rnd_recalc)
++#define flow_hash_rnd(cpu)		(flow_hash_info[cpu].hash_rnd)
++#define flow_count(cpu)			(flow_hash_info[cpu].count)
++
++static struct timer_list flow_hash_rnd_timer;
++
++#define FLOW_HASH_RND_PERIOD	(10 * 60 * HZ)
++
++struct flow_flush_info {
++	atomic_t cpuleft;
++	struct completion completion;
++};
++static struct tasklet_struct flow_flush_tasklets[NR_CPUS];
++static DECLARE_MUTEX(flow_flush_sem);
++
++static void flow_cache_new_hashrnd(unsigned long arg)
++{
++	int i;
++
++	for (i = 0; i < NR_CPUS; i++)
++		flow_hash_rnd_recalc(i) = 1;
++
++	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++	add_timer(&flow_hash_rnd_timer);
++}
++
++static void __flow_cache_shrink(int cpu, int shrink_to)
++{
++	struct flow_cache_entry *fle, **flp;
++	int i;
++
++	for (i = 0; i < flow_hash_size; i++) {
++		int k = 0;
++
++		flp = &flow_table[cpu*flow_hash_size+i];
++		while ((fle = *flp) != NULL && k < shrink_to) {
++			k++;
++			flp = &fle->next;
++		}
++		while ((fle = *flp) != NULL) {
++			*flp = fle->next;
++			if (fle->object)
++				atomic_dec(fle->object_ref);
++			kmem_cache_free(flow_cachep, fle);
++			flow_count(cpu)--;
++		}
++	}
++}
++
++static void flow_cache_shrink(int cpu)
++{
++	int shrink_to = flow_lwm / flow_hash_size;
++
++	__flow_cache_shrink(cpu, shrink_to);
++}
++
++static void flow_new_hash_rnd(int cpu)
++{
++	get_random_bytes(&flow_hash_rnd(cpu), sizeof(u32));
++	flow_hash_rnd_recalc(cpu) = 0;
++
++	__flow_cache_shrink(cpu, 0);
++}
++
++static u32 flow_hash_code(struct flowi *key, int cpu)
++{
++	u32 *k = (u32 *) key;
++
++	return (jhash2(k, (sizeof(*key) / sizeof(u32)), flow_hash_rnd(cpu)) &
++		(flow_hash_size - 1));
++}
++
++#if (BITS_PER_LONG == 64)
++typedef u64 flow_compare_t;
++#else
++typedef u32 flow_compare_t;
++#endif
++
++/* Never defined anywhere: if sizeof(struct flowi) is not a multiple
++ * of flow_compare_t, the call in flow_key_compare() survives to link
++ * time and fails there, acting as a poor man's compile-time assertion. */
++extern void flowi_is_missized(void);
++
++/* I hear what you're saying, use memcmp.  But memcmp cannot make
++ * important assumptions that we can here, such as alignment and
++ * constant size.
++ */
++static int flow_key_compare(struct flowi *key1, struct flowi *key2)
++{
++	flow_compare_t *k1, *k1_lim, *k2;
++	const int n_elem = sizeof(struct flowi) / sizeof(flow_compare_t);
++
++	if (sizeof(struct flowi) % sizeof(flow_compare_t))
++		flowi_is_missized();
++
++	k1 = (flow_compare_t *) key1;
++	k1_lim = k1 + n_elem;
++
++	k2 = (flow_compare_t *) key2;
++
++	do {
++		if (*k1++ != *k2++)
++			return 1;
++	} while (k1 < k1_lim);
++
++	return 0;
++}
++
++void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir,
++			flow_resolve_t resolver)
++{
++	struct flow_cache_entry *fle, **head;
++	unsigned int hash;
++	int cpu;
++
++	local_bh_disable();
++	cpu = smp_processor_id();
++	if (flow_hash_rnd_recalc(cpu))
++		flow_new_hash_rnd(cpu);
++	hash = flow_hash_code(key, cpu);
++
++	head = &flow_table[(cpu << flow_hash_shift) + hash];
++	for (fle = *head; fle; fle = fle->next) {
++		if (fle->family == family &&
++		    fle->dir == dir &&
++		    flow_key_compare(key, &fle->key) == 0) {
++			if (fle->genid == atomic_read(&flow_cache_genid)) {
++				void *ret = fle->object;
++
++				if (ret)
++					atomic_inc(fle->object_ref);
++				local_bh_enable();
++
++				return ret;
++			}
++			break;
++		}
++	}
++
++	if (!fle) {
++		if (flow_count(cpu) > flow_hwm)
++			flow_cache_shrink(cpu);
++
++		fle = kmem_cache_alloc(flow_cachep, SLAB_ATOMIC);
++		if (fle) {
++			fle->next = *head;
++			*head = fle;
++			fle->family = family;
++			fle->dir = dir;
++			memcpy(&fle->key, key, sizeof(*key));
++			fle->object = NULL;
++			flow_count(cpu)++;
++		}
++	}
++
++	{
++		void *obj;
++		atomic_t *obj_ref;
++
++		resolver(key, family, dir, &obj, &obj_ref);
++
++		if (fle) {
++			fle->genid = atomic_read(&flow_cache_genid);
++
++			if (fle->object)
++				atomic_dec(fle->object_ref);
++
++			fle->object = obj;
++			fle->object_ref = obj_ref;
++			if (obj)
++				atomic_inc(fle->object_ref);
++		}
++		local_bh_enable();
++
++		return obj;
++	}
++}
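
[Editorial note: a toy caller for flow_cache_lookup(), assuming
flow_resolve_t is declared in net/flow.h as
void (*)(struct flowi *, u16, u8, void **, atomic_t **). The resolver
returns the object with one reference for the caller; the cache then
takes a second reference of its own for the entry, as the code above
shows.]

#include <linux/types.h>
#include <linux/socket.h>
#include <asm/atomic.h>
#include <net/flow.h>

static int example_object = 42;
static atomic_t example_ref = ATOMIC_INIT(1);

static void example_resolver(struct flowi *key, u16 family, u8 dir,
			     void **objp, atomic_t **obj_refp)
{
	*objp = &example_object;
	*obj_refp = &example_ref;
}

static void *example_lookup(struct flowi *fl)
{
	/* Direction 0 here is illustrative (e.g. XFRM_POLICY_IN). */
	return flow_cache_lookup(fl, AF_INET, 0, example_resolver);
}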
++
++static void flow_cache_flush_tasklet(unsigned long data)
++{
++	struct flow_flush_info *info = (void *)data;
++	int i;
++	int cpu;
++
++	cpu = smp_processor_id();
++	for (i = 0; i < flow_hash_size; i++) {
++		struct flow_cache_entry *fle;
++
++		fle = flow_table[(cpu << flow_hash_shift) + i];
++		for (; fle; fle = fle->next) {
++			unsigned genid = atomic_read(&flow_cache_genid);
++
++			if (!fle->object || fle->genid == genid)
++				continue;
++
++			fle->object = NULL;
++			atomic_dec(fle->object_ref);
++		}
++	}
++
++	if (atomic_dec_and_test(&info->cpuleft))
++		complete(&info->completion);
++}
++
++static void flow_cache_flush_per_cpu(void *data)
++{
++	struct flow_flush_info *info = data;
++	int cpu;
++	struct tasklet_struct *tasklet;
++
++	cpu = smp_processor_id();
++	tasklet = &flow_flush_tasklets[cpu];
++	tasklet_init(tasklet, flow_cache_flush_tasklet, (unsigned long)info);
++	tasklet_schedule(tasklet);
++}
++
++void flow_cache_flush(void)
++{
++	struct flow_flush_info info;
++
++	atomic_set(&info.cpuleft, smp_num_cpus);
++	init_completion(&info.completion);
++
++	down(&flow_flush_sem);
++
++	local_bh_disable();
++	smp_call_function(flow_cache_flush_per_cpu, &info, 1, 0);
++	flow_cache_flush_per_cpu(&info);
++	local_bh_enable();
++
++	wait_for_completion(&info.completion);
++
++	up(&flow_flush_sem);
++}
++
++static int __init flow_cache_init(void)
++{
++	unsigned long order;
++	int i;
++
++	flow_cachep = kmem_cache_create("flow_cache",
++					sizeof(struct flow_cache_entry),
++					0, SLAB_HWCACHE_ALIGN,
++					NULL, NULL);
++
++	if (!flow_cachep)
++		panic("NET: failed to allocate flow cache slab\n");
++
++	flow_hash_shift = 10;
++	flow_lwm = 2 * flow_hash_size;
++	flow_hwm = 4 * flow_hash_size;
++
++	for (i = 0; i < NR_CPUS; i++)
++		flow_hash_rnd_recalc(i) = 1;
++
++	init_timer(&flow_hash_rnd_timer);
++	flow_hash_rnd_timer.function = flow_cache_new_hashrnd;
++	flow_hash_rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD;
++	add_timer(&flow_hash_rnd_timer);
++
++	for (order = 0;
++	     (PAGE_SIZE << order) <
++		     (NR_CPUS*sizeof(struct flow_cache_entry *)*flow_hash_size);
++	     order++)
++		/* NOTHING */;
++
++	flow_table = (struct flow_cache_entry **)
++		__get_free_pages(GFP_ATOMIC, order);
++
++	if (!flow_table)
++		panic("Failed to allocate flow cache hash table\n");
++
++	memset(flow_table, 0, PAGE_SIZE << order);
++
++	return 0;
++}
++
++module_init(flow_cache_init);
+diff -Nru a/net/core/neighbour.c b/net/core/neighbour.c
+--- a/net/core/neighbour.c	2005-02-13 21:25:09 +11:00
++++ b/net/core/neighbour.c	2005-02-13 21:25:09 +11:00
+@@ -737,7 +737,9 @@
+ static __inline__ int neigh_max_probes(struct neighbour *n)
+ {
+ 	struct neigh_parms *p = n->parms;
+-	return p->ucast_probes + p->app_probes + p->mcast_probes;
++	return (n->nud_state & NUD_PROBE ?
++		p->ucast_probes :
++		p->ucast_probes + p->app_probes + p->mcast_probes);
+ }
+ 
+ 
+@@ -1227,9 +1229,6 @@
+ 		if (*p == parms) {
+ 			*p = parms->next;
+ 			write_unlock_bh(&tbl->lock);
+-#ifdef CONFIG_SYSCTL
+-			neigh_sysctl_unregister(parms);
+-#endif
+ 			kfree(parms);
+ 			return;
+ 		}
+@@ -1326,9 +1325,6 @@
+ 	kfree(tbl->phash_buckets);
+ 	tbl->phash_buckets = NULL;
+ 
+-#ifdef CONFIG_SYSCTL
+-	neigh_sysctl_unregister(&tbl->parms);
+-#endif
+ 	return 0;
+ }
+ 
+diff -Nru a/net/core/netfilter.c b/net/core/netfilter.c
+--- a/net/core/netfilter.c	2005-02-13 21:25:09 +11:00
++++ b/net/core/netfilter.c	2005-02-13 21:25:09 +11:00
+@@ -563,7 +563,7 @@
+ {
+ 	struct iphdr *iph = (*pskb)->nh.iph;
+ 	struct rtable *rt;
+-	struct rt_key key = {};
++	struct flowi fl = {};
+ 	struct dst_entry *odst;
+ 	unsigned int hh_len;
+ 
+@@ -571,14 +571,15 @@
+ 	 * packets with foreign saddr to be appear on the NF_IP_LOCAL_OUT hook.
+ 	 */
+ 	if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
+-		key.dst = iph->daddr;
+-		key.src = iph->saddr;
+-		key.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
+-		key.tos = RT_TOS(iph->tos);
++		fl.nl_u.ip4_u.daddr = iph->daddr;
++		fl.nl_u.ip4_u.saddr = iph->saddr;
++		fl.oif = (*pskb)->sk ? (*pskb)->sk->bound_dev_if : 0;
++		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-		key.fwmark = (*pskb)->nfmark;
++		fl.nl_u.ip4_u.fwmark = (*pskb)->nfmark;
+ #endif
+-		if (ip_route_output_key(&rt, &key) != 0)
++		fl.proto = iph->protocol;
++		if (ip_route_output_key(&rt, &fl) != 0)
+ 			return -1;
+ 
+ 		/* Drop old route. */
+@@ -587,8 +588,8 @@
+ 	} else {
+ 		/* non-local src, find valid iif to satisfy
+ 		 * rp-filter when calling ip_route_input. */
+-		key.dst = iph->saddr;
+-		if (ip_route_output_key(&rt, &key) != 0)
++		fl.nl_u.ip4_u.daddr = iph->saddr;
++		if (ip_route_output_key(&rt, &fl) != 0)
+ 			return -1;
+ 
+ 		odst = (*pskb)->dst;
+diff -Nru a/net/core/rtnetlink.c b/net/core/rtnetlink.c
+--- a/net/core/rtnetlink.c	2005-02-13 21:25:10 +11:00
++++ b/net/core/rtnetlink.c	2005-02-13 21:25:10 +11:00
+@@ -128,7 +128,7 @@
+ 	return err;
+ }
+ 
+-int rtnetlink_put_metrics(struct sk_buff *skb, unsigned *metrics)
++int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics)
+ {
+ 	struct rtattr *mx = (struct rtattr*)skb->tail;
+ 	int i;
+@@ -136,7 +136,7 @@
+ 	RTA_PUT(skb, RTA_METRICS, 0, NULL);
+ 	for (i=0; i<RTAX_MAX; i++) {
+ 		if (metrics[i])
+-			RTA_PUT(skb, i+1, sizeof(unsigned), metrics+i);
++			RTA_PUT(skb, i+1, sizeof(u32), metrics+i);
+ 	}
+ 	mx->rta_len = skb->tail - (u8*)mx;
+ 	if (mx->rta_len == RTA_LENGTH(0))
+diff -Nru a/net/core/skbuff.c b/net/core/skbuff.c
+--- a/net/core/skbuff.c	2005-02-13 21:25:09 +11:00
++++ b/net/core/skbuff.c	2005-02-13 21:25:09 +11:00
+@@ -57,6 +57,7 @@
+ #include <net/dst.h>
+ #include <net/sock.h>
+ #include <net/checksum.h>
++#include <net/xfrm.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -201,6 +202,7 @@
+ 
+ 	/* Set up other state */
+ 	skb->len = 0;
++	skb->local_df = 0;
+ 	skb->cloned = 0;
+ 	skb->data_len = 0;
+ 
+@@ -233,6 +235,7 @@
+ 	skb->dev = NULL;
+ 	skb->real_dev = NULL;
+ 	skb->dst = NULL;
++	skb->sp = NULL;
+ 	memset(skb->cb, 0, sizeof(skb->cb));
+ 	skb->pkt_type = PACKET_HOST;	/* Default type */
+ 	skb->ip_summed = 0;
+@@ -317,6 +320,9 @@
+ 	}
+ 
+ 	dst_release(skb->dst);
++#ifdef CONFIG_XFRM
++	secpath_put(skb->sp);
++#endif
+ 	if(skb->destructor) {
+ 		if (in_irq()) {
+ 			printk(KERN_WARNING "Warning: kfree_skb on hard IRQ %p\n",
+@@ -369,10 +375,15 @@
+ 	C(mac);
+ 	C(dst);
+ 	dst_clone(n->dst);
++	C(sp);
++#ifdef CONFIG_INET
++	secpath_get(n->sp);
++#endif
+ 	memcpy(n->cb, skb->cb, sizeof(skb->cb));
+ 	C(len);
+ 	C(data_len);
+ 	C(csum);
++	C(local_df);
+ 	n->cloned = 1;
+ 	C(pkt_type);
+ 	C(ip_summed);
+@@ -423,11 +434,15 @@
+ 	new->priority=old->priority;
+ 	new->protocol=old->protocol;
+ 	new->dst=dst_clone(old->dst);
++#ifdef CONFIG_INET
++	new->sp=secpath_get(old->sp);
++#endif
+ 	new->h.raw=old->h.raw+offset;
+ 	new->nh.raw=old->nh.raw+offset;
+ 	new->mac.raw=old->mac.raw+offset;
+ 	memcpy(new->cb, old->cb, sizeof(old->cb));
+ 	atomic_set(&new->users, 1);
++	new->local_df=old->local_df;
+ 	new->pkt_type=old->pkt_type;
+ 	new->stamp=old->stamp;
+ 	new->destructor = NULL;
+diff -Nru a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c
+--- a/net/decnet/dn_nsp_out.c	2005-02-13 21:25:10 +11:00
++++ b/net/decnet/dn_nsp_out.c	2005-02-13 21:25:10 +11:00
+@@ -593,7 +593,7 @@
+ 	 * associations.
+ 	 */
+ 	skb->dst = dst_clone(dst);
+-	skb->dst->output(skb);
++	dst_output(skb);
+ }
+ 
+ 
+diff -Nru a/net/decnet/dn_route.c b/net/decnet/dn_route.c
+--- a/net/decnet/dn_route.c	2005-02-13 21:25:10 +11:00
++++ b/net/decnet/dn_route.c	2005-02-13 21:25:10 +11:00
+@@ -100,7 +100,6 @@
+ 
+ static int dn_dst_gc(void);
+ static struct dst_entry *dn_dst_check(struct dst_entry *, __u32);
+-static struct dst_entry *dn_dst_reroute(struct dst_entry *, struct sk_buff *skb);
+ static struct dst_entry *dn_dst_negative_advice(struct dst_entry *);
+ static void dn_dst_link_failure(struct sk_buff *);
+ static int dn_route_input(struct sk_buff *);
+@@ -119,7 +118,6 @@
+ 	gc_thresh:		128,
+ 	gc:			dn_dst_gc,
+ 	check:			dn_dst_check,
+-	reroute:		dn_dst_reroute,
+ 	negative_advice:	dn_dst_negative_advice,
+ 	link_failure:		dn_dst_link_failure,
+ 	entry_size:		sizeof(struct dn_route),
+@@ -202,12 +200,6 @@
+ 	return NULL;
+ }
+ 
+-static struct dst_entry *dn_dst_reroute(struct dst_entry *dst,
+-					struct sk_buff *skb)
+-{
+-	return NULL;
+-}
+-
+ /*
+  * This is called through sendmsg() when you specify MSG_TRYHARD
+  * and there is already a route in cache.
+@@ -396,7 +388,7 @@
+ 	int err;
+ 
+ 	if ((err = dn_route_input(skb)) == 0)
+-		return skb->dst->input(skb);
++		return dst_input(skb);
+ 
+ 	if (decnet_debug_level & 4) {
+ 		char *devname = skb->dev ? skb->dev->name : "???";
+@@ -1049,10 +1041,12 @@
+ 	RTA_PUT(skb, RTA_SRC, 2, &rt->rt_saddr);
+ 	if (rt->u.dst.dev)
+ 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+-	if (rt->u.dst.window)
+-		RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned), &rt->u.dst.window);
+-	if (rt->u.dst.rtt)
+-		RTA_PUT(skb, RTAX_RTT, sizeof(unsigned), &rt->u.dst.rtt);
++	if (dst_metric(&rt->u.dst, RTAX_WINDOW))
++		RTA_PUT(skb, RTAX_WINDOW, sizeof(unsigned),
++			&rt->u.dst.metrics[RTAX_WINDOW - 1]);
++	if (dst_metric(&rt->u.dst, RTAX_RTT))
++		RTA_PUT(skb, RTAX_RTT, sizeof(unsigned),
++			&rt->u.dst.metrics[RTAX_RTT]);
+ 
+ 	nlh->nlmsg_len = skb->tail - b;
+ 	return skb->len;
+@@ -1208,7 +1202,7 @@
+ 					dn_addr2asc(dn_ntohs(rt->rt_saddr), buf2),
+ 					atomic_read(&rt->u.dst.__refcnt),
+ 					rt->u.dst.__use,
+-					(int)rt->u.dst.rtt
++					(int) dst_metric(&rt->u.dst, RTAX_RTT)
+ 					);
+ 
+ 
+diff -Nru a/net/ipv4/Config.in b/net/ipv4/Config.in
+--- a/net/ipv4/Config.in	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/Config.in	2005-02-13 21:25:09 +11:00
+@@ -40,6 +40,18 @@
+ fi
+ bool '  IP: TCP Explicit Congestion Notification support' CONFIG_INET_ECN
+ bool '  IP: TCP syncookie support (disabled per default)' CONFIG_SYN_COOKIES
++tristate '  IP: AH transformation' CONFIG_INET_AH
++tristate '  IP: ESP transformation' CONFIG_INET_ESP
++tristate '  IP: IPComp transformation' CONFIG_INET_IPCOMP
++if [ "$CONFIG_INET_IPIP" = "y" -o "$CONFIG_INET_IPCOMP" = "y" ]; then
++   define_tristate CONFIG_INET_TUNNEL y
++else
++   if [ "$CONFIG_INET_IPIP" = "m" -o "$CONFIG_INET_IPCOMP" = "m" ]; then
++      define_tristate CONFIG_INET_TUNNEL m
++   else
++      tristate '  IP: tunnel transformation' CONFIG_INET_TUNNEL
++   fi
++fi
+ if [ "$CONFIG_NETFILTER" != "n" ]; then
+    source net/ipv4/netfilter/Config.in
+ fi
+diff -Nru a/net/ipv4/Makefile b/net/ipv4/Makefile
+--- a/net/ipv4/Makefile	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/Makefile	2005-02-13 21:25:09 +11:00
+@@ -9,7 +9,7 @@
+ 
+ O_TARGET := ipv4.o
+ 
+-export-objs = ipip.o ip_gre.o
++export-objs = ipip.o ip_gre.o xfrm4_input.o xfrm4_tunnel.o
+ 
+ obj-y     := utils.o route.o inetpeer.o proc.o protocol.o \
+ 	     ip_input.o ip_fragment.o ip_forward.o ip_options.o \
+@@ -24,6 +24,13 @@
+ obj-$(CONFIG_NET_IPIP) += ipip.o
+ obj-$(CONFIG_NET_IPGRE) += ip_gre.o
+ obj-$(CONFIG_SYN_COOKIES) += syncookies.o
++obj-$(CONFIG_INET_AH) += ah4.o
++obj-$(CONFIG_INET_ESP) += esp4.o
++obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
++obj-$(CONFIG_INET_TUNNEL) += xfrm4_tunnel.o 
+ obj-$(CONFIG_IP_PNP) += ipconfig.o
++
++obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
++		      xfrm4_output.o
+ 
+ include $(TOPDIR)/Rules.make
+diff -Nru a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
+--- a/net/ipv4/af_inet.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/af_inet.c	2005-02-13 21:25:09 +11:00
+@@ -89,6 +89,7 @@
+ 
+ #include <linux/smp_lock.h>
+ #include <linux/inet.h>
++#include <linux/igmp.h>
+ #include <linux/netdevice.h>
+ #include <linux/brlock.h>
+ #include <net/ip.h>
+@@ -103,6 +104,7 @@
+ #include <net/icmp.h>
+ #include <net/ipip.h>
+ #include <net/inet_common.h>
++#include <net/xfrm.h>
+ #ifdef CONFIG_IP_MROUTE
+ #include <linux/mroute.h>
+ #endif
+@@ -213,6 +215,8 @@
+ 
+ 	sock_orphan(sk);
+ 
++	xfrm_sk_free_policy(sk);
++
+ #ifdef INET_REFCNT_DEBUG
+ 	if (atomic_read(&sk->refcnt) != 1) {
+ 		printk(KERN_DEBUG "Destruction inet %p delayed, c=%d\n", sk, atomic_read(&sk->refcnt));
+@@ -386,7 +390,7 @@
+ 
+ 	sk->backlog_rcv = sk->prot->backlog_rcv;
+ 
+-	sk->protinfo.af_inet.ttl	= sysctl_ip_default_ttl;
++	sk->protinfo.af_inet.uc_ttl	= -1;
+ 
+ 	sk->protinfo.af_inet.mc_loop	= 1;
+ 	sk->protinfo.af_inet.mc_ttl	= 1;
+@@ -698,6 +702,27 @@
+ 	return err;
+ }
+ 
++#ifdef CONFIG_IP_MULTICAST
++static struct inet_protocol igmp_protocol = {
++	.handler =	igmp_rcv,
++};
++#endif
++
++static struct inet_protocol tcp_protocol = {
++	.handler =	tcp_v4_rcv,
++	.err_handler =	tcp_v4_err,
++	.no_policy =	1,
++};
++
++static struct inet_protocol udp_protocol = {
++	.handler =	udp_rcv,
++	.err_handler =	udp_err,
++	.no_policy =	1,
++};
++
++static struct inet_protocol icmp_protocol = {
++	.handler =	icmp_rcv,
++};
+ 
+ /*
+  *	This does both peername and sockname.
+@@ -724,6 +749,7 @@
+ 		sin->sin_port = sk->sport;
+ 		sin->sin_addr.s_addr = addr;
+ 	}
++	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
+ 	*uaddr_len = sizeof(*sin);
+ 	return(0);
+ }
+@@ -757,6 +783,21 @@
+ 	return sk->prot->sendmsg(sk, msg, size);
+ }
+ 
++
++ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags)
++{
++	struct sock *sk = sock->sk;
++
++	/* We may need to bind the socket. */
++	if (!sk->num && inet_autobind(sk))
++		return -EAGAIN;
++
++	if (sk->prot->sendpage)
++		return sk->prot->sendpage(sk, page, offset, size, flags);
++	return sock_no_sendpage(sock, page, offset, size, flags);
++}
++
++
+ int inet_shutdown(struct socket *sock, int how)
+ {
+ 	struct sock *sk = sock->sk;
+@@ -1002,7 +1043,7 @@
+ 	sendmsg:	inet_sendmsg,
+ 	recvmsg:	inet_recvmsg,
+ 	mmap:		sock_no_mmap,
+-	sendpage:	sock_no_sendpage,
++	sendpage:	inet_sendpage,
+ };
+ 
+ struct net_proto_family inet_family_ops = {
+@@ -1130,7 +1171,6 @@
+ static int __init inet_init(void)
+ {
+ 	struct sk_buff *dummy_skb;
+-	struct inet_protocol *p;
+ 	struct inet_protosw *q;
+ 	struct list_head *r;
+ 
+@@ -1148,16 +1188,19 @@
+   	(void) sock_register(&inet_family_ops);
+ 
+ 	/*
+-	 *	Add all the protocols. 
++	 *	Add all the base protocols.
+ 	 */
+ 
+-	printk(KERN_INFO "IP Protocols: ");
+-	for (p = inet_protocol_base; p != NULL;) {
+-		struct inet_protocol *tmp = (struct inet_protocol *) p->next;
+-		inet_add_protocol(p);
+-		printk("%s%s",p->name,tmp?", ":"\n");
+-		p = tmp;
+-	}
++	if (inet_add_protocol(&icmp_protocol, IPPROTO_ICMP) < 0)
++		printk(KERN_CRIT "inet_init: Cannot add ICMP protocol\n");
++	if (inet_add_protocol(&udp_protocol, IPPROTO_UDP) < 0)
++		printk(KERN_CRIT "inet_init: Cannot add UDP protocol\n");
++	if (inet_add_protocol(&tcp_protocol, IPPROTO_TCP) < 0)
++		printk(KERN_CRIT "inet_init: Cannot add TCP protocol\n");
++#ifdef CONFIG_IP_MULTICAST
++	if (inet_add_protocol(&igmp_protocol, IPPROTO_IGMP) < 0)
++		printk(KERN_CRIT "inet_init: Cannot add IGMP protocol\n");
++#endif
+ 
+ 	/* Register the socket-side information for inet_create. */
+ 	for(r = &inetsw[0]; r < &inetsw[SOCK_MAX]; ++r)
+diff -Nru a/net/ipv4/ah4.c b/net/ipv4/ah4.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/ah4.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,337 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/ah.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <net/icmp.h>
++#include <asm/scatterlist.h>
++
++
++/* Clear mutable options and find final destination to substitute
++ * into IP header for icv calculation. Options are already checked
++ * for validity, so paranoia is not required. */
++
++static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr)
++{
++	unsigned char * optptr = (unsigned char*)(iph+1);
++	int  l = iph->ihl*4 - sizeof(struct iphdr);
++	int  optlen;
++
++	while (l > 0) {
++		switch (*optptr) {
++		case IPOPT_END:
++			return 0;
++		case IPOPT_NOOP:
++			l--;
++			optptr++;
++			continue;
++		}
++		optlen = optptr[1];
++		if (optlen<2 || optlen>l)
++			return -EINVAL;
++		switch (*optptr) {
++		case IPOPT_SEC:
++		case 0x85:	/* Some "Extended Security" crap. */
++		case 0x86:	/* Another "Commercial Security" crap. */
++		case IPOPT_RA:
++		case 0x80|21:	/* RFC1770 */
++			break;
++		case IPOPT_LSRR:
++		case IPOPT_SSRR:
++			if (optlen < 6)
++				return -EINVAL;
++			memcpy(daddr, optptr+optlen-4, 4);
++			/* Fall through */
++		default:
++			memset(optptr+2, 0, optlen-2);
++		}
++		l -= optlen;
++		optptr += optlen;
++	}
++	return 0;
++}
++
++static int ah_output(struct sk_buff *skb)
++{
++	int err;
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x  = dst->xfrm;
++	struct iphdr *iph, *top_iph;
++	struct ip_auth_hdr *ah;
++	struct ah_data *ahp;
++	union {
++		struct iphdr	iph;
++		char 		buf[60];
++	} tmp_iph;
++
++	top_iph = skb->nh.iph;
++	iph = &tmp_iph.iph;
++
++	iph->tos = top_iph->tos;
++	iph->ttl = top_iph->ttl;
++	iph->frag_off = top_iph->frag_off;
++
++	if (top_iph->ihl != 5) {
++		iph->daddr = top_iph->daddr;
++		memcpy(iph+1, top_iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
++		err = ip_clear_mutable_options(top_iph, &top_iph->daddr);
++		if (err)
++			goto error;
++	}
++
++	ah = (struct ip_auth_hdr *)((char *)top_iph+top_iph->ihl*4);
++	ah->nexthdr = top_iph->protocol;
++
++	top_iph->tos = 0;
++	top_iph->tot_len = htons(skb->len);
++	top_iph->frag_off = 0;
++	top_iph->ttl = 0;
++	top_iph->protocol = IPPROTO_AH;
++	top_iph->check = 0;
++
++	ahp = x->data;
++	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + 
++				   ahp->icv_trunc_len) >> 2) - 2;
++
++	ah->reserved = 0;
++	ah->spi = x->id.spi;
++	ah->seq_no = htonl(++x->replay.oseq);
++	ahp->icv(ahp, skb, ah->auth_data);
++
++	top_iph->tos = iph->tos;
++	top_iph->ttl = iph->ttl;
++	top_iph->frag_off = iph->frag_off;
++	if (top_iph->ihl != 5) {
++		top_iph->daddr = iph->daddr;
++		memcpy(top_iph+1, iph+1, top_iph->ihl*4 - sizeof(struct iphdr));
++	}
++
++	ip_send_check(top_iph);
++
++	err = 0;
++
++error:
++	return err;
++}
++
++static int ah_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	int ah_hlen;
++	struct iphdr *iph;
++	struct ip_auth_hdr *ah;
++	struct ah_data *ahp;
++	char work_buf[60];
++
++	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
++		goto out;
++
++	ah = (struct ip_auth_hdr*)skb->data;
++	ahp = x->data;
++	ah_hlen = (ah->hdrlen + 2) << 2;
++	
++	if (ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_full_len) &&
++	    ah_hlen != XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len)) 
++		goto out;
++
++	if (!pskb_may_pull(skb, ah_hlen))
++		goto out;
++
++	/* We are going to _remove_ AH header to keep sockets happy,
++	 * so... Later this can change. */
++	if (skb_cloned(skb) &&
++	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++		goto out;
++
++	skb->ip_summed = CHECKSUM_NONE;
++
++	ah = (struct ip_auth_hdr*)skb->data;
++	iph = skb->nh.iph;
++
++	memcpy(work_buf, iph, iph->ihl*4);
++
++	iph->ttl = 0;
++	iph->tos = 0;
++	iph->frag_off = 0;
++	iph->check = 0;
++	if (iph->ihl != 5) {
++		u32 dummy;
++		if (ip_clear_mutable_options(iph, &dummy))
++			goto out;
++	}
++        {
++		u8 auth_data[MAX_AH_AUTH_LEN];
++		
++		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
++		skb_push(skb, skb->data - skb->nh.raw);
++		ahp->icv(ahp, skb, ah->auth_data);
++		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
++			x->stats.integrity_failed++;
++			goto out;
++		}
++	}
++	((struct iphdr*)work_buf)->protocol = ah->nexthdr;
++	skb->nh.raw = skb_pull(skb, ah_hlen);
++	memcpy(skb->nh.raw, work_buf, iph->ihl*4);
++	skb->nh.iph->tot_len = htons(skb->len);
++	skb_pull(skb, skb->nh.iph->ihl*4);
++	skb->h.raw = skb->data;
++
++	return 0;
++
++out:
++	return -EINVAL;
++}
++
++static void ah4_err(struct sk_buff *skb, u32 info)
++{
++	struct iphdr *iph = (struct iphdr*)skb->data;
++	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+(iph->ihl<<2));
++	struct xfrm_state *x;
++
++	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
++	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
++		return;
++
++	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET);
++	if (!x)
++		return;
++	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n",
++	       ntohl(ah->spi), ntohl(iph->daddr));
++	xfrm_state_put(x);
++}
++
++static int ah_init_state(struct xfrm_state *x, void *args)
++{
++	struct ah_data *ahp = NULL;
++	struct xfrm_algo_desc *aalg_desc;
++
++	if (!x->aalg)
++		goto error;
++
++	/* null auth can use a zero length key */
++	if (x->aalg->alg_key_len > 512)
++		goto error;
++
++	if (x->encap)
++		goto error;
++
++	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
++	if (ahp == NULL)
++		return -ENOMEM;
++
++	memset(ahp, 0, sizeof(*ahp));
++
++	ahp->key = x->aalg->alg_key;
++	ahp->key_len = (x->aalg->alg_key_len+7)/8;
++	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
++	if (!ahp->tfm)
++		goto error;
++	ahp->icv = ah_hmac_digest;
++	
++	/*
++	 * Lookup the algorithm description maintained by xfrm_algo,
++	 * verify crypto transform properties, and store information
++	 * we need for AH processing.  This lookup cannot fail here
++	 * after a successful crypto_alloc_tfm().
++	 */
++	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
++	BUG_ON(!aalg_desc);
++
++	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
++	    crypto_tfm_alg_digestsize(ahp->tfm)) {
++		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
++		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
++		       aalg_desc->uinfo.auth.icv_fullbits/8);
++		goto error;
++	}
++	
++	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
++	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
++	
++	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
++	
++	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
++	if (!ahp->work_icv)
++		goto error;
++	
++	x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len);
++	if (x->props.mode)
++		x->props.header_len += sizeof(struct iphdr);
++	x->data = ahp;
++
++	return 0;
++
++error:
++	if (ahp) {
++		if (ahp->work_icv)
++			kfree(ahp->work_icv);
++		if (ahp->tfm)
++			crypto_free_tfm(ahp->tfm);
++		kfree(ahp);
++	}
++	return -EINVAL;
++}
++
++static void ah_destroy(struct xfrm_state *x)
++{
++	struct ah_data *ahp = x->data;
++
++	if (!ahp)
++		return;
++
++	if (ahp->work_icv) {
++		kfree(ahp->work_icv);
++		ahp->work_icv = NULL;
++	}
++	if (ahp->tfm) {
++		crypto_free_tfm(ahp->tfm);
++		ahp->tfm = NULL;
++	}
++	kfree(ahp);
++}
++
++
++static struct xfrm_type ah_type =
++{
++	.description	= "AH4",
++	.owner		= THIS_MODULE,
++	.proto	     	= IPPROTO_AH,
++	.init_state	= ah_init_state,
++	.destructor	= ah_destroy,
++	.input		= ah_input,
++	.output		= ah_output
++};
++
++static struct inet_protocol ah4_protocol = {
++	.handler	=	xfrm4_rcv,
++	.err_handler	=	ah4_err,
++	.no_policy	=	1,
++};
++
++static int __init ah4_init(void)
++{
++	if (xfrm_register_type(&ah_type, AF_INET) < 0) {
++		printk(KERN_INFO "ip ah init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet_add_protocol(&ah4_protocol, IPPROTO_AH) < 0) {
++		printk(KERN_INFO "ip ah init: can't add protocol\n");
++		xfrm_unregister_type(&ah_type, AF_INET);
++		return -EAGAIN;
++	}
++	return 0;
++}
++
++static void __exit ah4_fini(void)
++{
++	if (inet_del_protocol(&ah4_protocol, IPPROTO_AH) < 0)
++		printk(KERN_INFO "ip ah close: can't remove protocol\n");
++	if (xfrm_unregister_type(&ah_type, AF_INET) < 0)
++		printk(KERN_INFO "ip ah close: can't remove xfrm type\n");
++}
++
++module_init(ah4_init);
++module_exit(ah4_fini);
++MODULE_LICENSE("GPL");
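
A quick way to sanity-check the header arithmetic in ah_input() above: the AH
hdrlen field counts 32-bit words minus 2, and the patch accepts only lengths
equal to the 12-byte fixed header plus the full or truncated ICV, rounded up
to 8 bytes. A stand-alone C sketch, not part of the patch (the HMAC-SHA1-96
ICV size is an assumed example):

#include <assert.h>
#include <stdio.h>

#define XFRM_ALIGN8(len)	(((len) + 7) & ~7)	/* as in the patch */
#define AH_FIXED_LEN	12	/* nexthdr + hdrlen + reserved + spi + seq_no */

/* on-the-wire AH length from the hdrlen field, as in ah_input() */
static unsigned ah_wire_len(unsigned hdrlen_field)
{
	return (hdrlen_field + 2) << 2;
}

int main(void)
{
	unsigned icv_trunc = 12;	/* HMAC-SHA1-96 truncated ICV (assumed) */
	unsigned expect = XFRM_ALIGN8(AH_FIXED_LEN + icv_trunc);
	unsigned hdrlen_field = expect / 4 - 2;

	assert(ah_wire_len(hdrlen_field) == expect);
	printf("AH header: %u bytes, hdrlen field %u\n",
	       ah_wire_len(hdrlen_field), hdrlen_field);
	return 0;
}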
+diff -Nru a/net/ipv4/arp.c b/net/ipv4/arp.c
+--- a/net/ipv4/arp.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/arp.c	2005-02-13 21:25:09 +11:00
+@@ -409,11 +409,13 @@
+ 
+ static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev)
+ {
++	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip,
++						 .saddr = tip } } };
+ 	struct rtable *rt;
+ 	int flag = 0; 
+ 	/*unsigned long now; */
+ 
+-	if (ip_route_output(&rt, sip, tip, 0, 0) < 0) 
++	if (ip_route_output_key(&rt, &fl) < 0) 
+ 		return 1;
+ 	if (rt->u.dst.dev != dev) { 
+ 		NET_INC_STATS_BH(ArpFilter);
+@@ -559,11 +561,11 @@
+ 	 */
+ 	
+ 	skb = alloc_skb(sizeof(struct arphdr)+ 2*(dev->addr_len+4)
+-				+ dev->hard_header_len + 15, GFP_ATOMIC);
++				+ LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ 	if (skb == NULL)
+ 		return NULL;
+ 
+-	skb_reserve(skb, (dev->hard_header_len+15)&~15);
++	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ 	skb->nh.raw = skb->data;
+ 	arp = (struct arphdr *) skb_put(skb,sizeof(struct arphdr) + 2*(dev->addr_len+4));
+ 	skb->dev = dev;
+@@ -1012,8 +1014,10 @@
+ 	if (r->arp_flags & ATF_PERM)
+ 		r->arp_flags |= ATF_COM;
+ 	if (dev == NULL) {
++		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
++							 .tos = RTO_ONLINK } } };
+ 		struct rtable * rt;
+-		if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0)
++		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+ 			return err;
+ 		dev = rt->u.dst.dev;
+ 		ip_rt_put(rt);
+@@ -1113,8 +1117,10 @@
+ 	}
+ 
+ 	if (dev == NULL) {
++		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = ip,
++							 .tos = RTO_ONLINK } } };
+ 		struct rtable * rt;
+-		if ((err = ip_route_output(&rt, ip, 0, RTO_ONLINK, 0)) != 0)
++		if ((err = ip_route_output_key(&rt, &fl)) != 0)
+ 			return err;
+ 		dev = rt->u.dst.dev;
+ 		ip_rt_put(rt);
+diff -Nru a/net/ipv4/devinet.c b/net/ipv4/devinet.c
+--- a/net/ipv4/devinet.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/devinet.c	2005-02-13 21:25:09 +11:00
+@@ -180,7 +180,9 @@
+ 	/* in_dev_put following below will kill the in_device */
+ 	write_unlock_bh(&inetdev_lock);
+ 
+-
++#ifdef CONFIG_SYSCTL
++	neigh_sysctl_unregister(in_dev->arp_parms);
++#endif
+ 	neigh_parms_release(&arp_tbl, in_dev->arp_parms);
+ 	in_dev_put(in_dev);
+ }
+@@ -942,6 +944,8 @@
+ 				memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
+ 				inet_insert_ifa(ifa);
+ 			}
++			in_dev->cnf.no_xfrm = 1;
++			in_dev->cnf.no_policy = 1;
+ 		}
+ 		ip_mc_up(in_dev);
+ 		break;
+@@ -1148,6 +1152,62 @@
+         return ret;
+ }
+ 
++int ipv4_doint_and_flush(ctl_table *ctl, int write,
++			 struct file* filp, void *buffer,
++			 size_t *lenp)
++{
++	int *valp = ctl->data;
++	int val = *valp;
++	int ret = proc_dointvec(ctl, write, filp, buffer, lenp);
++
++	if (write && *valp != val)
++		rt_cache_flush(0);
++
++	return ret;
++}
++
++int ipv4_doint_and_flush_strategy(ctl_table *table, int *name, int nlen,
++				  void *oldval, size_t *oldlenp,
++				  void *newval, size_t newlen, 
++				  void **context)
++{
++	int *valp = table->data;
++	int new;
++
++	if (!newval || !newlen)
++		return 0;
++
++	if (newlen != sizeof(int))
++		return -EINVAL;
++
++	if (get_user(new, (int *)newval))
++		return -EFAULT;
++
++	if (new == *valp)
++		return 0;
++
++	if (oldval && oldlenp) {
++		size_t len;
++
++		if (get_user(len, oldlenp))
++			return -EFAULT;
++
++		if (len) {
++			if (len > table->maxlen)
++				len = table->maxlen;
++			if (copy_to_user(oldval, valp, len))
++				return -EFAULT;
++			if (put_user(len, oldlenp))
++				return -EFAULT;
++		}
++	}
++
++	*valp = new;
++	rt_cache_flush(0);
++	return 1;
++}
++
++
+ static struct devinet_sysctl_table
+ {
+ 	struct ctl_table_header *sysctl_header;
+@@ -1206,6 +1266,12 @@
+ 	{NET_IPV4_CONF_ARP_IGNORE, "arp_ignore",
+ 	 &ipv4_devconf.arp_ignore, sizeof(int), 0644, NULL,
+ 	 &proc_dointvec},
++	{NET_IPV4_CONF_NOXFRM, "disable_xfrm",
++	 &ipv4_devconf.no_xfrm, sizeof(int), 0644, NULL,
++	 &ipv4_doint_and_flush, &ipv4_doint_and_flush_strategy,},
++	{NET_IPV4_CONF_NOPOLICY, "disable_policy",
++	 &ipv4_devconf.no_policy, sizeof(int), 0644, NULL,
++	 &ipv4_doint_and_flush, &ipv4_doint_and_flush_strategy},
+ 	{NET_IPV4_CONF_FORCE_IGMP_VERSION, "force_igmp_version",
+ 	 &ipv4_devconf.force_igmp_version, sizeof(int), 0644, NULL,
+ 	 &proc_dointvec},
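
Both new sysctl handlers above implement the same rule: accept the write, but
flush the routing cache only when the value actually changed. The pattern in
isolation, as a user-space C sketch (rt_cache_flushes stands in for
rt_cache_flush(0)):

#include <stdio.h>

static int rt_cache_flushes;

/* update a knob, flushing only on a real change (cf. ipv4_doint_and_flush) */
static int set_and_flush(int *knob, int new_val)
{
	if (new_val == *knob)
		return 0;
	*knob = new_val;
	rt_cache_flushes++;
	return 1;
}

int main(void)
{
	int disable_xfrm = 0;

	set_and_flush(&disable_xfrm, 1);	/* changed: flushes */
	set_and_flush(&disable_xfrm, 1);	/* no change: no flush */
	printf("flushes: %d\n", rt_cache_flushes);	/* prints 1 */
	return 0;
}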
+diff -Nru a/net/ipv4/esp4.c b/net/ipv4/esp4.c
+--- /dev/null	Wed Dec 31 16:00:00 1969
++++ b/net/ipv4/esp4.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,511 @@
++#include <linux/config.h>
++#include <linux/module.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/esp.h>
++#include <asm/scatterlist.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <linux/random.h>
++#include <net/icmp.h>
++#include <net/udp.h>
++
++/* decapsulation data for use when post-processing */
++struct esp_decap_data {
++	xfrm_address_t	saddr;
++	__u16		sport;
++	__u8		proto;
++};
++
++static int esp_output(struct sk_buff *skb)
++{
++	int err;
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x  = dst->xfrm;
++	struct iphdr *top_iph;
++	struct ip_esp_hdr *esph;
++	struct crypto_tfm *tfm;
++	struct esp_data *esp;
++	struct sk_buff *trailer;
++	int blksize;
++	int clen;
++	int alen;
++	int nfrags;
++
++	/* Strip IP+ESP header. */
++	__skb_pull(skb, skb->h.raw - skb->data);
++	/* Now skb is pure payload to encrypt */
++
++	err = -ENOMEM;
++
++	/* Round to block size */
++	clen = skb->len;
++
++	esp = x->data;
++	alen = esp->auth.icv_trunc_len;
++	tfm = esp->conf.tfm;
++	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
++	clen = (clen + 2 + blksize-1)&~(blksize-1);
++	if (esp->conf.padlen)
++		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
++
++	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0)
++		goto error;
++
++	/* Fill padding... */
++	do {
++		int i;
++		for (i=0; i<clen-skb->len - 2; i++)
++			*(u8*)(trailer->tail + i) = i+1;
++	} while (0);
++	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
++	pskb_put(skb, trailer, clen - skb->len);
++
++	__skb_push(skb, skb->data - skb->nh.raw);
++	top_iph = skb->nh.iph;
++	esph = (struct ip_esp_hdr *)(skb->nh.raw + top_iph->ihl*4);
++	top_iph->tot_len = htons(skb->len + alen);
++	*(u8*)(trailer->tail - 1) = top_iph->protocol;
++
++	/* this is non-NULL only with UDP Encapsulation */
++	if (x->encap) {
++		struct xfrm_encap_tmpl *encap = x->encap;
++		struct udphdr *uh;
++		u32 *udpdata32;
++
++		uh = (struct udphdr *)esph;
++		uh->source = encap->encap_sport;
++		uh->dest = encap->encap_dport;
++		uh->len = htons(skb->len + alen - top_iph->ihl*4);
++		uh->check = 0;
++
++		switch (encap->encap_type) {
++		default:
++		case UDP_ENCAP_ESPINUDP:
++			esph = (struct ip_esp_hdr *)(uh + 1);
++			break;
++		case UDP_ENCAP_ESPINUDP_NON_IKE:
++			udpdata32 = (u32 *)(uh + 1);
++			udpdata32[0] = udpdata32[1] = 0;
++			esph = (struct ip_esp_hdr *)(udpdata32 + 2);
++			break;
++		}
++
++		top_iph->protocol = IPPROTO_UDP;
++	} else
++		top_iph->protocol = IPPROTO_ESP;
++
++	esph->spi = x->id.spi;
++	esph->seq_no = htonl(++x->replay.oseq);
++
++	if (esp->conf.ivlen)
++		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
++
++	do {
++		struct scatterlist *sg = &esp->sgbuf[0];
++
++		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
++			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
++			if (!sg)
++				goto error;
++		}
++		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
++		crypto_cipher_encrypt(tfm, sg, sg, clen);
++		if (unlikely(sg != &esp->sgbuf[0]))
++			kfree(sg);
++	} while (0);
++
++	if (esp->conf.ivlen) {
++		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
++		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
++	}
++
++	if (esp->auth.icv_full_len) {
++		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
++		              sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
++		pskb_put(skb, trailer, alen);
++	}
++
++	ip_send_check(top_iph);
++
++	err = 0;
++
++error:
++	return err;
++}
++
++/*
++ * Note: detecting truncated vs. non-truncated authentication data is very
++ * expensive, so we only support truncated data, which is the recommended
++ * and common case.
++ */
++static int esp_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	struct ip_esp_hdr *esph;
++	struct esp_data *esp = x->data;
++	struct sk_buff *trailer;
++	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
++	int alen = esp->auth.icv_trunc_len;
++	int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen;
++	int nfrags;
++	int encap_len = 0;
++
++	if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr)))
++		goto out;
++
++	if (elen <= 0 || (elen & (blksize-1)))
++		goto out;
++
++	/* If integrity check is required, do this. */
++	if (esp->auth.icv_full_len) {
++		u8 sum[esp->auth.icv_full_len];
++		u8 sum1[alen];
++		
++		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
++
++		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
++			BUG();
++
++		if (unlikely(memcmp(sum, sum1, alen))) {
++			x->stats.integrity_failed++;
++			goto out;
++		}
++	}
++
++	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0)
++		goto out;
++
++	skb->ip_summed = CHECKSUM_NONE;
++
++	esph = (struct ip_esp_hdr*)skb->data;
++	iph = skb->nh.iph;
++
++	/* Get the IV. This may be wrong; check against other implementations. */
++	if (esp->conf.ivlen)
++		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
++
++        {
++		u8 nexthdr[2];
++		struct scatterlist *sg = &esp->sgbuf[0];
++		u8 workbuf[60];
++		int padlen;
++
++		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
++			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
++			if (!sg)
++				goto out;
++		}
++		skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen);
++		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
++		if (unlikely(sg != &esp->sgbuf[0]))
++			kfree(sg);
++
++		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
++			BUG();
++
++		padlen = nexthdr[0];
++		if (padlen+2 >= elen)
++			goto out;
++
++		/* ... check padding bits here. Silly. :-) */ 
++
++		if (x->encap && decap && decap->decap_type) {
++			struct esp_decap_data *encap_data;
++			struct udphdr *uh = (struct udphdr *) (iph+1);
++
++			encap_data = (struct esp_decap_data *) (decap->decap_data);
++			encap_data->proto = 0;
++
++			switch (decap->decap_type) {
++			case UDP_ENCAP_ESPINUDP:
++			case UDP_ENCAP_ESPINUDP_NON_IKE:
++				encap_data->proto = AF_INET;
++				encap_data->saddr.a4 = iph->saddr;
++				encap_data->sport = uh->source;
++				encap_len = (void*)esph - (void*)uh;
++				break;
++
++			default:
++				goto out;
++			}
++		}
++
++		iph->protocol = nexthdr[1];
++		pskb_trim(skb, skb->len - alen - padlen - 2);
++		memcpy(workbuf, skb->nh.raw, iph->ihl*4);
++		skb->h.raw = skb_pull(skb, sizeof(struct ip_esp_hdr) + esp->conf.ivlen);
++		skb->nh.raw += encap_len + sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
++		memcpy(skb->nh.raw, workbuf, iph->ihl*4);
++		skb->nh.iph->tot_len = htons(skb->len);
++	}
++
++	return 0;
++
++out:
++	return -EINVAL;
++}
++
++static int esp_post_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++  
++	if (x->encap) {
++		struct xfrm_encap_tmpl *encap;
++		struct esp_decap_data *decap_data;
++
++		encap = x->encap;
++		decap_data = (struct esp_decap_data *)(decap->decap_data);
++
++		/* first, make sure that the decap type == the encap type */
++		if (encap->encap_type != decap->decap_type)
++			return -EINVAL;
++
++		switch (encap->encap_type) {
++		default:
++		case UDP_ENCAP_ESPINUDP:
++		case UDP_ENCAP_ESPINUDP_NON_IKE:
++			/*
++			 * 1) if the NAT-T peer's IP or port changed then
++			 *    advertise the change to the keying daemon.
++			 *    This is an inbound SA, so just compare
++			 *    SRC ports.
++			 */
++			if (decap_data->proto == AF_INET &&
++			    (decap_data->saddr.a4 != x->props.saddr.a4 ||
++			     decap_data->sport != encap->encap_sport)) {
++				xfrm_address_t ipaddr;
++
++				ipaddr.a4 = decap_data->saddr.a4;
++				km_new_mapping(x, &ipaddr, decap_data->sport);
++					
++				/* XXX: perhaps add an extra
++				 * policy check here, to see
++				 * if we should allow or
++				 * reject a packet from a
++				 * different source
++				 * address/port.
++				 */
++			}
++		
++			/*
++			 * 2) ignore UDP/TCP checksums in case
++			 *    of NAT-T in Transport Mode, or
++			 *    perform other post-processing fixes
++			 *    as per draft-ietf-ipsec-udp-encaps-06,
++			 *    section 3.1.2
++			 */
++			if (!x->props.mode)
++				skb->ip_summed = CHECKSUM_UNNECESSARY;
++
++			break;
++		}
++	}
++	return 0;
++}
++
++static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
++{
++	struct esp_data *esp = x->data;
++	u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
++
++	if (x->props.mode) {
++		mtu = (mtu + 2 + blksize-1)&~(blksize-1);
++	} else {
++		/* The worst case. */
++		mtu += 2 + blksize;
++	}
++	if (esp->conf.padlen)
++		mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
++
++	return mtu + x->props.header_len + esp->auth.icv_trunc_len;
++}
++
++static void esp4_err(struct sk_buff *skb, u32 info)
++{
++	struct iphdr *iph = (struct iphdr*)skb->data;
++	struct ip_esp_hdr *esph = (struct ip_esp_hdr*)(skb->data+(iph->ihl<<2));
++	struct xfrm_state *x;
++
++	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
++	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
++		return;
++
++	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET);
++	if (!x)
++		return;
++	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n",
++	       ntohl(esph->spi), ntohl(iph->daddr));
++	xfrm_state_put(x);
++}
++
++static void esp_destroy(struct xfrm_state *x)
++{
++	struct esp_data *esp = x->data;
++
++	if (!esp)
++		return;
++
++	if (esp->conf.tfm) {
++		crypto_free_tfm(esp->conf.tfm);
++		esp->conf.tfm = NULL;
++	}
++	if (esp->conf.ivec) {
++		kfree(esp->conf.ivec);
++		esp->conf.ivec = NULL;
++	}
++	if (esp->auth.tfm) {
++		crypto_free_tfm(esp->auth.tfm);
++		esp->auth.tfm = NULL;
++	}
++	if (esp->auth.work_icv) {
++		kfree(esp->auth.work_icv);
++		esp->auth.work_icv = NULL;
++	}
++	kfree(esp);
++}
++
++static int esp_init_state(struct xfrm_state *x, void *args)
++{
++	struct esp_data *esp = NULL;
++
++	/* null auth and encryption can have zero length keys */
++	if (x->aalg) {
++		if (x->aalg->alg_key_len > 512)
++			goto error;
++	}
++	if (x->ealg == NULL)
++		goto error;
++
++	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
++	if (esp == NULL)
++		return -ENOMEM;
++
++	memset(esp, 0, sizeof(*esp));
++
++	if (x->aalg) {
++		struct xfrm_algo_desc *aalg_desc;
++
++		esp->auth.key = x->aalg->alg_key;
++		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
++		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
++		if (esp->auth.tfm == NULL)
++			goto error;
++		esp->auth.icv = esp_hmac_digest;
++
++		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
++		BUG_ON(!aalg_desc);
++
++		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
++		    crypto_tfm_alg_digestsize(esp->auth.tfm)) {
++			printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
++			       x->aalg->alg_name,
++			       crypto_tfm_alg_digestsize(esp->auth.tfm),
++			       aalg_desc->uinfo.auth.icv_fullbits/8);
++			goto error;
++		}
++
++		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
++		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
++
++		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
++		if (!esp->auth.work_icv)
++			goto error;
++	}
++	esp->conf.key = x->ealg->alg_key;
++	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
++	if (x->props.ealgo == SADB_EALG_NULL)
++		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
++	else
++		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
++	if (esp->conf.tfm == NULL)
++		goto error;
++	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
++	esp->conf.padlen = 0;
++	if (esp->conf.ivlen) {
++		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
++		if (unlikely(esp->conf.ivec == NULL))
++			goto error;
++		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
++	}
++	crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len);
++	x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen;
++	if (x->props.mode)
++		x->props.header_len += sizeof(struct iphdr);
++	if (x->encap) {
++		struct xfrm_encap_tmpl *encap = x->encap;
++
++		switch (encap->encap_type) {
++		default:
++			goto error;
++		case UDP_ENCAP_ESPINUDP:
++			x->props.header_len += sizeof(struct udphdr);
++			break;
++		case UDP_ENCAP_ESPINUDP_NON_IKE:
++			x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
++			break;
++		}
++	}
++	x->data = esp;
++	x->props.trailer_len = esp4_get_max_size(x, 0) - x->props.header_len;
++	return 0;
++
++error:
++	x->data = esp;
++	esp_destroy(x);
++	x->data = NULL;
++	return -EINVAL;
++}
++
++static struct xfrm_type esp_type =
++{
++	.description	= "ESP4",
++	.owner		= THIS_MODULE,
++	.proto	     	= IPPROTO_ESP,
++	.init_state	= esp_init_state,
++	.destructor	= esp_destroy,
++	.get_max_size	= esp4_get_max_size,
++	.input		= esp_input,
++	.post_input	= esp_post_input,
++	.output		= esp_output
++};
++
++static struct inet_protocol esp4_protocol = {
++	.handler	=	xfrm4_rcv,
++	.err_handler	=	esp4_err,
++	.no_policy	=	1,
++};
++
++static int __init esp4_init(void)
++{
++	struct xfrm_decap_state decap;
++
++	if (sizeof(struct esp_decap_data)  <
++	    sizeof(decap.decap_data)) {
++		extern void decap_data_too_small(void);
++
++		decap_data_too_small();
++	}
++
++	if (xfrm_register_type(&esp_type, AF_INET) < 0) {
++		printk(KERN_INFO "ip esp init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet_add_protocol(&esp4_protocol, IPPROTO_ESP) < 0) {
++		printk(KERN_INFO "ip esp init: can't add protocol\n");
++		xfrm_unregister_type(&esp_type, AF_INET);
++		return -EAGAIN;
++	}
++	return 0;
++}
++
++static void __exit esp4_fini(void)
++{
++	if (inet_del_protocol(&esp4_protocol, IPPROTO_ESP) < 0)
++		printk(KERN_INFO "ip esp close: can't remove protocol\n");
++	if (xfrm_unregister_type(&esp_type, AF_INET) < 0)
++		printk(KERN_INFO "ip esp close: can't remove xfrm type\n");
++}
++
++module_init(esp4_init);
++module_exit(esp4_fini);
++MODULE_LICENSE("GPL");
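
The trickiest arithmetic in esp_output() is the padded payload length: two
trailer bytes (pad length and next header) are reserved first, then the total
is rounded up to the cipher block size, which is itself rounded up to a
multiple of 4. A stand-alone sketch of just that computation (block sizes are
examples):

#include <stdio.h>

/* padded ESP payload length, mirroring the clen computation in esp_output() */
static int esp_clen(int len, int raw_blksize, int padlen)
{
	int blksize = (raw_blksize + 3) & ~3;
	int clen = (len + 2 + blksize - 1) & ~(blksize - 1);

	if (padlen)	/* optional extra padding, e.g. to hide payload sizes */
		clen = (clen + padlen - 1) & ~(padlen - 1);
	return clen;
}

int main(void)
{
	printf("%d\n", esp_clen(100, 16, 0));	/* AES, 16-byte blocks: 112 */
	printf("%d\n", esp_clen(100, 8, 0));	/* DES/3DES, 8-byte blocks: 104 */
	return 0;
}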
+diff -Nru a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
+--- a/net/ipv4/fib_frontend.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/fib_frontend.c	2005-02-13 21:25:10 +11:00
+@@ -144,17 +144,15 @@
+ 
+ struct net_device * ip_dev_find(u32 addr)
+ {
+-	struct rt_key key;
++	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+ 	struct fib_result res;
+ 	struct net_device *dev = NULL;
+ 
+-	memset(&key, 0, sizeof(key));
+-	key.dst = addr;
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ 	res.r = NULL;
+ #endif
+ 
+-	if (!local_table || local_table->tb_lookup(local_table, &key, &res)) {
++	if (!local_table || local_table->tb_lookup(local_table, &fl, &res)) {
+ 		return NULL;
+ 	}
+ 	if (res.type != RTN_LOCAL)
+@@ -170,7 +168,7 @@
+ 
+ unsigned inet_addr_type(u32 addr)
+ {
+-	struct rt_key		key;
++	struct flowi		fl = { .nl_u = { .ip4_u = { .daddr = addr } } };
+ 	struct fib_result	res;
+ 	unsigned ret = RTN_BROADCAST;
+ 
+@@ -179,15 +177,13 @@
+ 	if (MULTICAST(addr))
+ 		return RTN_MULTICAST;
+ 
+-	memset(&key, 0, sizeof(key));
+-	key.dst = addr;
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ 	res.r = NULL;
+ #endif
+ 	
+ 	if (local_table) {
+ 		ret = RTN_UNICAST;
+-		if (local_table->tb_lookup(local_table, &key, &res) == 0) {
++		if (local_table->tb_lookup(local_table, &fl, &res) == 0) {
+ 			ret = res.type;
+ 			fib_res_put(&res);
+ 		}
+@@ -207,18 +203,15 @@
+ 			struct net_device *dev, u32 *spec_dst, u32 *itag)
+ {
+ 	struct in_device *in_dev;
+-	struct rt_key key;
++	struct flowi fl = { .nl_u = { .ip4_u =
++				      { .daddr = src,
++					.saddr = dst,
++					.tos = tos } },
++			    .iif = oif };
+ 	struct fib_result res;
+ 	int no_addr, rpf;
+ 	int ret;
+ 
+-	key.dst = src;
+-	key.src = dst;
+-	key.tos = tos;
+-	key.oif = 0;
+-	key.iif = oif;
+-	key.scope = RT_SCOPE_UNIVERSE;
+-
+ 	no_addr = rpf = 0;
+ 	read_lock(&inetdev_lock);
+ 	in_dev = __in_dev_get(dev);
+@@ -231,7 +224,7 @@
+ 	if (in_dev == NULL)
+ 		goto e_inval;
+ 
+-	if (fib_lookup(&key, &res))
++	if (fib_lookup(&fl, &res))
+ 		goto last_resort;
+ 	if (res.type != RTN_UNICAST)
+ 		goto e_inval_res;
+@@ -252,10 +245,10 @@
+ 		goto last_resort;
+ 	if (rpf)
+ 		goto e_inval;
+-	key.oif = dev->ifindex;
++	fl.oif = dev->ifindex;
+ 
+ 	ret = 0;
+-	if (fib_lookup(&key, &res) == 0) {
++	if (fib_lookup(&fl, &res) == 0) {
+ 		if (res.type == RTN_UNICAST) {
+ 			*spec_dst = FIB_RES_PREFSRC(res);
+ 			ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST;
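
The recurring change in this file (and throughout the patch) is the
replacement of the flat struct rt_key with struct flowi, whose IPv4 selectors
live in a nested union so one key type can serve routing and IPsec policy
lookups alike. A simplified mirror of the shape, showing why the
designated-initializer style works (the field layout here is illustrative,
not the kernel's exact definition):

#include <stdio.h>
#include <stdint.h>

struct flowi {
	int oif, iif;
	union {
		struct {
			uint32_t daddr, saddr;
			uint8_t  tos, scope;
		} ip4_u;
	} nl_u;
};
#define fl4_dst	nl_u.ip4_u.daddr
#define fl4_tos	nl_u.ip4_u.tos

int main(void)
{
	/* fields not mentioned are zero-initialized, so no memset() is needed */
	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = 0x0a000001 } } };

	printf("dst=%08x oif=%d tos=%u\n",
	       (unsigned)fl.fl4_dst, fl.oif, fl.fl4_tos);
	return 0;
}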
+diff -Nru a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c
+--- a/net/ipv4/fib_hash.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/fib_hash.c	2005-02-13 21:25:09 +11:00
+@@ -290,7 +290,7 @@
+ }
+ 
+ static int
+-fn_hash_lookup(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
++fn_hash_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+ {
+ 	int err;
+ 	struct fn_zone *fz;
+@@ -299,7 +299,7 @@
+ 	read_lock(&fib_hash_lock);
+ 	for (fz = t->fn_zone_list; fz; fz = fz->fz_next) {
+ 		struct fib_node *f;
+-		fn_key_t k = fz_key(key->dst, fz);
++		fn_key_t k = fz_key(flp->fl4_dst, fz);
+ 
+ 		for (f = fz_chain(k, fz); f; f = f->fn_next) {
+ 			if (!fn_key_eq(k, f->fn_key)) {
+@@ -309,17 +309,17 @@
+ 					continue;
+ 			}
+ #ifdef CONFIG_IP_ROUTE_TOS
+-			if (f->fn_tos && f->fn_tos != key->tos)
++			if (f->fn_tos && f->fn_tos != flp->fl4_tos)
+ 				continue;
+ #endif
+ 			f->fn_state |= FN_S_ACCESSED;
+ 
+ 			if (f->fn_state&FN_S_ZOMBIE)
+ 				continue;
+-			if (f->fn_scope < key->scope)
++			if (f->fn_scope < flp->fl4_scope)
+ 				continue;
+ 
+-			err = fib_semantic_match(f->fn_type, FIB_INFO(f), key, res);
++			err = fib_semantic_match(f->fn_type, FIB_INFO(f), flp, res);
+ 			if (err == 0) {
+ 				res->type = f->fn_type;
+ 				res->scope = f->fn_scope;
+@@ -362,7 +362,7 @@
+ }
+ 
+ static void
+-fn_hash_select_default(struct fib_table *tb, const struct rt_key *key, struct fib_result *res)
++fn_hash_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
+ {
+ 	int order, last_idx;
+ 	struct fib_node *f;
+diff -Nru a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
+--- a/net/ipv4/fib_rules.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/fib_rules.c	2005-02-13 21:25:09 +11:00
+@@ -307,28 +307,28 @@
+ 	}
+ }
+ 
+-int fib_lookup(const struct rt_key *key, struct fib_result *res)
++int fib_lookup(const struct flowi *flp, struct fib_result *res)
+ {
+ 	int err;
+ 	struct fib_rule *r, *policy;
+ 	struct fib_table *tb;
+ 
+-	u32 daddr = key->dst;
+-	u32 saddr = key->src;
++	u32 daddr = flp->fl4_dst;
++	u32 saddr = flp->fl4_src;
+ 
+ FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ",
+-	NIPQUAD(key->dst), NIPQUAD(key->src));
++	NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src));
+ 	read_lock(&fib_rules_lock);
+ 	for (r = fib_rules; r; r=r->r_next) {
+ 		if (((saddr^r->r_src) & r->r_srcmask) ||
+ 		    ((daddr^r->r_dst) & r->r_dstmask) ||
+ #ifdef CONFIG_IP_ROUTE_TOS
+-		    (r->r_tos && r->r_tos != key->tos) ||
++		    (r->r_tos && r->r_tos != flp->fl4_tos) ||
+ #endif
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-		    (r->r_fwmark && r->r_fwmark != key->fwmark) ||
++		    (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) ||
+ #endif
+-		    (r->r_ifindex && r->r_ifindex != key->iif))
++		    (r->r_ifindex && r->r_ifindex != flp->iif))
+ 			continue;
+ 
+ FRprintk("tb %d r %d ", r->r_table, r->r_action);
+@@ -351,7 +351,7 @@
+ 
+ 		if ((tb = fib_get_table(r->r_table)) == NULL)
+ 			continue;
+-		err = tb->tb_lookup(tb, key, res);
++		err = tb->tb_lookup(tb, flp, res);
+ 		if (err == 0) {
+ 			res->r = policy;
+ 			if (policy)
+@@ -369,13 +369,13 @@
+ 	return -ENETUNREACH;
+ }
+ 
+-void fib_select_default(const struct rt_key *key, struct fib_result *res)
++void fib_select_default(const struct flowi *flp, struct fib_result *res)
+ {
+ 	if (res->r && res->r->r_action == RTN_UNICAST &&
+ 	    FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) {
+ 		struct fib_table *tb;
+ 		if ((tb = fib_get_table(res->r->r_table)) != NULL)
+-			tb->tb_select_default(tb, key, res);
++			tb->tb_select_default(tb, flp, res);
+ 	}
+ }
+ 
+diff -Nru a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
+--- a/net/ipv4/fib_semantics.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/fib_semantics.c	2005-02-13 21:25:09 +11:00
+@@ -349,7 +349,6 @@
+ 	int err;
+ 
+ 	if (nh->nh_gw) {
+-		struct rt_key key;
+ 		struct fib_result res;
+ 
+ #ifdef CONFIG_IP_ROUTE_PERVASIVE
+@@ -372,16 +371,18 @@
+ 			nh->nh_scope = RT_SCOPE_LINK;
+ 			return 0;
+ 		}
+-		memset(&key, 0, sizeof(key));
+-		key.dst = nh->nh_gw;
+-		key.oif = nh->nh_oif;
+-		key.scope = r->rtm_scope + 1;
+-
+-		/* It is not necessary, but requires a bit of thinking */
+-		if (key.scope < RT_SCOPE_LINK)
+-			key.scope = RT_SCOPE_LINK;
+-		if ((err = fib_lookup(&key, &res)) != 0)
+-			return err;
++		{
++			struct flowi fl = { .nl_u = { .ip4_u =
++						      { .daddr = nh->nh_gw,
++							.scope = r->rtm_scope + 1 } },
++					    .oif = nh->nh_oif };
++
++			/* It is not necessary, but requires a bit of thinking */
++			if (fl.fl4_scope < RT_SCOPE_LINK)
++				fl.fl4_scope = RT_SCOPE_LINK;
++			if ((err = fib_lookup(&fl, &res)) != 0)
++				return err;
++		}
+ 		err = -EINVAL;
+ 		if (res.type != RTN_UNICAST && res.type != RTN_LOCAL)
+ 			goto out;
+@@ -578,7 +579,7 @@
+ }
+ 
+ int 
+-fib_semantic_match(int type, struct fib_info *fi, const struct rt_key *key, struct fib_result *res)
++fib_semantic_match(int type, struct fib_info *fi, const struct flowi *flp, struct fib_result *res)
+ {
+ 	int err = fib_props[type].error;
+ 
+@@ -603,7 +604,7 @@
+ 			for_nexthops(fi) {
+ 				if (nh->nh_flags&RTNH_F_DEAD)
+ 					continue;
+-				if (!key->oif || key->oif == nh->nh_oif)
++				if (!flp->oif || flp->oif == nh->nh_oif)
+ 					break;
+ 			}
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+@@ -949,7 +950,7 @@
+    fair weighted route distribution.
+  */
+ 
+-void fib_select_multipath(const struct rt_key *key, struct fib_result *res)
++void fib_select_multipath(const struct flowi *flp, struct fib_result *res)
+ {
+ 	struct fib_info *fi = res->fi;
+ 	int w;
+diff -Nru a/net/ipv4/icmp.c b/net/ipv4/icmp.c
+--- a/net/ipv4/icmp.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/icmp.c	2005-02-13 21:25:10 +11:00
+@@ -101,7 +101,6 @@
+ 	int offset;
+ 	int data_len;
+ 
+-	unsigned int csum;
+ 	struct {
+ 		struct icmphdr icmph;
+ 		__u32	       times[3];
+@@ -139,8 +138,6 @@
+   { EHOSTUNREACH,	1 }	/*	ICMP_PREC_CUTOFF	*/
+ };
+ 
+-extern int sysctl_ip_default_ttl;
+-
+ /* Control parameters for ECHO replies. */
+ int sysctl_icmp_echo_ignore_all;
+ int sysctl_icmp_echo_ignore_broadcasts;
+@@ -281,39 +278,47 @@
+  *	Checksum each fragment, and on the first include the headers and final checksum.
+  */
+  
+-static int icmp_glue_bits(const void *p, char *to, unsigned int offset,
+-                          unsigned int fraglen, struct sk_buff *skb)
++int
++icmp_glue_bits(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
+ {
+-	struct icmp_bxm *icmp_param = (struct icmp_bxm *)p;
+-	struct icmphdr *icmph;
++	struct icmp_bxm *icmp_param = (struct icmp_bxm *)from;
+ 	unsigned int csum;
+ 
++	csum = skb_copy_and_csum_bits(icmp_param->skb,
++				      icmp_param->offset + offset,
++				      to, len, 0);
++
++	skb->csum = csum_block_add(skb->csum, csum, odd);
+ 	if (icmp_pointers[icmp_param->data.icmph.type].error)
+ 		nf_ct_attach(skb, icmp_param->skb);
++	return 0;
++}
+ 
+-	if (offset) {
+-		icmp_param->csum=skb_copy_and_csum_bits(icmp_param->skb,
+-							icmp_param->offset+(offset-icmp_param->head_len), 
+-							to, fraglen,icmp_param->csum);
+-		return 0;
+-	}
++static void
++icmp_push_reply(struct icmp_bxm *icmp_param, struct ipcm_cookie *ipc, struct rtable *rt)
++{
++	struct sk_buff *skb;
+ 
+-	/*
+-	 *	First fragment includes header. Note that we've done
+-	 *	the other fragments first, so that we get the checksum
+-	 *	for the whole packet here.
+-	 */
+-	csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
+-		to, icmp_param->head_len,
+-		icmp_param->csum);
+-	csum=skb_copy_and_csum_bits(icmp_param->skb,
+-				    icmp_param->offset, 
+-				    to+icmp_param->head_len,
+-				    fraglen-icmp_param->head_len,
+-				    csum);
+-	icmph=(struct icmphdr *)to;
+-	icmph->checksum = csum_fold(csum);
+-	return 0;
++	ip_append_data(icmp_socket->sk, icmp_glue_bits, icmp_param,
++		       icmp_param->data_len+icmp_param->head_len,
++		       icmp_param->head_len,
++		       ipc, rt, MSG_DONTWAIT);
++
++	if ((skb = skb_peek(&icmp_socket->sk->write_queue)) != NULL) {
++		struct icmphdr *icmph = skb->h.icmph;
++		unsigned int csum = 0;
++		struct sk_buff *skb1;
++
++		skb_queue_walk(&icmp_socket->sk->write_queue, skb1) {
++			csum = csum_add(csum, skb1->csum);
++		}
++		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
++						 (char*)icmph, icmp_param->head_len,	
++						 csum);
++		icmph->checksum = csum_fold(csum);
++		skb->ip_summed = CHECKSUM_NONE;
++		ip_push_pending_frames(icmp_socket->sk);
++	}
+ }
+ 
+ /*
+@@ -334,11 +339,9 @@
+ 		return;
+ 
+ 	icmp_param->data.icmph.checksum=0;
+-	icmp_param->csum=0;
+ 	icmp_out_count(icmp_param->data.icmph.type);
+ 
+ 	sk->protinfo.af_inet.tos = skb->nh.iph->tos;
+-	sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
+ 	daddr = ipc.addr = rt->rt_src;
+ 	ipc.opt = NULL;
+ 	if (icmp_param->replyopts.optlen) {
+@@ -346,14 +349,18 @@
+ 		if (ipc.opt->srr)
+ 			daddr = icmp_param->replyopts.faddr;
+ 	}
+-	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
+-		goto out;
+-	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 
+-			       icmp_param->data.icmph.code)) { 
+-		ip_build_xmit(sk, icmp_glue_bits, icmp_param, 
+-			      icmp_param->data_len+icmp_param->head_len,
+-			      &ipc, rt, MSG_DONTWAIT);
++	{
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = daddr,
++						.saddr = rt->rt_spec_dst,
++						.tos = RT_TOS(skb->nh.iph->tos) } },
++				    .proto = IPPROTO_ICMP };
++		if (ip_route_output_key(&rt, &fl))
++			goto out;
+ 	}
++	if (icmpv4_xrlim_allow(rt, icmp_param->data.icmph.type, 
++			       icmp_param->data.icmph.code))
++		icmp_push_reply(icmp_param, &ipc, rt);
+ 	ip_rt_put(rt);
+ out:
+ 	icmp_xmit_unlock();
+@@ -450,8 +457,8 @@
+ 	 *	Restore original addresses if packet has been translated.
+ 	 */
+ 	if (rt->rt_flags&RTCF_NAT && IPCB(skb_in)->flags&IPSKB_TRANSLATED) {
+-		iph->daddr = rt->key.dst;
+-		iph->saddr = rt->key.src;
++		iph->daddr = rt->fl.fl4_dst;
++		iph->saddr = rt->fl.fl4_src;
+ 	}
+ #endif
+ 
+@@ -463,9 +470,14 @@
+ 		((iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL) :
+ 			iph->tos;
+ 
+-	if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
+-		goto out;
+-
++	{
++		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->saddr,
++							 .saddr = saddr,
++							 .tos = RT_TOS(tos) } },
++				    .proto = IPPROTO_ICMP };
++		if (ip_route_output_key(&rt, &fl))
++			goto out;
++	}
+ 	if (ip_options_echo(&icmp_param.replyopts, skb_in)) 
+ 		goto ende;
+ 
+@@ -478,17 +490,20 @@
+ 	icmp_param.data.icmph.code=code;
+ 	icmp_param.data.icmph.un.gateway = info;
+ 	icmp_param.data.icmph.checksum=0;
+-	icmp_param.csum=0;
+ 	icmp_param.skb=skb_in;
+ 	icmp_param.offset=skb_in->nh.raw - skb_in->data;
+ 	icmp_out_count(icmp_param.data.icmph.type);
+ 	icmp_socket->sk->protinfo.af_inet.tos = tos;
+-	icmp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
+ 	ipc.addr = iph->saddr;
+ 	ipc.opt = &icmp_param.replyopts;
+ 	if (icmp_param.replyopts.srr) {
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = icmp_param.replyopts.faddr,
++						.saddr = saddr,
++						.tos = RT_TOS(tos) } },
++				    .proto = IPPROTO_ICMP };
+ 		ip_rt_put(rt);
+-		if (ip_route_output(&rt, icmp_param.replyopts.faddr, saddr, RT_TOS(tos), 0))
++		if (ip_route_output_key(&rt, &fl))
+ 			goto out;
+ 	}
+ 
+@@ -497,7 +512,7 @@
+ 
+ 	/* RFC says return as much as we can without exceeding 576 bytes. */
+ 
+-	room = rt->u.dst.pmtu;
++	room = dst_pmtu(&rt->u.dst);
+ 	if (room > 576)
+ 		room = 576;
+ 	room -= sizeof(struct iphdr) + icmp_param.replyopts.optlen;
+@@ -508,9 +523,7 @@
+ 		icmp_param.data_len = room;
+ 	icmp_param.head_len = sizeof(struct icmphdr);
+ 
+-	ip_build_xmit(icmp_socket->sk, icmp_glue_bits, &icmp_param, 
+-		icmp_param.data_len+sizeof(struct icmphdr),
+-		&ipc, rt, MSG_DONTWAIT);
++	icmp_push_reply(&icmp_param, &ipc, rt);
+ 
+ ende:
+ 	ip_rt_put(rt);
+@@ -649,24 +662,10 @@
+ 	 *	we are OK.
+ 	 */
+ 
+-	ipprot = (struct inet_protocol *) inet_protos[hash];
+-	while (ipprot) {
+-		struct inet_protocol *nextip;
+-
+-		nextip = (struct inet_protocol *) ipprot->next;
+-	
+-		/* 
+-		 *	Pass it off to everyone who wants it. 
+-		 */
++	ipprot = inet_protos[hash];
++	if (ipprot && ipprot->err_handler)
++		ipprot->err_handler(skb, info);
+ 
+-		/* RFC1122: OK. Passes appropriate ICMP errors to the */
+-		/* appropriate protocol layer (MUST), as per 3.2.2. */
+-
+-		if (protocol == ipprot->protocol && ipprot->err_handler)
+- 			ipprot->err_handler(skb, info);
+-
+-		ipprot = nextip;
+-  	}
+ out:;
+ }
+ 
+@@ -995,7 +994,7 @@
+ 		icmp_socket_cpu(i)->sk->sndbuf =
+ 			(2 * ((64 * 1024) + sizeof(struct sk_buff)));
+ 
+-		icmp_socket_cpu(i)->sk->protinfo.af_inet.ttl = MAXTTL;
++		icmp_socket_cpu(i)->sk->protinfo.af_inet.uc_ttl = -1;
+ 		icmp_socket_cpu(i)->sk->protinfo.af_inet.pmtudisc = IP_PMTUDISC_DONT;
+ 
+ 		/* Unhash it so that IP input processing does not even
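
icmp_push_reply() above can add the per-fragment skb->csum values and fold
only once because the Internet checksum is a ones-complement sum: partial
sums over even-sized pieces combine by plain addition. A user-space
demonstration (csum_partial here is a simplified big-endian stand-in for the
kernel's version):

#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

static uint32_t csum_partial(const uint8_t *p, size_t len, uint32_t sum)
{
	while (len > 1) {
		sum += (uint32_t)(p[0] << 8 | p[1]);
		p += 2;
		len -= 2;
	}
	if (len)
		sum += (uint32_t)p[0] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

int main(void)
{
	uint8_t data[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint32_t whole = csum_partial(data, 8, 0);
	uint32_t parts = csum_partial(data, 4, 0) + csum_partial(data + 4, 4, 0);

	while (parts >> 16)
		parts = (parts & 0xffff) + (parts >> 16);
	printf("%04x %04x\n", whole, parts);	/* identical */
	return 0;
}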
+diff -Nru a/net/ipv4/igmp.c b/net/ipv4/igmp.c
+--- a/net/ipv4/igmp.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/igmp.c	2005-02-13 21:25:10 +11:00
+@@ -218,15 +218,6 @@
+ 
+ #define IGMP_SIZE (sizeof(struct igmphdr)+sizeof(struct iphdr)+4)
+ 
+-/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
+-   changes route */
+-static inline int
+-output_maybe_reroute(struct sk_buff *skb)
+-{
+-	return skb->dst->output(skb);
+-}
+-
+-
+ static int is_in(struct ip_mc_list *pmc, struct ip_sf_list *psf, int type,
+ 	int gdeleted, int sdeleted)
+ {
+@@ -283,13 +274,18 @@
+ 	u32	dst;
+ 
+ 	dst = IGMPV3_ALL_MCR;
+-	if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
+-		return 0;
++	{
++		struct flowi fl = { .oif = dev->ifindex,
++				    .nl_u = { .ip4_u = { .daddr = dst } },
++				    .proto = IPPROTO_IGMP };
++		if (ip_route_output_key(&rt, &fl))
++			return 0;
++	}
+ 	if (rt->rt_src == 0) {
+ 		ip_rt_put(rt);
+ 		return 0;
+ 	}
+-	skb = alloc_skb(size + dev->hard_header_len + 15, GFP_ATOMIC);
++	skb = alloc_skb(size + LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ 	if (skb == NULL) {
+ 		ip_rt_put(rt);
+ 		return 0;
+@@ -298,7 +294,7 @@
+ 	skb->dst = &rt->u.dst;
+ 	skb->dev = dev;
+ 
+-	skb_reserve(skb, (dev->hard_header_len+15)&~15);
++	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ 
+ 	skb->nh.iph = pip =(struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+ 
+@@ -341,7 +337,7 @@
+ 	pig->csum = ip_compute_csum((void *)skb->h.igmph, igmplen);
+ 
+ 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, skb->dev,
+-		       output_maybe_reroute);
++		       dst_output);
+ }
+ 
+ static int grec_size(struct ip_mc_list *pmc, int type, int gdel, int sdel)
+@@ -623,14 +619,19 @@
+ 	else
+ 		dst = group;
+ 
+-	if (ip_route_output(&rt, dst, 0, 0, dev->ifindex))
+-		return -1;
++	{
++		struct flowi fl = { .oif = dev->ifindex,
++				    .nl_u = { .ip4_u = { .daddr = dst } },
++				    .proto = IPPROTO_IGMP };
++		if (ip_route_output_key(&rt, &fl))
++			return -1;
++	}
+ 	if (rt->rt_src == 0) {
+ 		ip_rt_put(rt);
+ 		return -1;
+ 	}
+ 
+-	skb=alloc_skb(IGMP_SIZE+dev->hard_header_len+15, GFP_ATOMIC);
++	skb=alloc_skb(IGMP_SIZE+LL_RESERVED_SPACE(dev), GFP_ATOMIC);
+ 	if (skb == NULL) {
+ 		ip_rt_put(rt);
+ 		return -1;
+@@ -638,7 +639,7 @@
+ 
+ 	skb->dst = &rt->u.dst;
+ 
+-	skb_reserve(skb, (dev->hard_header_len+15)&~15);
++	skb_reserve(skb, LL_RESERVED_SPACE(dev));
+ 
+ 	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, sizeof(struct iphdr)+4);
+ 
+@@ -666,7 +667,7 @@
+ 	ih->csum=ip_compute_csum((void *)ih, sizeof(struct igmphdr));
+ 
+ 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+-		       output_maybe_reroute);
++		       dst_output);
+ }
+ 
+ static void igmp_gq_timer_expire(unsigned long data)
+@@ -874,7 +875,7 @@
+ 	case IGMPV2_HOST_MEMBERSHIP_REPORT:
+ 	case IGMPV3_HOST_MEMBERSHIP_REPORT:
+ 		/* Is it our report looped back? */
+-		if (((struct rtable*)skb->dst)->key.iif == 0)
++		if (((struct rtable*)skb->dst)->fl.iif == 0)
+ 			break;
+ 		igmp_heard_report(in_dev, ih->group);
+ 		break;
+@@ -1283,6 +1284,8 @@
+ 
+ static struct in_device * ip_mc_find_dev(struct ip_mreqn *imr)
+ {
++	struct flowi fl = { .nl_u = { .ip4_u =
++				      { .daddr = imr->imr_multiaddr.s_addr } } };
+ 	struct rtable *rt;
+ 	struct net_device *dev = NULL;
+ 	struct in_device *idev = NULL;
+@@ -1300,7 +1303,7 @@
+ 		__dev_put(dev);
+ 	}
+ 
+-	if (!dev && !ip_route_output(&rt, imr->imr_multiaddr.s_addr, 0, 0, 0)) {
++	if (!dev && !ip_route_output_key(&rt, &fl)) {
+ 		dev = rt->u.dst.dev;
+ 		ip_rt_put(rt);
+ 	}
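
The repeated replacement of "(dev->hard_header_len + 15) & ~15" with
LL_RESERVED_SPACE(dev) in this file and in arp.c centralizes the headroom
reservation: link-layer header space is rounded up to 16 bytes so the IP
header starts aligned. The rounding in isolation (the kernel macro takes a
device pointer and its exact definition differs in detail; this stand-in only
shows the arithmetic being removed from each call site):

#include <stdio.h>

static int reserve_for_hard_header(int hh_len)
{
	return (hh_len + 15) & ~15;
}

int main(void)
{
	printf("%d\n", reserve_for_hard_header(14));	/* Ethernet: 16 */
	printf("%d\n", reserve_for_hard_header(24));	/* 32 */
	printf("%d\n", reserve_for_hard_header(0));	/* 0 */
	return 0;
}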
+diff -Nru a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c
+--- a/net/ipv4/ip_forward.c	2005-02-13 21:25:08 +11:00
++++ b/net/ipv4/ip_forward.c	2005-02-13 21:25:08 +11:00
+@@ -40,6 +40,7 @@
+ #include <net/checksum.h>
+ #include <linux/route.h>
+ #include <net/route.h>
++#include <net/xfrm.h>
+ 
+ static inline int ip_forward_finish(struct sk_buff *skb)
+ {
+@@ -47,36 +48,20 @@
+ 
+ 	IP_INC_STATS_BH(IpForwDatagrams);
+ 
+-	if (opt->optlen == 0) {
+-#ifdef CONFIG_NET_FASTROUTE
+-		struct rtable *rt = (struct rtable*)skb->dst;
+-
+-		if (rt->rt_flags&RTCF_FAST && !netdev_fastroute_obstacles) {
+-			struct dst_entry *old_dst;
+-			unsigned h = ((*(u8*)&rt->key.dst)^(*(u8*)&rt->key.src))&NETDEV_FASTROUTE_HMASK;
+-
+-			write_lock_irq(&skb->dev->fastpath_lock);
+-			old_dst = skb->dev->fastpath[h];
+-			skb->dev->fastpath[h] = dst_clone(&rt->u.dst);
+-			write_unlock_irq(&skb->dev->fastpath_lock);
+-
+-			dst_release(old_dst);
+-		}
+-#endif
+-		return (ip_send(skb));
+-	}
++	if (unlikely(opt->optlen))
++		ip_forward_options(skb);
+ 
+-	ip_forward_options(skb);
+-	return (ip_send(skb));
++	return dst_output(skb);
+ }
+ 
+ int ip_forward(struct sk_buff *skb)
+ {
+-	struct net_device *dev2;	/* Output device */
+ 	struct iphdr *iph;	/* Our header */
+ 	struct rtable *rt;	/* Route we use */
+ 	struct ip_options * opt	= &(IPCB(skb)->opt);
+-	unsigned short mtu;
++
++	if (!xfrm4_policy_check(NULL, XFRM_POLICY_FWD, skb))
++		goto drop;
+ 
+ 	if (IPCB(skb)->opt.router_alert && ip_call_ra_chain(skb))
+ 		return NET_RX_SUCCESS;
+@@ -93,32 +78,21 @@
+ 	 */
+ 
+ 	iph = skb->nh.iph;
+-	rt = (struct rtable*)skb->dst;
+ 
+ 	if (iph->ttl <= 1)
+                 goto too_many_hops;
+ 
+-	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
+-                goto sr_failed;
+-
+-	/*
+-	 *	Having picked a route we can now send the frame out
+-	 *	after asking the firewall permission to do so.
+-	 */
++	if (!xfrm4_route_forward(skb))
++		goto drop;
+ 
+-	skb->priority = rt_tos2priority(iph->tos);
+-	dev2 = rt->u.dst.dev;
+-	mtu = rt->u.dst.pmtu;
++	iph = skb->nh.iph;
++	rt = (struct rtable*)skb->dst;
+ 
+-	/*
+-	 *	We now generate an ICMP HOST REDIRECT giving the route
+-	 *	we calculated.
+-	 */
+-	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr)
+-		ip_rt_send_redirect(skb);
++	if (opt->is_strictroute && rt->rt_dst != rt->rt_gateway)
++		goto sr_failed;
+ 
+ 	/* We are about to mangle packet. Copy it! */
+-	if (skb_cow(skb, dev2->hard_header_len))
++	if (skb_cow(skb, LL_RESERVED_SPACE(rt->u.dst.dev)+rt->u.dst.header_len))
+ 		goto drop;
+ 	iph = skb->nh.iph;
+ 
+@@ -126,29 +100,16 @@
+ 	ip_decrease_ttl(iph);
+ 
+ 	/*
+-	 * We now may allocate a new buffer, and copy the datagram into it.
+-	 * If the indicated interface is up and running, kick it.
++	 *	We now generate an ICMP HOST REDIRECT giving the route
++	 *	we calculated.
+ 	 */
++	if (rt->rt_flags&RTCF_DOREDIRECT && !opt->srr)
++		ip_rt_send_redirect(skb);
+ 
+-	if (skb->len > mtu && (ntohs(iph->frag_off) & IP_DF))
+-		goto frag_needed;
+-
+-#ifdef CONFIG_IP_ROUTE_NAT
+-	if (rt->rt_flags & RTCF_NAT) {
+-		if (ip_do_nat(skb)) {
+-			kfree_skb(skb);
+-			return NET_RX_BAD;
+-		}
+-	}
+-#endif
++	skb->priority = rt_tos2priority(iph->tos);
+ 
+-	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, dev2,
++	return NF_HOOK(PF_INET, NF_IP_FORWARD, skb, skb->dev, rt->u.dst.dev,
+ 		       ip_forward_finish);
+-
+-frag_needed:
+-	IP_INC_STATS_BH(IpFragFails);
+-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+-        goto drop;
+ 
+ sr_failed:
+         /*
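
After this rewrite the per-packet work left in ip_forward() is essentially:
xfrm policy check, TTL expiry check, xfrm route lookup, copy-on-write, TTL
decrement, then dst_output(); fragmentation has moved out to the output path.
The TTL step alone, as a sketch (the kernel's ip_decrease_ttl() also patches
the header checksum incrementally, which is omitted here):

#include <stdio.h>

/* returns 0 to forward, -1 when an ICMP Time Exceeded should be sent */
static int forward_ttl(unsigned char *ttl)
{
	if (*ttl <= 1)
		return -1;
	(*ttl)--;
	return 0;
}

int main(void)
{
	unsigned char ttl = 64;

	forward_ttl(&ttl);
	printf("after one hop: %u\n", ttl);	/* 63 */
	ttl = 1;
	printf("verdict at ttl 1: %d\n", forward_ttl(&ttl));	/* -1: drop */
	return 0;
}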
+diff -Nru a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
+--- a/net/ipv4/ip_gre.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ip_gre.c	2005-02-13 21:25:09 +11:00
+@@ -37,6 +37,7 @@
+ #include <net/arp.h>
+ #include <net/checksum.h>
+ #include <net/inet_ecn.h>
++#include <net/xfrm.h>
+ 
+ #ifdef CONFIG_IPV6
+ #include <net/ipv6.h>
+@@ -410,6 +411,7 @@
+ 	u16 flags;
+ 	int grehlen = (iph->ihl<<2) + 4;
+ 	struct sk_buff *skb2;
++	struct flowi fl;
+ 	struct rtable *rt;
+ 
+ 	if (p[1] != htons(ETH_P_IP))
+@@ -486,7 +488,11 @@
+ 	skb2->nh.raw = skb2->data;
+ 
+ 	/* Try to guess incoming interface */
+-	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
++	memset(&fl, 0, sizeof(fl));
++	fl.fl4_dst = eiph->saddr;
++	fl.fl4_tos = RT_TOS(eiph->tos);
++	fl.proto = IPPROTO_GRE;
++	if (ip_route_output_key(&rt, &fl)) {
+ 		kfree_skb(skb2);
+ 		return;
+ 	}
+@@ -496,7 +502,10 @@
+ 	if (rt->rt_flags&RTCF_LOCAL) {
+ 		ip_rt_put(rt);
+ 		rt = NULL;
+-		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
++		fl.fl4_dst = eiph->daddr;
++		fl.fl4_src = eiph->saddr;
++		fl.fl4_tos = eiph->tos;
++		if (ip_route_output_key(&rt, &fl) ||
+ 		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
+ 			ip_rt_put(rt);
+ 			kfree_skb(skb2);
+@@ -513,11 +522,11 @@
+ 
+ 	/* change mtu on this route */
+ 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+-		if (rel_info > skb2->dst->pmtu) {
++		if (rel_info > dst_pmtu(skb2->dst)) {
+ 			kfree_skb(skb2);
+ 			return;
+ 		}
+-		skb2->dst->pmtu = rel_info;
++		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+ 		rel_info = htonl(rel_info);
+ 	} else if (type == ICMP_TIME_EXCEEDED) {
+ 		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+@@ -606,6 +615,8 @@
+ 
+ 	read_lock(&ipgre_lock);
+ 	if ((tunnel = ipgre_tunnel_lookup(iph->saddr, iph->daddr, key)) != NULL) {
++		secpath_reset(skb);
++
+ 		skb->mac.raw = skb->nh.raw;
+ 		skb->nh.raw = __pskb_pull(skb, offset);
+ 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+@@ -617,7 +628,7 @@
+ #ifdef CONFIG_NET_IPGRE_BROADCAST
+ 		if (MULTICAST(iph->daddr)) {
+ 			/* Looped back packet, drop it! */
+-			if (((struct rtable*)skb->dst)->key.iif == 0)
++			if (((struct rtable*)skb->dst)->fl.iif == 0)
+ 				goto drop;
+ 			tunnel->stat.multicast++;
+ 			skb->pkt_type = PACKET_BROADCAST;
+@@ -659,12 +670,6 @@
+ 	return(0);
+ }
+ 
+-/* Need this wrapper because NF_HOOK takes the function address */
+-static inline int do_ip_send(struct sk_buff *skb)
+-{
+-	return ip_send(skb);
+-}
+-
+ static int ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev)
+ {
+ 	struct ip_tunnel *tunnel = (struct ip_tunnel*)dev->priv;
+@@ -741,9 +746,17 @@
+ 		tos &= ~1;
+ 	}
+ 
+-	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+-		tunnel->stat.tx_carrier_errors++;
+-		goto tx_error;
++	{
++		struct flowi fl = { .oif = tunnel->parms.link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = dst,
++						.saddr = tiph->saddr,
++						.tos = RT_TOS(tos) } },
++				    .proto = IPPROTO_GRE };
++		if (ip_route_output_key(&rt, &fl)) {
++			tunnel->stat.tx_carrier_errors++;
++			goto tx_error;
++		}
+ 	}
+ 	tdev = rt->u.dst.dev;
+ 
+@@ -755,14 +768,14 @@
+ 
+ 	df = tiph->frag_off;
+ 	if (df)
+-		mtu = rt->u.dst.pmtu - tunnel->hlen;
++		mtu = dst_pmtu(&rt->u.dst) - tunnel->hlen;
+ 	else
+-		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
++		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
+ 
+-	if (skb->protocol == htons(ETH_P_IP)) {
+-		if (skb->dst && mtu < skb->dst->pmtu && mtu >= 68)
+-			skb->dst->pmtu = mtu;
++	if (skb->dst)
++		skb->dst->ops->update_pmtu(skb->dst, mtu);
+ 
++	if (skb->protocol == htons(ETH_P_IP)) {
+ 		df |= (old_iph->frag_off&htons(IP_DF));
+ 
+ 		if ((old_iph->frag_off&htons(IP_DF)) &&
+@@ -776,11 +789,11 @@
+ 	else if (skb->protocol == htons(ETH_P_IPV6)) {
+ 		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
+ 
+-		if (rt6 && mtu < rt6->u.dst.pmtu && mtu >= IPV6_MIN_MTU) {
++		if (rt6 && mtu < dst_pmtu(skb->dst) && mtu >= IPV6_MIN_MTU) {
+ 			if ((tunnel->parms.iph.daddr && !MULTICAST(tunnel->parms.iph.daddr)) ||
+ 			    rt6->rt6i_dst.plen == 128) {
+ 				rt6->rt6i_flags |= RTF_MODIFIED;
+-				skb->dst->pmtu = mtu;
++				skb->dst->metrics[RTAX_MTU-1] = mtu;
+ 			}
+ 		}
+ 
+@@ -801,7 +814,7 @@
+ 			tunnel->err_count = 0;
+ 	}
+ 
+-	max_headroom = ((tdev->hard_header_len+15)&~15)+ gre_hlen;
++	max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen;
+ 
+ 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+@@ -846,7 +859,7 @@
+ 			iph->ttl = ((struct ipv6hdr*)old_iph)->hop_limit;
+ #endif
+ 		else
+-			iph->ttl = sysctl_ip_default_ttl;
++			iph->ttl = dst_metric(&rt->u.dst, RTAX_HOPLIMIT);
+ 	}
+ 
+ 	((u16*)(iph+1))[0] = tunnel->parms.o_flags;
+@@ -1090,10 +1103,14 @@
+ 
+ 	MOD_INC_USE_COUNT;
+ 	if (MULTICAST(t->parms.iph.daddr)) {
++		struct flowi fl = { .oif = t->parms.link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = t->parms.iph.daddr,
++						.saddr = t->parms.iph.saddr,
++						.tos = RT_TOS(t->parms.iph.tos) } },
++				    .proto = IPPROTO_GRE };
+ 		struct rtable *rt;
+-		if (ip_route_output(&rt, t->parms.iph.daddr,
+-				    t->parms.iph.saddr, RT_TOS(t->parms.iph.tos), 
+-				    t->parms.link)) {
++		if (ip_route_output_key(&rt, &fl)) {
+ 			MOD_DEC_USE_COUNT;
+ 			return -EADDRNOTAVAIL;
+ 		}
+@@ -1163,8 +1180,14 @@
+ 	/* Guess output device to choose reasonable mtu and hard_header_len */
+ 
+ 	if (iph->daddr) {
++		struct flowi fl = { .oif = tunnel->parms.link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = iph->daddr,
++						.saddr = iph->saddr,
++						.tos = RT_TOS(iph->tos) } },
++				    .proto = IPPROTO_GRE };
+ 		struct rtable *rt;
+-		if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
++		if (!ip_route_output_key(&rt, &fl)) {
+ 			tdev = rt->u.dst.dev;
+ 			ip_rt_put(rt);
+ 		}
+@@ -1245,13 +1268,8 @@
+ 
+ 
+ static struct inet_protocol ipgre_protocol = {
+-  ipgre_rcv,             /* GRE handler          */
+-  ipgre_err,             /* TUNNEL error control */
+-  0,                    /* next                 */
+-  IPPROTO_GRE,          /* protocol ID          */
+-  0,                    /* copy                 */
+-  NULL,                 /* data                 */
+-  "GRE"                 /* name                 */
++	.handler	=	ipgre_rcv,
++	.err_handler	=	ipgre_err,
+ };
+ 
+ 
+@@ -1267,9 +1285,13 @@
+ {
+ 	printk(KERN_INFO "GRE over IPv4 tunneling driver\n");
+ 
++	if (inet_add_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) {
++		printk(KERN_INFO "ipgre init: can't add protocol\n");
++		return -EAGAIN;
++	}
++
+ 	ipgre_fb_tunnel_dev.priv = (void*)&ipgre_fb_tunnel;
+ 	register_netdev(&ipgre_fb_tunnel_dev);
+-	inet_add_protocol(&ipgre_protocol);
+ 	return 0;
+ }
+ 
+@@ -1277,7 +1299,7 @@
+ 
+ void cleanup_module(void)
+ {
+-	if ( inet_del_protocol(&ipgre_protocol) < 0 )
++	if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0)
+ 		printk(KERN_INFO "ipgre close: can't remove protocol\n");
+ 
+ 	unregister_netdev(&ipgre_fb_tunnel_dev);
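
Several hunks above replace direct writes to dst->pmtu with a call through
dst->ops->update_pmtu(), so each dst type can validate or propagate a new
path MTU itself. A toy version of that indirection (the struct layout and the
68-byte IPv4 minimum clamp are illustrative):

#include <stdio.h>

struct dst;
struct dst_ops {
	void (*update_pmtu)(struct dst *dst, unsigned mtu);
};
struct dst {
	unsigned pmtu;
	const struct dst_ops *ops;
};

static void toy_update_pmtu(struct dst *dst, unsigned mtu)
{
	if (mtu < dst->pmtu && mtu >= 68)	/* never raise, keep IPv4 minimum */
		dst->pmtu = mtu;
}

static const struct dst_ops toy_ops = { .update_pmtu = toy_update_pmtu };

int main(void)
{
	struct dst d = { .pmtu = 1500, .ops = &toy_ops };

	d.ops->update_pmtu(&d, 1400);
	printf("%u\n", d.pmtu);		/* 1400 */
	d.ops->update_pmtu(&d, 9000);	/* rejected */
	printf("%u\n", d.pmtu);		/* still 1400 */
	return 0;
}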
+diff -Nru a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
+--- a/net/ipv4/ip_input.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ip_input.c	2005-02-13 21:25:09 +11:00
+@@ -141,6 +141,7 @@
+ #include <net/raw.h>
+ #include <net/checksum.h>
+ #include <linux/netfilter_ipv4.h>
++#include <net/xfrm.h>
+ #include <linux/mroute.h>
+ #include <linux/netlink.h>
+ 
+@@ -194,34 +195,13 @@
+ 	return 0;
+ }
+ 
+-/* Handle this out of line, it is rare. */
+-static int ip_run_ipprot(struct sk_buff *skb, struct iphdr *iph,
+-			 struct inet_protocol *ipprot, int force_copy)
+-{
+-	int ret = 0;
+-
+-	do {
+-		if (ipprot->protocol == iph->protocol) {
+-			struct sk_buff *skb2 = skb;
+-			if (ipprot->copy || force_copy)
+-				skb2 = skb_clone(skb, GFP_ATOMIC);
+-			if(skb2 != NULL) {
+-				ret = 1;
+-				ipprot->handler(skb2);
+-			}
+-		}
+-		ipprot = (struct inet_protocol *) ipprot->next;
+-	} while(ipprot != NULL);
+-
+-	return ret;
+-}
+-
+ static inline int ip_local_deliver_finish(struct sk_buff *skb)
+ {
+ 	int ihl = skb->nh.iph->ihl*4;
+ 
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	nf_debug_ip_local_deliver(skb);
++	skb->nf_debug = 0;
+ #endif /*CONFIG_NETFILTER_DEBUG*/
+ 
+ 	__skb_pull(skb, ihl);
+@@ -236,44 +216,40 @@
+ 	{
+ 		/* Note: See raw.c and net/raw.h, RAWV4_HTABLE_SIZE==MAX_INET_PROTOS */
+ 		int protocol = skb->nh.iph->protocol;
+-		int hash = protocol & (MAX_INET_PROTOS - 1);
+-		struct sock *raw_sk = raw_v4_htable[hash];
++		int hash;
++		struct sock *raw_sk;
+ 		struct inet_protocol *ipprot;
+-		int flag;
++
++	resubmit:
++		hash = protocol & (MAX_INET_PROTOS - 1);
++		raw_sk = raw_v4_htable[hash];
+ 
+ 		/* If there maybe a raw socket we must check - if not we
+ 		 * don't care less
+ 		 */
+-		if(raw_sk != NULL)
+-			raw_sk = raw_v4_input(skb, skb->nh.iph, hash);
++		if (raw_sk)
++			raw_v4_input(skb, skb->nh.iph, hash);
+ 
+-		ipprot = (struct inet_protocol *) inet_protos[hash];
+-		flag = 0;
+-		if(ipprot != NULL) {
+-			if(raw_sk == NULL &&
+-			   ipprot->next == NULL &&
+-			   ipprot->protocol == protocol) {
+-				int ret;
+-
+-				/* Fast path... */
+-				ret = ipprot->handler(skb);
+-
+-				return ret;
+-			} else {
+-				flag = ip_run_ipprot(skb, skb->nh.iph, ipprot, (raw_sk != NULL));
+-			}
+-		}
++		if ((ipprot = inet_protos[hash]) != NULL) {
++			int ret;
+ 
+-		/* All protocols checked.
+-		 * If this packet was a broadcast, we may *not* reply to it, since that
+-		 * causes (proven, grin) ARP storms and a leakage of memory (i.e. all
+-		 * ICMP reply messages get queued up for transmission...)
+-		 */
+-		if(raw_sk != NULL) {	/* Shift to last raw user */
+-			raw_rcv(raw_sk, skb);
+-			sock_put(raw_sk);
+-		} else if (!flag) {		/* Free and report errors */
+-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);	
++			if (!ipprot->no_policy &&
++			    !xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++				kfree_skb(skb);
++				return 0;
++			}
++			ret = ipprot->handler(skb);
++			if (ret < 0) {
++				protocol = -ret;
++				goto resubmit;
++			}
++		} else {
++			if (!raw_sk) {
++				if (xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++					icmp_send(skb, ICMP_DEST_UNREACH,
++						  ICMP_PROT_UNREACH, 0);
++				}
++			}
+ 			kfree_skb(skb);
+ 		}
+ 	}
+@@ -361,7 +337,7 @@
+ 		}
+ 	}
+ 
+-	return skb->dst->input(skb);
++	return dst_input(skb);
+ 
+ inhdr_error:
+ 	IP_INC_STATS_BH(IpInHdrErrors);
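
The resubmit convention introduced in ip_local_deliver_finish() above is what
lets xfrm4_rcv() work as an ordinary inet_protocol handler: returning a
negative value asks for the packet to be dispatched again as protocol -ret,
e.g. once ESP decapsulation exposes an inner TCP header. A minimal model of
the loop (the handler table and bodies are stand-ins; 50 and 6 are the real
IPPROTO values for ESP and TCP):

#include <stdio.h>

typedef int (*handler_t)(void);

static int esp_handler(void)
{
	printf("esp: decapsulated, inner proto 6\n");
	return -6;	/* negative: resubmit as protocol 6 */
}

static int tcp_handler(void)
{
	printf("tcp: delivered\n");
	return 0;
}

int main(void)
{
	handler_t protos[256] = { [50] = esp_handler, [6] = tcp_handler };
	int protocol = 50;
	int ret;

resubmit:
	if (protos[protocol] && (ret = protos[protocol]()) < 0) {
		protocol = -ret;
		goto resubmit;
	}
	return 0;
}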
+diff -Nru a/net/ipv4/ip_nat_dumb.c b/net/ipv4/ip_nat_dumb.c
+--- a/net/ipv4/ip_nat_dumb.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ip_nat_dumb.c	2005-02-13 21:25:09 +11:00
+@@ -117,23 +117,23 @@
+ 			if (rt->rt_flags&RTCF_SNAT) {
+ 				if (ciph->daddr != osaddr) {
+ 					struct   fib_result res;
+-					struct   rt_key key;
+ 					unsigned flags = 0;
+-
+-					key.src = ciph->daddr;
+-					key.dst = ciph->saddr;
+-					key.iif = skb->dev->ifindex;
+-					key.oif = 0;
++					struct flowi fl = {
++						.iif = skb->dev->ifindex,
++						.nl_u =
++						{ .ip4_u =
++						  { .daddr = ciph->saddr,
++						    .saddr = ciph->daddr,
+ #ifdef CONFIG_IP_ROUTE_TOS
+-					key.tos = RT_TOS(ciph->tos);
+-#endif
+-#ifdef CONFIG_IP_ROUTE_FWMARK
+-					key.fwmark = 0;
++						    .tos = RT_TOS(ciph->tos)
+ #endif
++						  } },
++						.proto = ciph->protocol };
++
+ 					/* Use fib_lookup() until we get our own
+ 					 * hash table of NATed hosts -- Rani
+ 				 	 */
+-					if (fib_lookup(&key, &res) == 0) {
++					if (fib_lookup(&fl, &res) == 0) {
+ 						if (res.r) {
+ 							ciph->daddr = fib_rules_policy(ciph->daddr, &res, &flags);
+ 							if (ciph->daddr != idaddr)
+diff -Nru a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
+--- a/net/ipv4/ip_output.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/ip_output.c	2005-02-13 21:25:10 +11:00
+@@ -15,6 +15,7 @@
+  *		Stefan Becker, <stefanb at yello.ping.de>
+  *		Jorge Cwik, <jorge at laser.satlink.net>
+  *		Arnt Gulbrandsen, <agulbra at nvg.unit.no>
++ *		Hirokazu Takahashi, <taka at valinux.co.jp>
+  *
+  *	See ip_input.c for original log
+  *
+@@ -38,6 +39,9 @@
+  *		Marc Boucher	:	When call_out_firewall returns FW_QUEUE,
+  *					silently drop skb instead of failing with -EPERM.
+  *		Detlev Wengorz	:	Copy protocol for fragments.
++ *		Hirokazu Takahashi:	HW checksumming for outgoing UDP
++ *					datagrams.
++ *		Hirokazu Takahashi:	sendfile() on UDP works now.
+  */
+ 
+ #include <asm/uaccess.h>
+@@ -108,16 +112,18 @@
+ 	return 0;
+ }
+ 
+-/* Don't just hand NF_HOOK skb->dst->output, in case netfilter hook
+-   changes route */
+-static inline int
+-output_maybe_reroute(struct sk_buff *skb)
++static inline int ip_select_ttl(struct inet_opt *inet, struct dst_entry *dst)
+ {
+-	return skb->dst->output(skb);
++	int ttl = inet->uc_ttl;
++
++	if (ttl < 0)
++		ttl = dst_metric(dst, RTAX_HOPLIMIT);
++	return ttl;
+ }
+ 
+ /* 
+  *		Add an ip header to a skbuff and send it out.
++ *
+  */
+ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk,
+ 			  u32 saddr, u32 daddr, struct ip_options *opt)
+@@ -138,7 +144,7 @@
+ 		iph->frag_off = htons(IP_DF);
+ 	else
+ 		iph->frag_off = 0;
+-	iph->ttl      = sk->protinfo.af_inet.ttl;
++	iph->ttl      = ip_select_ttl(&sk->protinfo.af_inet, &rt->u.dst);
+ 	iph->daddr    = rt->rt_dst;
+ 	iph->saddr    = rt->rt_src;
+ 	iph->protocol = sk->protocol;
+@@ -152,15 +158,34 @@
+ 	}
+ 	ip_send_check(iph);
+ 
++	skb->priority = sk->priority;
++
+ 	/* Send it out. */
+ 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+-		       output_maybe_reroute);
++		       dst_output);
+ }
+ 
+ static inline int ip_finish_output2(struct sk_buff *skb)
+ {
+ 	struct dst_entry *dst = skb->dst;
+ 	struct hh_cache *hh = dst->hh;
++	struct net_device *dev = dst->dev;
++	int hh_len = LL_RESERVED_SPACE(dev);
++
++	/* Be paranoid, rather than too clever. */
++	if (unlikely(skb_headroom(skb) < hh_len && dev->hard_header)) {
++		struct sk_buff *skb2;
++
++		skb2 = skb_realloc_headroom(skb, LL_RESERVED_SPACE(dev));
++		if (skb2 == NULL) {
++			kfree_skb(skb);
++			return -ENOMEM;
++		}
++		if (skb->sk)
++			skb_set_owner_w(skb2, skb->sk);
++		kfree_skb(skb);
++		skb = skb2;
++	}
+ 
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	nf_debug_ip_finish_output2(skb);
+@@ -184,7 +209,7 @@
+ 	return -EINVAL;
+ }
+ 
+-static __inline__ int __ip_finish_output(struct sk_buff *skb)
++int ip_finish_output(struct sk_buff *skb)
+ {
+ 	struct net_device *dev = skb->dst->dev;
+ 
+@@ -195,11 +220,6 @@
+ 		       ip_finish_output2);
+ }
+ 
+-int ip_finish_output(struct sk_buff *skb)
+-{
+-	return __ip_finish_output(skb);
+-}
+-
+ int ip_mc_output(struct sk_buff *skb)
+ {
+ 	struct sock *sk = skb->sk;
+@@ -210,10 +230,6 @@
+ 	 *	If the indicated interface is up and running, send the packet.
+ 	 */
+ 	IP_INC_STATS(IpOutRequests);
+-#ifdef CONFIG_IP_ROUTE_NAT
+-	if (rt->rt_flags & RTCF_NAT)
+-		ip_do_nat(skb);
+-#endif
+ 
+ 	skb->dev = dev;
+ 	skb->protocol = htons(ETH_P_IP);
+@@ -258,90 +274,26 @@
+ 				newskb->dev, ip_dev_loopback_xmit);
+ 	}
+ 
+-	return __ip_finish_output(skb);
++	if (skb->len > dst_pmtu(&rt->u.dst) || skb_shinfo(skb)->frag_list)
++		return ip_fragment(skb, ip_finish_output);
++	else
++		return ip_finish_output(skb);
+ }
+ 
+ int ip_output(struct sk_buff *skb)
+ {
+-#ifdef CONFIG_IP_ROUTE_NAT
+-	struct rtable *rt = (struct rtable*)skb->dst;
+-#endif
+-
+ 	IP_INC_STATS(IpOutRequests);
+ 
+-#ifdef CONFIG_IP_ROUTE_NAT
+-	if (rt->rt_flags&RTCF_NAT)
+-		ip_do_nat(skb);
++	if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list) &&
++#ifdef NETIF_F_TSO
++	    !skb_shinfo(skb)->tso_size
++#else
++	    1
+ #endif
+-
+-	return __ip_finish_output(skb);
+-}
+-
+-/* Queues a packet to be sent, and starts the transmitter if necessary.  
+- * This routine also needs to put in the total length and compute the 
+- * checksum.  We use to do this in two stages, ip_build_header() then
+- * this, but that scheme created a mess when routes disappeared etc.
+- * So we do it all here, and the TCP send engine has been changed to
+- * match. (No more unroutable FIN disasters, etc. wheee...)  This will
+- * most likely make other reliable transport layers above IP easier
+- * to implement under Linux.
+- */
+-static inline int ip_queue_xmit2(struct sk_buff *skb)
+-{
+-	struct sock *sk = skb->sk;
+-	struct rtable *rt = (struct rtable *)skb->dst;
+-	struct net_device *dev;
+-	struct iphdr *iph = skb->nh.iph;
+-
+-	dev = rt->u.dst.dev;
+-
+-	/* This can happen when the transport layer has segments queued
+-	 * with a cached route, and by the time we get here things are
+-	 * re-routed to a device with a different MTU than the original
+-	 * device.  Sick, but we must cover it.
+-	 */
+-	if (skb_headroom(skb) < dev->hard_header_len && dev->hard_header) {
+-		struct sk_buff *skb2;
+-
+-		skb2 = skb_realloc_headroom(skb, (dev->hard_header_len + 15) & ~15);
+-		kfree_skb(skb);
+-		if (skb2 == NULL)
+-			return -ENOMEM;
+-		if (sk)
+-			skb_set_owner_w(skb2, sk);
+-		skb = skb2;
+-		iph = skb->nh.iph;
+-	}
+-
+-	if (skb->len > rt->u.dst.pmtu)
+-		goto fragment;
+-
+-	ip_select_ident(iph, &rt->u.dst, sk);
+-
+-	/* Add an IP checksum. */
+-	ip_send_check(iph);
+-
+-	skb->priority = sk->priority;
+-	return skb->dst->output(skb);
+-
+-fragment:
+-	if (ip_dont_fragment(sk, &rt->u.dst)) {
+-		/* Reject packet ONLY if TCP might fragment
+-		 * it itself, if were careful enough.
+-		 */
+-		NETDEBUG(printk(KERN_DEBUG "sending pkt_too_big (len[%u] pmtu[%u]) to self\n",
+-				skb->len, rt->u.dst.pmtu));
+-
+-		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+-			  htonl(rt->u.dst.pmtu));
+-		kfree_skb(skb);
+-		return -EMSGSIZE;
+-	}
+-	ip_select_ident(iph, &rt->u.dst, sk);
+-	if (skb->ip_summed == CHECKSUM_HW &&
+-	    (skb = skb_checksum_help(skb)) == NULL)
+-		return -ENOMEM;
+-	return ip_fragment(skb, skb->dst->output);
++		)
++		return ip_fragment(skb, ip_finish_output);
++	else
++		return ip_finish_output(skb);
+ }
+ 
+ int ip_queue_xmit(struct sk_buff *skb, int ipfragok)
+@@ -350,6 +302,9 @@
+ 	struct ip_options *opt = sk->protinfo.af_inet.opt;
+ 	struct rtable *rt;
+ 	struct iphdr *iph;
++#ifdef NETIF_F_TSO
++	u32 mtu;
++#endif
+ 
+ 	/* Skip all of this if the packet is already routed,
+ 	 * f.e. by something like SCTP.
+@@ -368,14 +323,24 @@
+ 		if(opt && opt->srr)
+ 			daddr = opt->faddr;
+ 
+-		/* If this fails, retransmit mechanism of transport layer will
+-		 * keep trying until route appears or the connection times itself
+-		 * out.
+-		 */
+-		if (ip_route_output(&rt, daddr, sk->saddr,
+-				    RT_CONN_FLAGS(sk),
+-				    sk->bound_dev_if))
+-			goto no_route;
++		{
++			struct flowi fl = { .oif = sk->bound_dev_if,
++					    .nl_u = { .ip4_u =
++						      { .daddr = daddr,
++							.saddr = sk->saddr,
++							.tos = RT_CONN_FLAGS(sk) } },
++					    .proto = sk->protocol,
++					    .uli_u = { .ports =
++						       { .sport = sk->sport,
++							 .dport = sk->dport } } };
++
++			/* If this fails, retransmit mechanism of transport layer will
++			 * keep trying until route appears or the connection times
++			 * itself out.
++			 */
++			if (ip_route_output_flow(&rt, &fl, sk, 0))
++				goto no_route;
++		}
+ 		__sk_dst_set(sk, &rt->u.dst);
+ 		sk->route_caps = rt->u.dst.dev->features;
+ 	}
+@@ -393,7 +358,7 @@
+ 		iph->frag_off = htons(IP_DF);
+ 	else
+ 		iph->frag_off = 0;
+-	iph->ttl      = sk->protinfo.af_inet.ttl;
++	iph->ttl      = ip_select_ttl(&sk->protinfo.af_inet, &rt->u.dst);
+ 	iph->protocol = sk->protocol;
+ 	iph->saddr    = rt->rt_src;
+ 	iph->daddr    = rt->rt_dst;
+@@ -405,8 +370,30 @@
+ 		ip_options_build(skb, opt, sk->daddr, rt, 0);
+ 	}
+ 
++#ifdef NETIF_F_TSO
++	mtu = dst_pmtu(&rt->u.dst);
++ 	if (skb->len > mtu && (sk->route_caps&NETIF_F_TSO)) {
++		unsigned int hlen;
++
++		/* Hack zone: all this must be done by TCP. */
++		hlen = ((skb->h.raw - skb->data) + (skb->h.th->doff << 2));
++		skb_shinfo(skb)->tso_size = mtu - hlen;
++		skb_shinfo(skb)->tso_segs =
++			(skb->len - hlen + skb_shinfo(skb)->tso_size - 1)/
++				skb_shinfo(skb)->tso_size - 1;
++	}
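++	/* tso_segs counts the extra segments the NIC will generate beyond the
++	 * first; ip_select_ident_more() reserves that many further IP IDs. */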
++	ip_select_ident_more(iph, &rt->u.dst, sk, skb_shinfo(skb)->tso_segs);
++#else
++	ip_select_ident(iph, &rt->u.dst, sk);
++#endif
++
++	/* Add an IP checksum. */
++	ip_send_check(iph);
++
++	skb->priority = sk->priority;
++
+ 	return NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+-		       ip_queue_xmit2);
++		       dst_output);
+ 
+ no_route:
+ 	IP_INC_STATS(IpOutNoRoutes);
+@@ -414,338 +401,32 @@
+ 	return -EHOSTUNREACH;
+ }
+ 
+-/*
+- *	Build and send a packet, with as little as one copy
+- *
+- *	Doesn't care much about ip options... option length can be
+- *	different for fragment at 0 and other fragments.
+- *
+- *	Note that the fragment at the highest offset is sent first,
+- *	so the getfrag routine can fill in the TCP/UDP checksum header
+- *	field in the last fragment it sends... actually it also helps
+- * 	the reassemblers, they can put most packets in at the head of
+- *	the fragment queue, and they know the total size in advance. This
+- *	last feature will measurably improve the Linux fragment handler one
+- *	day.
+- *
+- *	The callback has five args, an arbitrary pointer (copy of frag),
+- *	the source IP address (may depend on the routing table), the 
+- *	destination address (char *), the offset to copy from, and the
+- *	length to be copied.
+- */
+-
+-static int ip_build_xmit_slow(struct sock *sk,
+-		  int getfrag (const void *,
+-			       char *,
+-			       unsigned int,	
+-			       unsigned int,
+-			       struct sk_buff *),
+-		  const void *frag,
+-		  unsigned length,
+-		  struct ipcm_cookie *ipc,
+-		  struct rtable *rt,
+-		  int flags)
+-{
+-	unsigned int fraglen, maxfraglen, fragheaderlen;
+-	int err;
+-	int offset, mf;
+-	int mtu;
+-	u16 id;
+-
+-	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+-	int nfrags=0;
+-	struct ip_options *opt = ipc->opt;
+-	int df = 0;
+-
+-	mtu = rt->u.dst.pmtu;
+-	if (ip_dont_fragment(sk, &rt->u.dst))
+-		df = htons(IP_DF);
+-
+-	length -= sizeof(struct iphdr);
+-
+-	if (opt) {
+-		fragheaderlen = sizeof(struct iphdr) + opt->optlen;
+-		maxfraglen = ((mtu-sizeof(struct iphdr)-opt->optlen) & ~7) + fragheaderlen;
+-	} else {
+-		fragheaderlen = sizeof(struct iphdr);
+-
+-		/*
+-		 *	Fragheaderlen is the size of 'overhead' on each buffer. Now work
+-		 *	out the size of the frames to send.
+-		 */
+-
+-		maxfraglen = ((mtu-sizeof(struct iphdr)) & ~7) + fragheaderlen;
+-	}
+-
+-	if (length + fragheaderlen > 0xFFFF) {
+-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
+-		return -EMSGSIZE;
+-	}
+-
+-	/*
+-	 *	Start at the end of the frame by handling the remainder.
+-	 */
+-
+-	offset = length - (length % (maxfraglen - fragheaderlen));
+-
+-	/*
+-	 *	Amount of memory to allocate for final fragment.
+-	 */
+-
+-	fraglen = length - offset + fragheaderlen;
+-
+-	if (length-offset==0) {
+-		fraglen = maxfraglen;
+-		offset -= maxfraglen-fragheaderlen;
+-	}
+-
+-	/*
+-	 *	The last fragment will not have MF (more fragments) set.
+-	 */
+-
+-	mf = 0;
+-
+-	/*
+-	 *	Don't fragment packets for path mtu discovery.
+-	 */
+-
+-	if (offset > 0 && sk->protinfo.af_inet.pmtudisc==IP_PMTUDISC_DO) { 
+-		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
+- 		return -EMSGSIZE;
+-	}
+-	if (flags&MSG_PROBE)
+-		goto out;
+-
+-	/*
+-	 *	Begin outputting the bytes.
+-	 */
+-
+-	id = sk->protinfo.af_inet.id++;
+-
+-	do {
+-		char *data;
+-		struct sk_buff * skb;
+-
+-		/*
+-		 *	Get the memory we require with some space left for alignment.
+-		 */
+-		if (!(flags & MSG_DONTWAIT) || nfrags == 0) {
+-			skb = sock_alloc_send_skb(sk, fraglen + hh_len + 15,
+-						  (flags & MSG_DONTWAIT), &err);
+-		} else {
+-			/* On a non-blocking write, we check for send buffer
+-			 * usage on the first fragment only.
+-			 */
+-			skb = sock_wmalloc(sk, fraglen + hh_len + 15, 1,
+-					   sk->allocation);
+-			if (!skb)
+-				err = -ENOBUFS;
+-		}
+-		if (skb == NULL)
+-			goto error;
+-
+-		/*
+-		 *	Fill in the control structures
+-		 */
+-
+-		skb->priority = sk->priority;
+-		skb->dst = dst_clone(&rt->u.dst);
+-		skb_reserve(skb, hh_len);
+-
+-		/*
+-		 *	Find where to start putting bytes.
+-		 */
+-
+-		data = skb_put(skb, fraglen);
+-		skb->nh.iph = (struct iphdr *)data;
+-
+-		/*
+-		 *	Only write IP header onto non-raw packets 
+-		 */
+-
+-		{
+-			struct iphdr *iph = (struct iphdr *)data;
+-
+-			iph->version = 4;
+-			iph->ihl = 5;
+-			if (opt) {
+-				iph->ihl += opt->optlen>>2;
+-				ip_options_build(skb, opt,
+-						 ipc->addr, rt, offset);
+-			}
+-			iph->tos = sk->protinfo.af_inet.tos;
+-			iph->tot_len = htons(fraglen - fragheaderlen + iph->ihl*4);
+-			iph->frag_off = htons(offset>>3)|mf|df;
+-			iph->id = id;
+-			if (!mf) {
+-				if (offset || !df) {
+-					/* Select an unpredictable ident only
+-					 * for packets without DF or having
+-					 * been fragmented.
+-					 */
+-					__ip_select_ident(iph, &rt->u.dst);
+-					id = iph->id;
+-				}
+-
+-				/*
+-				 *	Any further fragments will have MF set.
+-				 */
+-				mf = htons(IP_MF);
+-			}
+-			if (rt->rt_type == RTN_MULTICAST)
+-				iph->ttl = sk->protinfo.af_inet.mc_ttl;
+-			else
+-				iph->ttl = sk->protinfo.af_inet.ttl;
+-			iph->protocol = sk->protocol;
+-			iph->check = 0;
+-			iph->saddr = rt->rt_src;
+-			iph->daddr = rt->rt_dst;
+-			iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+-			data += iph->ihl*4;
+-		}
+-
+-		/*
+-		 *	User data callback
+-		 */
+-
+-		if (getfrag(frag, data, offset, fraglen-fragheaderlen, skb)) {
+-			err = -EFAULT;
+-			kfree_skb(skb);
+-			goto error;
+-		}
+-
+-		offset -= (maxfraglen-fragheaderlen);
+-		fraglen = maxfraglen;
+-
+-		nfrags++;
+-
+-		err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
+-			      skb->dst->dev, output_maybe_reroute);
+-		if (err) {
+-			if (err > 0)
+-				err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
+-			if (err)
+-				goto error;
+-		}
+-	} while (offset >= 0);
+-
+-	if (nfrags>1)
+-		ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
+-out:
+-	return 0;
+-
+-error:
+-	IP_INC_STATS(IpOutDiscards);
+-	if (nfrags>1)
+-		ip_statistics[smp_processor_id()*2 + !in_softirq()].IpFragCreates += nfrags;
+-	return err; 
+-}
+-
+-/*
+- *	Fast path for unfragmented packets.
+- */
+-int ip_build_xmit(struct sock *sk, 
+-		  int getfrag (const void *,
+-			       char *,
+-			       unsigned int,	
+-			       unsigned int,
+-			       struct sk_buff *),
+-		  const void *frag,
+-		  unsigned length,
+-		  struct ipcm_cookie *ipc,
+-		  struct rtable *rt,
+-		  int flags)
++static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
+ {
+-	int err;
+-	struct sk_buff *skb;
+-	int df;
+-	struct iphdr *iph;
+-
+-	/*
+-	 *	Try the simple case first. This leaves fragmented frames, and by
+-	 *	choice RAW frames within 20 bytes of maximum size(rare) to the long path
+-	 */
+-
+-	if (!sk->protinfo.af_inet.hdrincl) {
+-		length += sizeof(struct iphdr);
+-
+-		/*
+-		 * 	Check for slow path.
+-		 */
+-		if (length > rt->u.dst.pmtu || ipc->opt != NULL)  
+-			return ip_build_xmit_slow(sk,getfrag,frag,length,ipc,rt,flags); 
+-	} else {
+-		if (length > rt->u.dst.dev->mtu) {
+-			ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, rt->u.dst.dev->mtu);
+-			return -EMSGSIZE;
+-		}
+-	}
+-	if (flags&MSG_PROBE)
+-		goto out;
+-
+-	/*
+-	 *	Do path mtu discovery if needed.
+-	 */
+-	df = 0;
+-	if (ip_dont_fragment(sk, &rt->u.dst))
+-		df = htons(IP_DF);
+-
+-	/* 
+-	 *	Fast path for unfragmented frames without options. 
+-	 */ 
+-	{
+-	int hh_len = (rt->u.dst.dev->hard_header_len + 15)&~15;
+-
+-	skb = sock_alloc_send_skb(sk, length+hh_len+15,
+-				  flags&MSG_DONTWAIT, &err);
+-	if(skb==NULL)
+-		goto error; 
+-	skb_reserve(skb, hh_len);
+-	}
+-
+-	skb->priority = sk->priority;
+-	skb->dst = dst_clone(&rt->u.dst);
+-
+-	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+-
+-	if(!sk->protinfo.af_inet.hdrincl) {
+-		iph->version=4;
+-		iph->ihl=5;
+-		iph->tos=sk->protinfo.af_inet.tos;
+-		iph->tot_len = htons(length);
+-		iph->frag_off = df;
+-		iph->ttl=sk->protinfo.af_inet.mc_ttl;
+-		ip_select_ident(iph, &rt->u.dst, sk);
+-		if (rt->rt_type != RTN_MULTICAST)
+-			iph->ttl=sk->protinfo.af_inet.ttl;
+-		iph->protocol=sk->protocol;
+-		iph->saddr=rt->rt_src;
+-		iph->daddr=rt->rt_dst;
+-		iph->check=0;
+-		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+-		err = getfrag(frag, ((char *)iph)+iph->ihl*4,0, length-iph->ihl*4, skb);
+-	}
+-	else
+-		err = getfrag(frag, (void *)iph, 0, length, skb);
+-
+-	if (err)
+-		goto error_fault;
++	to->pkt_type = from->pkt_type;
++	to->priority = from->priority;
++	to->protocol = from->protocol;
++	to->security = from->security;
++	to->dst = dst_clone(from->dst);
++	to->dev = from->dev;
+ 
+-	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+-		      output_maybe_reroute);
+-	if (err > 0)
+-		err = sk->protinfo.af_inet.recverr ? net_xmit_errno(err) : 0;
+-	if (err)
+-		goto error;
+-out:
+-	return 0;
++	/* Copy the flags to each fragment. */
++	IPCB(to)->flags = IPCB(from)->flags;
+ 
+-error_fault:
+-	err = -EFAULT;
+-	kfree_skb(skb);
+-error:
+-	IP_INC_STATS(IpOutDiscards);
+-	return err; 
++#ifdef CONFIG_NET_SCHED
++	to->tc_index = from->tc_index;
++#endif
++#ifdef CONFIG_NETFILTER
++	to->nfmark = from->nfmark;
++	to->nfcache = from->nfcache;
++	/* Connection association is same as pre-frag packet */
++	nf_conntrack_put(to->nfct);
++	to->nfct = from->nfct;
++	nf_conntrack_get(to->nfct);
++#ifdef CONFIG_NETFILTER_DEBUG
++	to->nf_debug = from->nf_debug;
++#endif
++#endif
+ }
+ 
+ /*
+@@ -753,8 +434,6 @@
+  *	smaller pieces (each of size equal to IP header plus
+  *	a block of the data of the original IP data part) that will yet fit in a
+  *	single device frame, and queue such a frame for sending.
+- *
+- *	Yes this is inefficient, feel free to submit a quicker one.
+  */
+ 
+ int ip_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
+@@ -778,13 +457,111 @@
+ 
+ 	iph = skb->nh.iph;
+ 
++	if (unlikely((iph->frag_off & htons(IP_DF)) && !skb->local_df)) {
++		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
++			  htonl(dst_pmtu(&rt->u.dst)));
++		kfree_skb(skb);
++		return -EMSGSIZE;
++	}
++
+ 	/*
+ 	 *	Setup starting values.
+ 	 */
+ 
+ 	hlen = iph->ihl * 4;
++	mtu = dst_pmtu(&rt->u.dst) - hlen;	/* Size of data space */
++
++	/* When frag_list is given, use it. First, check its validity:
++	 * some transformers could create wrong frag_list or break existing
++	 * one, it is not prohibited. In this case fall back to copying.
++	 *
++	 * LATER: this step can be merged to real generation of fragments,
++	 * we can switch to copy when see the first bad fragment.
++	 */
++	if (skb_shinfo(skb)->frag_list) {
++		struct sk_buff *frag;
++		int first_len = skb_pagelen(skb);
++
++		if (first_len - hlen > mtu ||
++		    ((first_len - hlen) & 7) ||
++		    (iph->frag_off & htons(IP_MF|IP_OFFSET)) ||
++		    skb_cloned(skb))
++			goto slow_path;
++
++		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
++			/* Correct geometry. */
++			if (frag->len > mtu ||
++			    ((frag->len & 7) && frag->next) ||
++			    skb_headroom(frag) < hlen)
++			    goto slow_path;
++
++			/* Correct socket ownership. */
++			if (frag->sk == NULL)
++				goto slow_path;
++
++			/* Partially cloned skb? */
++			if (skb_shared(frag))
++				goto slow_path;
++		}
++
++		/* Everything is OK. Generate! */
++
++		err = 0;
++		offset = 0;
++		frag = skb_shinfo(skb)->frag_list;
++		skb_shinfo(skb)->frag_list = 0;
++		skb->data_len = first_len - skb_headlen(skb);
++		skb->len = first_len;
++		iph->tot_len = htons(first_len);
++		iph->frag_off |= htons(IP_MF);
++		ip_send_check(iph);
++
++		for (;;) {
++			/* Prepare header of the next frame,
++			 * before previous one went down. */
++			if (frag) {
++				frag->h.raw = frag->data;
++				frag->nh.raw = __skb_push(frag, hlen);
++				memcpy(frag->nh.raw, iph, hlen);
++				iph = frag->nh.iph;
++				iph->tot_len = htons(frag->len);
++				ip_copy_metadata(frag, skb);
++				if (offset == 0)
++					ip_options_fragment(frag);
++				offset += skb->len - hlen;
++				iph->frag_off = htons(offset>>3);
++				if (frag->next != NULL)
++					iph->frag_off |= htons(IP_MF);
++				/* Ready, complete checksum */
++				ip_send_check(iph);
++			}
++
++			err = output(skb);
++
++			if (err || !frag)
++				break;
++
++			skb = frag;
++			frag = skb->next;
++			skb->next = NULL;
++		}
++
++		if (err == 0) {
++			IP_INC_STATS(IpFragOKs);
++			return 0;
++		}
++
++		while (frag) {
++			skb = frag->next;
++			kfree_skb(frag);
++			frag = skb;
++		}
++		IP_INC_STATS(IpFragFails);
++		return err;
++	}
++
++slow_path:
+ 	left = skb->len - hlen;		/* Space per frame */
+-	mtu = rt->u.dst.pmtu - hlen;	/* Size of data space */
+ 	ptr = raw + hlen;		/* Where to start from */
+ 
+ 	/*
+@@ -812,7 +589,7 @@
+ 		 *	Allocate buffer.
+ 		 */
+ 
+-		if ((skb2 = alloc_skb(len+hlen+dev->hard_header_len+15,GFP_ATOMIC)) == NULL) {
++		if ((skb2 = alloc_skb(len+hlen+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
+ 			NETDEBUG(printk(KERN_INFO "IP: frag: no memory for new fragment!\n"));
+ 			err = -ENOMEM;
+ 			goto fail;
+@@ -822,14 +599,11 @@
+ 		 *	Set up data on packet
+ 		 */
+ 
+-		skb2->pkt_type = skb->pkt_type;
+-		skb2->priority = skb->priority;
+-		skb_reserve(skb2, (dev->hard_header_len+15)&~15);
++		ip_copy_metadata(skb2, skb);
++		skb_reserve(skb2, LL_RESERVED_SPACE(rt->u.dst.dev));
+ 		skb_put(skb2, len + hlen);
+ 		skb2->nh.raw = skb2->data;
+ 		skb2->h.raw = skb2->data + hlen;
+-		skb2->protocol = skb->protocol;
+-		skb2->security = skb->security;
+ 
+ 		/*
+ 		 *	Charge the memory for the fragment to any owner
+@@ -838,8 +612,6 @@
+ 
+ 		if (skb->sk)
+ 			skb_set_owner_w(skb2, skb->sk);
+-		skb2->dst = dst_clone(skb->dst);
+-		skb2->dev = skb->dev;
+ 
+ 		/*
+ 		 *	Copy the packet header into the new buffer.
+@@ -869,9 +641,6 @@
+ 		if (offset == 0)
+ 			ip_options_fragment(skb);
+ 
+-		/* Copy the flags to each fragment. */
+-		IPCB(skb2)->flags = IPCB(skb)->flags;
+-
+ 		/*
+ 		 *	Added AC : If we are fragmenting a fragment that's not the
+ 		 *		   last fragment then keep MF on each bit
+@@ -881,20 +650,6 @@
+ 		ptr += len;
+ 		offset += len;
+ 
+-#ifdef CONFIG_NET_SCHED
+-		skb2->tc_index = skb->tc_index;
+-#endif
+-#ifdef CONFIG_NETFILTER
+-		skb2->nfmark = skb->nfmark;
+-		skb2->nfcache = skb->nfcache;
+-		/* Connection association is same as pre-frag packet */
+-		skb2->nfct = skb->nfct;
+-		nf_conntrack_get(skb2->nfct);
+-#ifdef CONFIG_NETFILTER_DEBUG
+-		skb2->nf_debug = skb->nf_debug;
+-#endif
+-#endif
+-
+ 		/*
+ 		 *	Put this fragment into the sending queue.
+ 		 */
+@@ -919,40 +674,555 @@
+ 	return err;
+ }
+ 
++int
++ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
++{
++	struct iovec *iov = from;
++
++	if (skb->ip_summed == CHECKSUM_HW) {
++		if (memcpy_fromiovecend(to, iov, offset, len) < 0)
++			return -EFAULT;
++	} else {
++		unsigned int csum = 0;
++		if (csum_partial_copy_fromiovecend(to, iov, offset, len, &csum) < 0)
++			return -EFAULT;
++		skb->csum = csum_block_add(skb->csum, csum, odd);
++	}
++	return 0;
++}
++
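++/* True when the new chunk starts exactly where the last page fragment of
++ * this skb ends, so the two can be merged into one frag descriptor. */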
++static inline int
++skb_can_coalesce(struct sk_buff *skb, int i, struct page *page, int off)
++{
++	if (i) {
++		skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
++		return page == frag->page &&
++			off == frag->page_offset+frag->size;
++	}
++	return 0;
++}
++
++static inline unsigned int
++csum_page(struct page *page, int offset, int copy)
++{
++	char *kaddr;
++	unsigned int csum;
++	kaddr = kmap(page);
++	csum = csum_partial(kaddr + offset, copy, 0);
++	kunmap(page);
++	return csum;
++}
++
+ /*
+- *	Fetch data from kernel space and fill in checksum if needed.
++ *	ip_append_data() and ip_append_page() can make one large IP datagram
++ *	from many pieces of data. Each piece will be held on the socket
++ *	until ip_push_pending_frames() is called. Each piece can be a page
++ *	or non-page data.
++ *	
++ *	Not only UDP; other transport protocols - e.g. raw sockets - can
++ *	potentially use this interface.
++ *
++ *	LATER: length must be adjusted by pad at tail, when it is required.
+  */
+-static int ip_reply_glue_bits(const void *dptr, char *to, unsigned int offset, 
+-			      unsigned int fraglen, struct sk_buff *skb)
++int ip_append_data(struct sock *sk,
++		   int getfrag(void *from, char *to, int offset, int len,
++			       int odd, struct sk_buff *skb),
++		   void *from, int length, int transhdrlen,
++		   struct ipcm_cookie *ipc, struct rtable *rt,
++		   unsigned int flags)
+ {
+-        struct ip_reply_arg *dp = (struct ip_reply_arg*)dptr;
+-	u16 *pktp = (u16 *)to;
+-	struct iovec *iov; 
+-	int len; 
+-	int hdrflag = 1; 
+-
+-	iov = &dp->iov[0]; 
+-	if (offset >= iov->iov_len) { 
+-		offset -= iov->iov_len;
+-		iov++; 
+-		hdrflag = 0; 
+-	}
+-	len = iov->iov_len - offset;
+-	if (fraglen > len) { /* overlapping. */ 
+-		dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, len,
+-					     dp->csum);
+-		offset = 0;
+-		fraglen -= len; 
+-		to += len; 
+-		iov++;
++	struct inet_opt *inet = inet_sk(sk);
++	struct sk_buff *skb;
++
++	struct ip_options *opt = NULL;
++	int hh_len;
++	int exthdrlen;
++	int mtu;
++	int copy;
++	int err;
++	int offset = 0;
++	unsigned int maxfraglen, fragheaderlen;
++	int csummode = CHECKSUM_NONE;
++
++	if (flags&MSG_PROBE)
++		return 0;
++
++	if (skb_queue_empty(&sk->write_queue)) {
++		/*
++		 * setup for corking.
++		 */
++		opt = ipc->opt;
++		if (opt) {
++			if (inet->cork.opt == NULL) {
++				inet->cork.opt = kmalloc(sizeof(struct ip_options)+40, sk->allocation);
++				if (unlikely(inet->cork.opt == NULL))
++					return -ENOBUFS;
++			}
++			memcpy(inet->cork.opt, opt, sizeof(struct ip_options)+opt->optlen);
++			inet->cork.flags |= IPCORK_OPT;
++			inet->cork.addr = ipc->addr;
++		}
++		dst_hold(&rt->u.dst);
++		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
++		inet->cork.rt = rt;
++		inet->cork.length = 0;
++		inet->sndmsg_page = NULL;
++		inet->sndmsg_off = 0;
++		if ((exthdrlen = rt->u.dst.header_len) != 0) {
++			length += exthdrlen;
++			transhdrlen += exthdrlen;
++		}
++	} else {
++		rt = inet->cork.rt;
++		if (inet->cork.flags & IPCORK_OPT)
++			opt = inet->cork.opt;
++
++		transhdrlen = 0;
++		exthdrlen = 0;
++		mtu = inet->cork.fragsize;
++	}
++	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
++
++	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
++	maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
++
++	if (inet->cork.length + length > 0xFFFF - fragheaderlen) {
++		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu-exthdrlen);
++		return -EMSGSIZE;
++	}
++
++	/*
++	 * transhdrlen > 0 means that this is the first fragment and we wish
++	 * it not to be fragmented later.
++	 */
++	if (transhdrlen &&
++	    length + fragheaderlen <= maxfraglen &&
++	    rt->u.dst.dev->features&(NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM) &&
++	    !exthdrlen)
++		csummode = CHECKSUM_HW;
++
++	inet->cork.length += length;
++
++	/* So, what's going on in the loop below?
++	 *
++	 * We use calculated fragment length to generate chained skb,
++	 * each of segments is IP fragment ready for sending to network after
++	 * adding appropriate IP header.
++	 *
++	 * Mistake is:
++	 *
++	 *    If mtu-fragheaderlen is not 0 modulo 8, we generate an additional
++	 *    small fragment of length (mtu-fragheaderlen)%8, even though
++	 *    it is not necessary. Not a big bug, but it needs a fix.
++	 */
++
++	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
++		goto alloc_new_skb;
++
++	while (length > 0) {
++		if ((copy = maxfraglen - skb->len) <= 0) {
++			char *data;
++			unsigned int datalen;
++			unsigned int fraglen;
++			unsigned int alloclen;
++			BUG_TRAP(copy == 0);
++
++alloc_new_skb:
++			datalen = maxfraglen - fragheaderlen;
++			if (datalen > length)
++				datalen = length;
++
++			fraglen = datalen + fragheaderlen;
++			if ((flags & MSG_MORE) && 
++			    !(rt->u.dst.dev->features&NETIF_F_SG))
++				alloclen = maxfraglen;
++			else
++				alloclen = datalen + fragheaderlen;
++
++			/* The last fragment gets additional space at tail.
++			 * Note, with MSG_MORE we overallocate on fragments,
++			 * because we have no idea which fragment will be
++			 * the last.
++			 */
++			if (datalen == length)
++				alloclen += rt->u.dst.trailer_len;
++
++			if (transhdrlen) {
++				skb = sock_alloc_send_skb(sk, 
++						alloclen + hh_len + 15,
++						(flags & MSG_DONTWAIT), &err);
++			} else {
++				skb = NULL;
++				if (atomic_read(&sk->wmem_alloc) <= 2*sk->sndbuf)
++					skb = sock_wmalloc(sk, 
++							   alloclen + hh_len + 15, 1,
++							   sk->allocation);
++				if (unlikely(skb == NULL))
++					err = -ENOBUFS;
++			}
++			if (skb == NULL)
++				goto error;
++
++			/*
++			 *	Fill in the control structures
++			 */
++			skb->ip_summed = csummode;
++			skb->csum = 0;
++			skb_reserve(skb, hh_len);
++
++			/*
++			 *	Find where to start putting bytes.
++			 */
++			data = skb_put(skb, fraglen);
++			skb->nh.raw = data + exthdrlen;
++			data += fragheaderlen;
++			skb->h.raw = data + exthdrlen;
++
++			copy = datalen - transhdrlen;
++			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
++				err = -EFAULT;
++				kfree_skb(skb);
++				goto error;
++			}
++
++			offset += copy;
++			length -= datalen;
++			transhdrlen = 0;
++			exthdrlen = 0;
++			csummode = CHECKSUM_NONE;
++
++			/*
++			 * Put the packet on the pending queue.
++			 */
++			__skb_queue_tail(&sk->write_queue, skb);
++			continue;
++		}
++
++		if (copy > length)
++			copy = length;
++
++		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
++			unsigned int off;
++
++			off = skb->len;
++			if (getfrag(from, skb_put(skb, copy), 
++					offset, copy, off, skb) < 0) {
++				__skb_trim(skb, off);
++				err = -EFAULT;
++				goto error;
++			}
++		} else {
++			int i = skb_shinfo(skb)->nr_frags;
++			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
++			struct page *page = inet->sndmsg_page;
++			int off = inet->sndmsg_off;
++			unsigned int left;
++
++			if (page && (left = PAGE_SIZE - off) > 0) {
++				if (copy >= left)
++					copy = left;
++				if (page != frag->page) {
++					if (i == MAX_SKB_FRAGS) {
++						err = -EMSGSIZE;
++						goto error;
++					}
++					get_page(page);
++	 				skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
++					frag = &skb_shinfo(skb)->frags[i];
++				}
++			} else if (i < MAX_SKB_FRAGS) {
++				if (copy > PAGE_SIZE)
++					copy = PAGE_SIZE;
++				page = alloc_pages(sk->allocation, 0);
++				if (page == NULL)  {
++					err = -ENOMEM;
++					goto error;
++				}
++				inet->sndmsg_page = page;
++				inet->sndmsg_off = 0;
++
++				skb_fill_page_desc(skb, i, page, 0, 0);
++				frag = &skb_shinfo(skb)->frags[i];
++				skb->truesize += PAGE_SIZE;
++				atomic_add(PAGE_SIZE, &sk->wmem_alloc);
++			} else {
++				err = -EMSGSIZE;
++				goto error;
++			}
++			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
++				err = -EFAULT;
++				goto error;
++			}
++			inet->sndmsg_off += copy;
++			frag->size += copy;
++			skb->len += copy;
++			skb->data_len += copy;
++		}
++		offset += copy;
++		length -= copy;
+ 	}
+ 
+-	dp->csum = csum_partial_copy_nocheck(iov->iov_base+offset, to, fraglen, 
+-					     dp->csum); 
++	return 0;
+ 
+-	if (hdrflag && dp->csumoffset)
+-		*(pktp + dp->csumoffset) = csum_fold(dp->csum); /* fill in checksum */
+-	return 0;	       
++error:
++	inet->cork.length -= length;
++	IP_INC_STATS(IpOutDiscards);
++	return err; 
++}
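++
++/*
++ * A hypothetical caller - e.g. a datagram protocol's sendmsg() - would
++ * pair the calls roughly as follows (ip_push_pending_frames() and
++ * ip_flush_pending_frames() are defined below):
++ *
++ *	err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len,
++ *			     sizeof(struct udphdr), &ipc, rt, msg->msg_flags);
++ *	if (err)
++ *		ip_flush_pending_frames(sk);
++ *	else if (!(msg->msg_flags & MSG_MORE))
++ *		err = ip_push_pending_frames(sk);
++ */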
++
++ssize_t	ip_append_page(struct sock *sk, struct page *page,
++		       int offset, size_t size, int flags)
++{
++	struct inet_opt *inet = inet_sk(sk);
++	struct sk_buff *skb;
++	struct rtable *rt;
++	struct ip_options *opt = NULL;
++	int hh_len;
++	int mtu;
++	int len;
++	int err;
++	unsigned int maxfraglen, fragheaderlen;
++
++	if (inet->hdrincl)
++		return -EPERM;
++
++	if (flags&MSG_PROBE)
++		return 0;
++
++	if (skb_queue_empty(&sk->write_queue))
++		return -EINVAL;
++
++	rt = inet->cork.rt;
++	if (inet->cork.flags & IPCORK_OPT)
++		opt = inet->cork.opt;
++
++	if (!(rt->u.dst.dev->features&NETIF_F_SG))
++		return -EOPNOTSUPP;
++
++	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
++	mtu = inet->cork.fragsize;
++
++	fragheaderlen = sizeof(struct iphdr) + (opt ? opt->optlen : 0);
++	maxfraglen = ((mtu-fragheaderlen) & ~7) + fragheaderlen;
++
++	if (inet->cork.length + size > 0xFFFF - fragheaderlen) {
++		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport, mtu);
++		return -EMSGSIZE;
++	}
++
++	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
++		return -EINVAL;
++
++	inet->cork.length += size;
++
++	while (size > 0) {
++		int i;
++		if ((len = maxfraglen - skb->len) <= 0) {
++			char *data;
++			struct iphdr *iph;
++			BUG_TRAP(len == 0);
++
++			skb = sock_wmalloc(sk, fragheaderlen + hh_len + 15, 1,
++					   sk->allocation);
++			if (unlikely(!skb)) {
++				err = -ENOBUFS;
++				goto error;
++			}
++
++			/*
++			 *	Fill in the control structures
++			 */
++			skb->ip_summed = CHECKSUM_NONE;
++			skb->csum = 0;
++			skb_reserve(skb, hh_len);
++
++			/*
++			 *	Find where to start putting bytes.
++			 */
++			data = skb_put(skb, fragheaderlen);
++			skb->nh.iph = iph = (struct iphdr *)data;
++			data += fragheaderlen;
++			skb->h.raw = data;
++
++			/*
++			 * Put the packet on the pending queue.
++			 */
++			__skb_queue_tail(&sk->write_queue, skb);
++			continue;
++		}
++
++		i = skb_shinfo(skb)->nr_frags;
++		if (len > size)
++			len = size;
++		if (skb_can_coalesce(skb, i, page, offset)) {
++			skb_shinfo(skb)->frags[i-1].size += len;
++		} else if (i < MAX_SKB_FRAGS) {
++			get_page(page);
++			skb_fill_page_desc(skb, i, page, offset, len);
++		} else {
++			err = -EMSGSIZE;
++			goto error;
++		}
++
++		if (skb->ip_summed == CHECKSUM_NONE) {
++			unsigned int csum;
++			csum = csum_page(page, offset, len);
++			skb->csum = csum_block_add(skb->csum, csum, skb->len);
++		}
++
++		skb->len += len;
++		skb->data_len += len;
++		offset += len;
++		size -= len;
++	}
++	return 0;
++
++error:
++	inet->cork.length -= size;
++	IP_INC_STATS(IpOutDiscards);
++	return err;
++}
++
++/*
++ *	Combine all pending IP fragments on the socket into one IP datagram
++ *	and push them out.
++ */
++int ip_push_pending_frames(struct sock *sk)
++{
++	struct sk_buff *skb, *tmp_skb;
++	struct sk_buff **tail_skb;
++	struct inet_opt *inet = inet_sk(sk);
++	struct ip_options *opt = NULL;
++	struct rtable *rt = inet->cork.rt;
++	struct iphdr *iph;
++	int df = 0;
++	__u8 ttl;
++	int err = 0;
++
++	if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
++		goto out;
++	tail_skb = &(skb_shinfo(skb)->frag_list);
++
++	/* move skb->data to ip header from ext header */
++	if (skb->data < skb->nh.raw)
++		__skb_pull(skb, skb->nh.raw - skb->data);
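++	/* Chain the remaining queued skbs onto the first skb's frag_list so
++	 * that ip_fragment()'s fast path can emit them without copying. */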
++	while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
++		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
++		*tail_skb = tmp_skb;
++		tail_skb = &(tmp_skb->next);
++		skb->len += tmp_skb->len;
++		skb->data_len += tmp_skb->len;
++#if 0 /* Logically correct, but useless work; ip_fragment() will have to undo it. */
++		skb->truesize += tmp_skb->truesize;
++		__sock_put(tmp_skb->sk);
++		tmp_skb->destructor = NULL;
++		tmp_skb->sk = NULL;
++#endif
++	}
++
++	/* Unless the user demanded real pmtu discovery (IP_PMTUDISC_DO), we
++	 * allow fragmenting the frame generated here. No matter how transforms
++	 * change the size of the packet, it will come out.
++	 */
++	if (inet->pmtudisc != IP_PMTUDISC_DO)
++		skb->local_df = 1;
++
++	/* DF bit is set when we want to see DF on outgoing frames.
++	 * If local_df is set too, we still allow to fragment this frame
++	 * locally. */
++	if (inet->pmtudisc == IP_PMTUDISC_DO ||
++	    (!skb_shinfo(skb)->frag_list && ip_dont_fragment(sk, &rt->u.dst)))
++		df = htons(IP_DF);
++
++	if (inet->cork.flags & IPCORK_OPT)
++		opt = inet->cork.opt;
++
++	if (rt->rt_type == RTN_MULTICAST)
++		ttl = inet->mc_ttl;
++	else
++		ttl = ip_select_ttl(inet, &rt->u.dst);
++
++	iph = (struct iphdr *)skb->data;
++	iph->version = 4;
++	iph->ihl = 5;
++	if (opt) {
++		iph->ihl += opt->optlen>>2;
++		ip_options_build(skb, opt, inet->cork.addr, rt, 0);
++	}
++	iph->tos = inet->tos;
++	iph->tot_len = htons(skb->len);
++	iph->frag_off = df;
++	if (!df) {
++		__ip_select_ident(iph, &rt->u.dst);
++	} else {
++		iph->id = htons(inet->id++);
++	}
++	iph->ttl = ttl;
++	iph->protocol = sk->protocol;
++	iph->saddr = rt->rt_src;
++	iph->daddr = rt->rt_dst;
++	ip_send_check(iph);
++
++	skb->priority = sk->priority;
++	skb->dst = dst_clone(&rt->u.dst);
++
++	/* Netfilter gets the whole, not yet fragmented skb. */
++	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, 
++		      skb->dst->dev, dst_output);
++	if (err) {
++		if (err > 0)
++			err = inet->recverr ? net_xmit_errno(err) : 0;
++		if (err)
++			goto error;
++	}
++
++out:
++	inet->cork.flags &= ~IPCORK_OPT;
++	if (inet->cork.rt) {
++		ip_rt_put(inet->cork.rt);
++		inet->cork.rt = NULL;
++	}
++	return err;
++
++error:
++	IP_INC_STATS(IpOutDiscards);
++	goto out;
++}
++
++/*
++ *	Throw away all pending data on the socket.
++ */
++void ip_flush_pending_frames(struct sock *sk)
++{
++	struct inet_opt *inet = inet_sk(sk);
++	struct sk_buff *skb;
++
++	while ((skb = __skb_dequeue_tail(&sk->write_queue)) != NULL)
++		kfree_skb(skb);
++
++	inet->cork.flags &= ~IPCORK_OPT;
++	if (inet->cork.opt) {
++		kfree(inet->cork.opt);
++		inet->cork.opt = NULL;
++	}
++	if (inet->cork.rt) {
++		ip_rt_put(inet->cork.rt);
++		inet->cork.rt = NULL;
++	}
++}
++
++
++/*
++ *	Fetch data from kernel space and fill in checksum if needed.
++ */
++static int ip_reply_glue_bits(void *dptr, char *to, int offset, 
++			      int len, int odd, struct sk_buff *skb)
++{
++	unsigned int csum;
++
++	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
++	skb->csum = csum_block_add(skb->csum, csum, odd);
++	return 0;  
+ }
+ 
+ /* 
+@@ -961,6 +1231,8 @@
+  *
+  *	Should run single threaded per socket because it uses the sock 
+  *     	structure to pass arguments.
++ *
++ *	LATER: switch from ip_build_xmit to ip_append_*
+  */
+ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *arg,
+ 		   unsigned int len)
+@@ -986,8 +1258,19 @@
+ 			daddr = replyopts.opt.faddr;
+ 	}
+ 
+-	if (ip_route_output(&rt, daddr, rt->rt_spec_dst, RT_TOS(skb->nh.iph->tos), 0))
+-		return;
++	{
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = daddr,
++						.saddr = rt->rt_spec_dst,
++						.tos = RT_TOS(skb->nh.iph->tos) } },
++				    /* Not quite clean, but right. */
++				    .uli_u = { .ports =
++					       { .sport = skb->h.th->dest,
++					         .dport = skb->h.th->source } },
++				    .proto = sk->protocol };
++		if (ip_route_output_key(&rt, &fl))
++			return;
++	}
+ 
+ 	/* And let IP do all the hard work.
+ 
+@@ -999,7 +1282,15 @@
+ 	sk->protinfo.af_inet.tos = skb->nh.iph->tos;
+ 	sk->priority = skb->priority;
+ 	sk->protocol = skb->nh.iph->protocol;
+-	ip_build_xmit(sk, ip_reply_glue_bits, arg, len, &ipc, rt, MSG_DONTWAIT);
++	ip_append_data(sk, ip_reply_glue_bits, arg->iov->iov_base, len, 0,
++		       &ipc, rt, MSG_DONTWAIT);
++	if ((skb = skb_peek(&sk->write_queue)) != NULL) {
++		if (arg->csumoffset >= 0)
++			*((u16 *)skb->h.raw + arg->csumoffset) = csum_fold(csum_add(skb->csum, arg->csum));
++		skb->ip_summed = CHECKSUM_NONE;
++		ip_push_pending_frames(sk);
++	}
++
+ 	bh_unlock_sock(sk);
+ 
+ 	ip_rt_put(rt);
+diff -Nru a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
+--- a/net/ipv4/ip_sockglue.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ip_sockglue.c	2005-02-13 21:25:09 +11:00
+@@ -36,6 +36,7 @@
+ #include <linux/route.h>
+ #include <linux/mroute.h>
+ #include <net/route.h>
++#include <net/xfrm.h>
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ #include <net/transp_v6.h>
+ #endif
+@@ -377,6 +378,7 @@
+ 
+ int ip_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
+ {
++	struct inet_opt *inet = inet_sk(sk);
+ 	int val=0,err;
+ 
+ 	if (level != SOL_IP)
+@@ -428,8 +430,10 @@
+ 				    (!((1<<sk->state)&(TCPF_LISTEN|TCPF_CLOSE))
+ 				     && sk->daddr != LOOPBACK4_IPV6)) {
+ #endif
++					if (inet->opt)
++						tp->ext_header_len -= inet->opt->optlen;
+ 					if (opt)
+-						tp->ext_header_len = opt->optlen;
++						tp->ext_header_len += opt->optlen;
+ 					tcp_sync_mss(sk, tp->pmtu_cookie);
+ #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
+ 				}
+@@ -489,11 +493,9 @@
+ 		case IP_TTL:
+ 			if (optlen<1)
+ 				goto e_inval;
+-			if(val==-1)
+-				val = sysctl_ip_default_ttl;
+-			if(val<1||val>255)
++			if (val != -1 && (val < 1 || val>255))
+ 				goto e_inval;
+-			sk->protinfo.af_inet.ttl=val;
++			sk->protinfo.af_inet.uc_ttl = val;
+ 			break;
+ 		case IP_HDRINCL:
+ 			if(sk->type!=SOCK_RAW) {
+@@ -837,6 +839,11 @@
+ 			sk->protinfo.af_inet.freebind = !!val; 
+ 	                break;			
+  
++		case IP_IPSEC_POLICY:
++		case IP_XFRM_POLICY:
++			err = xfrm_user_policy(sk, optname, optval, optlen);
++			break;
++
+ 		default:
+ #ifdef CONFIG_NETFILTER
+ 			err = nf_setsockopt(sk, PF_INET, optname, optval, 
+@@ -924,7 +931,9 @@
+ 			val=sk->protinfo.af_inet.tos;
+ 			break;
+ 		case IP_TTL:
+-			val=sk->protinfo.af_inet.ttl;
++			val = (sk->protinfo.af_inet.uc_ttl == -1 ?
++			       sysctl_ip_default_ttl :
++			       sk->protinfo.af_inet.uc_ttl);
+ 			break;
+ 		case IP_HDRINCL:
+ 			val=sk->protinfo.af_inet.hdrincl;
+@@ -938,7 +947,7 @@
+ 			val = 0;
+ 			dst = sk_dst_get(sk);
+ 			if (dst) {
+-				val = dst->pmtu;
++				val = dst_pmtu(dst) - dst->header_len;
+ 				dst_release(dst);
+ 			}
+ 			if (!val) {
+diff -Nru a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/ipcomp.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,378 @@
++/*
++ * IP Payload Compression Protocol (IPComp) - RFC3173.
++ *
++ * Copyright (c) 2003 James Morris <jmorris at intercode.com.au>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the Free
++ * Software Foundation; either version 2 of the License, or (at your option) 
++ * any later version.
++ *
++ * Todo:
++ *   - Tunable compression parameters.
++ *   - Compression stats.
++ *   - Adaptive compression.
++ */
++#include <linux/config.h>
++#include <linux/module.h>
++#include <asm/scatterlist.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/icmp.h>
++#include <net/ipcomp.h>
++
++static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb)
++{
++	int err, plen, dlen;
++	struct iphdr *iph;
++	struct ipcomp_data *ipcd = x->data;
++	u8 *start, *scratch = ipcd->scratch;
++	
++	plen = skb->len;
++	dlen = IPCOMP_SCRATCH_SIZE;
++	start = skb->data;
++
++	err = crypto_comp_decompress(ipcd->tfm, start, plen, scratch, &dlen);
++	if (err)
++		goto out;
++
++	if (dlen < (plen + sizeof(struct ip_comp_hdr))) {
++		err = -EINVAL;
++		goto out;
++	}
++
++	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
++	if (err)
++		goto out;
++		
++	skb_put(skb, dlen - plen);
++	memcpy(skb->data, scratch, dlen);
++	iph = skb->nh.iph;
++	iph->tot_len = htons(dlen + iph->ihl * 4);
++out:	
++	return err;
++}
++
++static int ipcomp_input(struct xfrm_state *x,
++                        struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	u8 nexthdr;
++	int err = 0;
++	struct iphdr *iph;
++	union {
++		struct iphdr	iph;
++		char 		buf[60];
++	} tmp_iph;
++
++
++	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
++	    skb_linearize(skb, GFP_ATOMIC) != 0) {
++	    	err = -ENOMEM;
++	    	goto out;
++	}
++
++	skb->ip_summed = CHECKSUM_NONE;
++
++	/* Remove ipcomp header and decompress original payload */	
++	iph = skb->nh.iph;
++	memcpy(&tmp_iph, iph, iph->ihl * 4);
++	nexthdr = *(u8 *)skb->data;
++	skb_pull(skb, sizeof(struct ip_comp_hdr));
++	skb->nh.raw += sizeof(struct ip_comp_hdr);
++	memcpy(skb->nh.raw, &tmp_iph, tmp_iph.iph.ihl * 4);
++	iph = skb->nh.iph;
++	iph->tot_len = htons(ntohs(iph->tot_len) - sizeof(struct ip_comp_hdr));
++	iph->protocol = nexthdr;
++	skb->h.raw = skb->data;
++	err = ipcomp_decompress(x, skb);
++
++out:	
++	return err;
++}
++
++static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb)
++{
++	int err, plen, dlen, ihlen;
++	struct iphdr *iph = skb->nh.iph;
++	struct ipcomp_data *ipcd = x->data;
++	u8 *start, *scratch = ipcd->scratch;
++	
++	ihlen = iph->ihl * 4;
++	plen = skb->len - ihlen;
++	dlen = IPCOMP_SCRATCH_SIZE;
++	start = skb->data + ihlen;
++
++	err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
++	if (err)
++		goto out;
++
++	if ((dlen + sizeof(struct ip_comp_hdr)) >= plen) {
++		err = -EMSGSIZE;
++		goto out;
++	}
++	
++	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
++	pskb_trim(skb, ihlen + dlen + sizeof(struct ip_comp_hdr));
++	
++out:	
++	return err;
++}
++
++static int ipcomp_output(struct sk_buff *skb)
++{
++	int err;
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x = dst->xfrm;
++	struct iphdr *iph;
++	struct ip_comp_hdr *ipch;
++	struct ipcomp_data *ipcd = x->data;
++	int hdr_len = 0;
++
++	iph = skb->nh.iph;
++	iph->tot_len = htons(skb->len);
++	hdr_len = iph->ihl * 4;
++	if ((skb->len - hdr_len) < ipcd->threshold) {
++		/* Don't bother compressing */
++		if (x->props.mode) {
++			ip_send_check(iph);
++		}
++		goto out_ok;
++	}
++
++	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
++	    skb_linearize(skb, GFP_ATOMIC) != 0) {
++	    	err = -ENOMEM;
++	    	goto error;
++	}
++	
++	err = ipcomp_compress(x, skb);
++	if (err) {
++		if (err == -EMSGSIZE) {
++			if (x->props.mode) {
++				iph = skb->nh.iph;
++				ip_send_check(iph);
++			}
++			goto out_ok;
++		}
++		goto error;
++	}
++
++	/* Install ipcomp header, convert into ipcomp datagram. */
++	iph = skb->nh.iph;
++	iph->tot_len = htons(skb->len);
++	ipch = (struct ip_comp_hdr *)((char *)iph + iph->ihl * 4);
++	ipch->nexthdr = iph->protocol;
++	ipch->flags = 0;
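++	/* RFC 3173 CPIs are 16 bits; the CPI travels in the low-order half
++	 * of the 32-bit SPI used to key the state. */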
++	ipch->cpi = htons((u16 )ntohl(x->id.spi));
++	iph->protocol = IPPROTO_COMP;
++	ip_send_check(iph);
++
++out_ok:
++	err = 0;
++
++error:
++	return err;
++}
++
++static void ipcomp4_err(struct sk_buff *skb, u32 info)
++{
++	u32 spi;
++	struct iphdr *iph = (struct iphdr *)skb->data;
++	struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2));
++	struct xfrm_state *x;
++
++	if (skb->h.icmph->type != ICMP_DEST_UNREACH ||
++	    skb->h.icmph->code != ICMP_FRAG_NEEDED)
++		return;
++
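++	/* There is no per-state PMTU to update here; the lookup only confirms
++	 * the SA exists before the event is logged. */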
++	spi = ntohl(ntohs(ipch->cpi));
++	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
++	                      spi, IPPROTO_COMP, AF_INET);
++	if (!x)
++		return;
++	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/%u.%u.%u.%u\n",
++	       spi, NIPQUAD(iph->daddr));
++	xfrm_state_put(x);
++}
++
++/* We always hold one tunnel user reference to indicate a tunnel */ 
++static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
++{
++	struct xfrm_state *t;
++	
++	t = xfrm_state_alloc();
++	if (t == NULL)
++		goto out;
++
++	t->id.proto = IPPROTO_IPIP;
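++	/* Tunnel states carry no real SPI; key them by our source address,
++	 * matching the lookup in ipcomp_tunnel_attach() below. */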
++	t->id.spi = x->props.saddr.a4;
++	t->id.daddr.a4 = x->id.daddr.a4;
++	memcpy(&t->sel, &x->sel, sizeof(t->sel));
++	t->props.family = AF_INET;
++	t->props.mode = 1;
++	t->props.saddr.a4 = x->props.saddr.a4;
++	t->props.flags = x->props.flags;
++	
++	t->type = xfrm_get_type(IPPROTO_IPIP, t->props.family);
++	if (t->type == NULL)
++		goto error;
++		
++	if (t->type->init_state(t, NULL))
++		goto error;
++
++	t->km.state = XFRM_STATE_VALID;
++	atomic_set(&t->tunnel_users, 1);
++out:
++	return t;
++
++error:
++	t->km.state = XFRM_STATE_DEAD;
++	xfrm_state_put(t);
++	t = NULL;
++	goto out;
++}
++
++/*
++ * Must be protected by xfrm_cfg_sem.  State and tunnel user references are
++ * always incremented on success.
++ */
++static int ipcomp_tunnel_attach(struct xfrm_state *x)
++{
++	int err = 0;
++	struct xfrm_state *t;
++
++	t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr.a4,
++	                      x->props.saddr.a4, IPPROTO_IPIP, AF_INET);
++	if (!t) {
++		t = ipcomp_tunnel_create(x);
++		if (!t) {
++			err = -EINVAL;
++			goto out;
++		}
++		xfrm_state_insert(t);
++		xfrm_state_hold(t);
++	}
++	x->tunnel = t;
++	atomic_inc(&t->tunnel_users);
++out:
++	return err;
++}
++
++static void ipcomp_free_data(struct ipcomp_data *ipcd)
++{
++	if (ipcd->tfm)
++		crypto_free_tfm(ipcd->tfm);
++	if (ipcd->scratch)
++		kfree(ipcd->scratch);	
++}
++
++static void ipcomp_destroy(struct xfrm_state *x)
++{
++	struct ipcomp_data *ipcd = x->data;
++	if (!ipcd)
++		return;
++	xfrm_state_delete_tunnel(x);
++	ipcomp_free_data(ipcd);
++	kfree(ipcd);
++}
++
++static int ipcomp_init_state(struct xfrm_state *x, void *args)
++{
++	int err;
++	struct ipcomp_data *ipcd;
++	struct xfrm_algo_desc *calg_desc;
++
++	err = -EINVAL;
++	if (!x->calg)
++		goto out;
++
++	if (x->encap)
++		goto out;
++
++	err = -ENOMEM;
++	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
++	if (!ipcd)
++		goto error;
++
++	memset(ipcd, 0, sizeof(*ipcd));
++	x->props.header_len = 0;
++	if (x->props.mode)
++		x->props.header_len += sizeof(struct iphdr);
++
++	ipcd->scratch = kmalloc(IPCOMP_SCRATCH_SIZE, GFP_KERNEL);
++	if (!ipcd->scratch)
++		goto error;
++	
++	ipcd->tfm = crypto_alloc_tfm(x->calg->alg_name, 0);
++	if (!ipcd->tfm)
++		goto error;
++
++	if (x->props.mode) {
++		err = ipcomp_tunnel_attach(x);
++		if (err)
++			goto error;
++	}
++
++	calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
++	BUG_ON(!calg_desc);
++	ipcd->threshold = calg_desc->uinfo.comp.threshold;
++	x->data = ipcd;
++	err = 0;
++out:
++	return err;
++
++error:
++	if (ipcd) {
++		ipcomp_free_data(ipcd);
++		kfree(ipcd);
++	}
++	goto out;
++}
++
++static struct xfrm_type ipcomp_type = {
++	.description	= "IPCOMP4",
++	.owner		= THIS_MODULE,
++	.proto	     	= IPPROTO_COMP,
++	.init_state	= ipcomp_init_state,
++	.destructor	= ipcomp_destroy,
++	.input		= ipcomp_input,
++	.output		= ipcomp_output
++};
++
++static struct inet_protocol ipcomp4_protocol = {
++	.handler	=	xfrm4_rcv,
++	.err_handler	=	ipcomp4_err,
++	.no_policy	=	1,
++};
++
++static int __init ipcomp4_init(void)
++{
++	if (xfrm_register_type(&ipcomp_type, AF_INET) < 0) {
++		printk(KERN_INFO "ipcomp init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet_add_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0) {
++		printk(KERN_INFO "ipcomp init: can't add protocol\n");
++		xfrm_unregister_type(&ipcomp_type, AF_INET);
++		return -EAGAIN;
++	}
++	return 0;
++}
++
++static void __exit ipcomp4_fini(void)
++{
++	if (inet_del_protocol(&ipcomp4_protocol, IPPROTO_COMP) < 0)
++		printk(KERN_INFO "ip ipcomp close: can't remove protocol\n");
++	if (xfrm_unregister_type(&ipcomp_type, AF_INET) < 0)
++		printk(KERN_INFO "ip ipcomp close: can't remove xfrm type\n");
++}
++
++module_init(ipcomp4_init);
++module_exit(ipcomp4_fini);
++
++MODULE_LICENSE("GPL");
++MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) - RFC3173");
++MODULE_AUTHOR("James Morris <jmorris at intercode.com.au>");
++
+diff -Nru a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c
+--- a/net/ipv4/ipconfig.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ipconfig.c	2005-02-13 21:25:09 +11:00
+@@ -655,7 +655,7 @@
+ 	struct net_device *dev = d->dev;
+ 	struct sk_buff *skb;
+ 	struct bootp_pkt *b;
+-	int hh_len = (dev->hard_header_len + 15) & ~15;
++	int hh_len = LL_RESERVED_SPACE(dev);
+ 	struct iphdr *h;
+ 
+ 	/* Allocate packet */
+diff -Nru a/net/ipv4/ipip.c b/net/ipv4/ipip.c
+--- a/net/ipv4/ipip.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/ipip.c	2005-02-13 21:25:10 +11:00
+@@ -115,6 +115,7 @@
+ #include <net/protocol.h>
+ #include <net/ipip.h>
+ #include <net/inet_ecn.h>
++#include <net/xfrm.h>
+ 
+ #define HASH_SIZE  16
+ #define HASH(addr) ((addr^(addr>>4))&0xF)
+@@ -207,7 +208,7 @@
+ 	write_unlock_bh(&ipip_lock);
+ }
+ 
+-struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
++static struct ip_tunnel * ipip_tunnel_locate(struct ip_tunnel_parm *parms, int create)
+ {
+ 	u32 remote = parms->iph.daddr;
+ 	u32 local = parms->iph.saddr;
+@@ -289,7 +290,7 @@
+ 	dev_put(dev);
+ }
+ 
+-void ipip_err(struct sk_buff *skb, u32 info)
++static void ipip_err(struct sk_buff *skb, void *__unused)
+ {
+ #ifndef I_WISH_WORLD_WERE_PERFECT
+ 
+@@ -355,6 +356,7 @@
+ 	int rel_code = 0;
+ 	int rel_info = 0;
+ 	struct sk_buff *skb2;
++	struct flowi fl;
+ 	struct rtable *rt;
+ 
+ 	if (len < hlen + sizeof(struct iphdr))
+@@ -417,7 +419,11 @@
+ 	skb2->nh.raw = skb2->data;
+ 
+ 	/* Try to guess incoming interface */
+-	if (ip_route_output(&rt, eiph->saddr, 0, RT_TOS(eiph->tos), 0)) {
++	memset(&fl, 0, sizeof(fl));
++	fl.fl4_daddr = eiph->saddr;
++	fl.fl4_tos = RT_TOS(eiph->tos);
++	fl.proto = IPPROTO_IPIP;
++	if (ip_route_output_key(&rt, &fl)) {
+ 		kfree_skb(skb2);
+ 		return;
+ 	}
+@@ -427,8 +433,11 @@
+ 	if (rt->rt_flags&RTCF_LOCAL) {
+ 		ip_rt_put(rt);
+ 		rt = NULL;
+-		if (ip_route_output(&rt, eiph->daddr, eiph->saddr, eiph->tos, 0) ||
+-		    rt->u.dst.dev->type != ARPHRD_IPGRE) {
++		fl.fl4_daddr = eiph->daddr;
++		fl.fl4_src = eiph->saddr;
++		fl.fl4_tos = eiph->tos;
++		if (ip_route_output_key(&rt, &fl) ||
++		    rt->u.dst.dev->type != ARPHRD_TUNNEL) {
+ 			ip_rt_put(rt);
+ 			kfree_skb(skb2);
+ 			return;
+@@ -436,7 +445,7 @@
+ 	} else {
+ 		ip_rt_put(rt);
+ 		if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) ||
+-		    skb2->dst->dev->type != ARPHRD_IPGRE) {
++		    skb2->dst->dev->type != ARPHRD_TUNNEL) {
+ 			kfree_skb(skb2);
+ 			return;
+ 		}
+@@ -444,11 +453,11 @@
+ 
+ 	/* change mtu on this route */
+ 	if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) {
+-		if (rel_info > skb2->dst->pmtu) {
++		if (rel_info > dst_pmtu(skb2->dst)) {
+ 			kfree_skb(skb2);
+ 			return;
+ 		}
+-		skb2->dst->pmtu = rel_info;
++		skb2->dst->ops->update_pmtu(skb2->dst, rel_info);
+ 		rel_info = htonl(rel_info);
+ 	} else if (type == ICMP_TIME_EXCEEDED) {
+ 		struct ip_tunnel *t = (struct ip_tunnel*)skb2->dev->priv;
+@@ -473,7 +482,7 @@
+ 		IP_ECN_set_ce(inner_iph);
+ }
+ 
+-int ipip_rcv(struct sk_buff *skb)
++static int ipip_rcv(struct sk_buff *skb)
+ {
+ 	struct iphdr *iph;
+ 	struct ip_tunnel *tunnel;
+@@ -482,14 +491,23 @@
+ 		goto out;
+ 
+ 	iph = skb->nh.iph;
+-	skb->mac.raw = skb->nh.raw;
+-	skb->nh.raw = skb->data;
+-	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+-	skb->protocol = htons(ETH_P_IP);
+-	skb->pkt_type = PACKET_HOST;
+ 
+ 	read_lock(&ipip_lock);
+ 	if ((tunnel = ipip_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
++		if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++			read_unlock(&ipip_lock);
++			kfree_skb(skb);
++			return 0;
++		}
++
++		secpath_reset(skb);
++
++		skb->mac.raw = skb->nh.raw;
++		skb->nh.raw = skb->data;
++		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
++		skb->protocol = htons(ETH_P_IP);
++		skb->pkt_type = PACKET_HOST;
++
+ 		tunnel->stat.rx_packets++;
+ 		tunnel->stat.rx_bytes += skb->len;
+ 		skb->dev = tunnel->dev;
+@@ -503,16 +521,8 @@
+ 	}
+ 	read_unlock(&ipip_lock);
+ 
+-	icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PROT_UNREACH, 0);
+ out:
+-	kfree_skb(skb);
+-	return 0;
+-}
+-
+-/* Need this wrapper because NF_HOOK takes the function address */
+-static inline int do_ip_send(struct sk_buff *skb)
+-{
+-	return ip_send(skb);
++	return -1;
+ }
+ 
+ /*
+@@ -556,9 +566,17 @@
+ 			goto tx_error_icmp;
+ 	}
+ 
+-	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+-		tunnel->stat.tx_carrier_errors++;
+-		goto tx_error_icmp;
++	{
++		struct flowi fl = { .oif = tunnel->parms.link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = dst,
++						.saddr = tiph->saddr,
++						.tos = RT_TOS(tos) } },
++				    .proto = IPPROTO_IPIP };
++		if (ip_route_output_key(&rt, &fl)) {
++			tunnel->stat.tx_carrier_errors++;
++			goto tx_error_icmp;
++		}
+ 	}
+ 	tdev = rt->u.dst.dev;
+ 
+@@ -569,17 +587,17 @@
+ 	}
+ 
+ 	if (tiph->frag_off)
+-		mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
++		mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
+ 	else
+-		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
++		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
+ 
+ 	if (mtu < 68) {
+ 		tunnel->stat.collisions++;
+ 		ip_rt_put(rt);
+ 		goto tx_error;
+ 	}
+-	if (skb->dst && mtu < skb->dst->pmtu)
+-		skb->dst->pmtu = mtu;
++	if (skb->dst)
++		skb->dst->ops->update_pmtu(skb->dst, mtu);
+ 
+ 	df |= (old_iph->frag_off&htons(IP_DF));
+ 
+@@ -600,7 +618,7 @@
+ 	/*
+ 	 * Okay, now see if we can stuff it in the buffer as-is.
+ 	 */
+-	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
++	max_headroom = (LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr));
+ 
+ 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+@@ -812,8 +830,14 @@
+ 	ipip_tunnel_init_gen(dev);
+ 
+ 	if (iph->daddr) {
++		struct flowi fl = { .oif = tunnel->parms.link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = iph->daddr,
++						.saddr = iph->saddr,
++						.tos = RT_TOS(iph->tos) } },
++				    .proto = IPPROTO_IPIP };
+ 		struct rtable *rt;
+-		if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
++		if (!ip_route_output_key(&rt, &fl)) {
+ 			tdev = rt->u.dst.dev;
+ 			ip_rt_put(rt);
+ 		}
+@@ -846,7 +870,7 @@
+ }
+ #endif
+ 
+-int __init ipip_fb_tunnel_init(struct net_device *dev)
++static int __init ipip_fb_tunnel_init(struct net_device *dev)
+ {
+ 	struct iphdr *iph;
+ 
+@@ -866,11 +890,9 @@
+ 	return 0;
+ }
+ 
+-static struct inet_protocol ipip_protocol = {
+-	handler:	ipip_rcv,
+-	err_handler:	ipip_err,
+-	protocol:	IPPROTO_IPIP,
+-	name:		"IPIP"
++static struct xfrm_tunnel ipip_handler = {
++	.handler	=	ipip_rcv,
++	.err_handler	=	ipip_err,
+ };
+ 
+ static char banner[] __initdata =
+@@ -880,16 +902,20 @@
+ {
+ 	printk(banner);
+ 
++	if (xfrm4_tunnel_register(&ipip_handler) < 0) {
++		printk(KERN_INFO "ipip init: can't register tunnel\n");
++		return -EAGAIN;
++	}
++
+ 	ipip_fb_tunnel_dev.priv = (void*)&ipip_fb_tunnel;
+ 	register_netdev(&ipip_fb_tunnel_dev);
+-	inet_add_protocol(&ipip_protocol);
+ 	return 0;
+ }
+ 
+ static void __exit ipip_fini(void)
+ {
+-	if ( inet_del_protocol(&ipip_protocol) < 0 )
+-		printk(KERN_INFO "ipip close: can't remove protocol\n");
++	if (xfrm4_tunnel_deregister(&ipip_handler) < 0)
++		printk(KERN_INFO "ipip close: can't deregister tunnel\n");
+ 
+ 	unregister_netdev(&ipip_fb_tunnel_dev);
+ }
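
The ipip driver above no longer claims IPPROTO_IPIP via inet_add_protocol(); the backported xfrm4 layer owns that protocol, and tunnel drivers hook in through xfrm4_tunnel_register(). A minimal sketch of the pattern, assuming the backport declares struct xfrm_tunnel with the same two callbacks as mainline 2.6 (all sample_* names here are hypothetical):

    #include <net/xfrm.h>	/* from the backported IPsec stack */

    static int sample_tunnel_rcv(struct sk_buff *skb)
    {
    	/* Return 0 once the skb is consumed.  The patched ipip_rcv
    	 * above returns -1 when no tunnel matches, apparently so
    	 * the xfrm4 layer can fall back to its own IPIP handling. */
    	kfree_skb(skb);
    	return 0;
    }

    static void sample_tunnel_err(struct sk_buff *skb, u32 info)
    {
    	/* ICMP errors for the outer IPIP header arrive here. */
    }

    static struct xfrm_tunnel sample_tunnel = {
    	.handler	= sample_tunnel_rcv,
    	.err_handler	= sample_tunnel_err,
    };

    static int __init sample_init(void)
    {
    	/* Fails when another handler already holds the slot,
    	 * hence the -EAGAIN path in ipip_init above. */
    	if (xfrm4_tunnel_register(&sample_tunnel) < 0)
    		return -EAGAIN;
    	return 0;
    }

    static void __exit sample_fini(void)
    {
    	if (xfrm4_tunnel_deregister(&sample_tunnel) < 0)
    		printk(KERN_INFO "sample: can't deregister tunnel\n");
    }
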
+diff -Nru a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
+--- a/net/ipv4/ipmr.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ipmr.c	2005-02-13 21:25:09 +11:00
+@@ -108,7 +108,7 @@
+ static int ipmr_cache_report(struct sk_buff *pkt, vifi_t vifi, int assert);
+ static int ipmr_fill_mroute(struct sk_buff *skb, struct mfc_cache *c, struct rtmsg *rtm);
+ 
+-extern struct inet_protocol pim_protocol;
++static struct inet_protocol pim_protocol;
+ 
+ static struct timer_list ipmr_expire_timer;
+ 
+@@ -928,23 +928,28 @@
+ #ifdef CONFIG_IP_PIMSM
+ 		case MRT_PIM:
+ 		{
+-			int v;
++			int v, ret;
+ 			if(get_user(v,(int *)optval))
+ 				return -EFAULT;
+ 			v = (v)?1:0;
+ 			rtnl_lock();
++			ret = 0;
+ 			if (v != mroute_do_pim) {
+ 				mroute_do_pim = v;
+ 				mroute_do_assert = v;
+ #ifdef CONFIG_IP_PIMSM_V2
+ 				if (mroute_do_pim)
+-					inet_add_protocol(&pim_protocol);
++					ret = inet_add_protocol(&pim_protocol,
++								IPPROTO_PIM);
+ 				else
+-					inet_del_protocol(&pim_protocol);
++					ret = inet_del_protocol(&pim_protocol,
++								IPPROTO_PIM);
++				if (ret < 0)
++					ret = -EAGAIN;
+ #endif
+ 			}
+ 			rtnl_unlock();
+-			return 0;
++			return ret;
+ 		}
+ #endif
+ 		/*
+@@ -1102,16 +1107,14 @@
+ 
+ static inline int ipmr_forward_finish(struct sk_buff *skb)
+ {
+-	struct ip_options *opt = &(IPCB(skb)->opt);
+-	struct dst_entry *dst = skb->dst;
++	struct ip_options * opt	= &(IPCB(skb)->opt);
++
++	IP_INC_STATS_BH(IpForwDatagrams);
+ 
+ 	if (unlikely(opt->optlen))
+ 		ip_forward_options(skb);
+ 
+-	if (skb->len <= dst->pmtu)
+-		return dst->output(skb);
+-	else
+-		return ip_fragment(skb, dst->output);
++	return dst_output(skb);
+ }
+ 
+ /*
+@@ -1143,17 +1146,28 @@
+ #endif
+ 
+ 	if (vif->flags&VIFF_TUNNEL) {
+-		if (ip_route_output(&rt, vif->remote, vif->local, RT_TOS(iph->tos), vif->link))
++		struct flowi fl = { .oif = vif->link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = vif->remote,
++						.saddr = vif->local,
++						.tos = RT_TOS(iph->tos) } },
++				    .proto = IPPROTO_IPIP };
++		if (ip_route_output_key(&rt, &fl))
+ 			return;
+ 		encap = sizeof(struct iphdr);
+ 	} else {
+-		if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(iph->tos), vif->link))
++		struct flowi fl = { .oif = vif->link,
++				    .nl_u = { .ip4_u =
++					      { .daddr = iph->daddr,
++						.tos = RT_TOS(iph->tos) } },
++				    .proto = IPPROTO_IPIP };
++		if (ip_route_output_key(&rt, &fl))
+ 			return;
+ 	}
+ 
+ 	dev = rt->u.dst.dev;
+ 
+-	if (skb->len+encap > rt->u.dst.pmtu && (ntohs(iph->frag_off) & IP_DF)) {
++	if (skb->len+encap > dst_pmtu(&rt->u.dst) && (ntohs(iph->frag_off) & IP_DF)) {
+ 		/* Do not fragment multicasts. Alas, IPv4 does not
+ 		   allow to send ICMP, so that packets will disappear
+ 		   to blackhole.
+@@ -1164,7 +1178,7 @@
+ 		return;
+ 	}
+ 
+-	encap += dev->hard_header_len;
++	encap += LL_RESERVED_SPACE(dev);
+ 
+ 	if (skb_headroom(skb) < encap || skb_cloned(skb) || !last)
+ 		skb2 = skb_realloc_headroom(skb, (encap + 15)&~15);
+@@ -1241,7 +1255,7 @@
+ 	if (vif_table[vif].dev != skb->dev) {
+ 		int true_vifi;
+ 
+-		if (((struct rtable*)skb->dst)->key.iif == 0) {
++		if (((struct rtable*)skb->dst)->fl.iif == 0) {
+ 			/* It is our own packet, looped back.
+ 			   Very complicated situation...
+ 
+@@ -1391,19 +1405,15 @@
+ 	struct net_device  *reg_dev = NULL;
+ 
+ 	if (skb_is_nonlinear(skb)) {
+-		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
+-			kfree_skb(skb);
+-			return -ENOMEM;
+-		}
++		if (skb_linearize(skb, GFP_ATOMIC) != 0) 
++			goto drop;
+ 		pim = (struct igmphdr*)skb->h.raw;
+ 	}
+ 
+         if (!mroute_do_pim ||
+ 	    skb->len < sizeof(*pim) + sizeof(*encap) ||
+-	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) {
+-		kfree_skb(skb);
+-                return -EINVAL;
+-        }
++	    pim->group != PIM_V1_VERSION || pim->code != PIM_V1_REGISTER) 
++		goto drop;
+ 
+ 	encap = (struct iphdr*)(skb->h.raw + sizeof(struct igmphdr));
+ 	/*
+@@ -1413,11 +1423,9 @@
+ 	   c. packet is not truncated
+ 	 */
+ 	if (!MULTICAST(encap->daddr) ||
+-	    ntohs(encap->tot_len) == 0 ||
+-	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
+-		kfree_skb(skb);
+-		return -EINVAL;
+-	}
++	    encap->tot_len == 0 ||
++	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
++		goto drop;
+ 
+ 	read_lock(&mrt_lock);
+ 	if (reg_vif_num >= 0)
+@@ -1426,10 +1434,8 @@
+ 		dev_hold(reg_dev);
+ 	read_unlock(&mrt_lock);
+ 
+-	if (reg_dev == NULL) {
+-		kfree_skb(skb);
+-		return -EINVAL;
+-	}
++	if (reg_dev == NULL) 
++		goto drop;
+ 
+ 	skb->mac.raw = skb->nh.raw;
+ 	skb_pull(skb, (u8*)encap - skb->data);
+@@ -1447,6 +1453,9 @@
+ 	netif_rx(skb);
+ 	dev_put(reg_dev);
+ 	return 0;
++ drop:
++	kfree_skb(skb);
++	return 0;
+ }
+ #endif
+ 
+@@ -1458,10 +1467,8 @@
+ 	struct net_device  *reg_dev = NULL;
+ 
+ 	if (skb_is_nonlinear(skb)) {
+-		if (skb_linearize(skb, GFP_ATOMIC) != 0) {
+-			kfree_skb(skb);
+-			return -ENOMEM;
+-		}
++		if (skb_linearize(skb, GFP_ATOMIC) != 0) 
++			goto drop;
+ 		pim = (struct pimreghdr*)skb->h.raw;
+ 	}
+ 
+@@ -1469,19 +1476,15 @@
+ 	    pim->type != ((PIM_VERSION<<4)|(PIM_REGISTER)) ||
+ 	    (pim->flags&PIM_NULL_REGISTER) ||
+ 	    (ip_compute_csum((void *)pim, sizeof(*pim)) != 0 &&
+-	     ip_compute_csum((void *)pim, skb->len))) {
+-		kfree_skb(skb);
+-                return -EINVAL;
+-        }
++	     ip_compute_csum((void *)pim, skb->len))) 
++		goto drop;
+ 
+ 	/* check if the inner packet is destined to mcast group */
+ 	encap = (struct iphdr*)(skb->h.raw + sizeof(struct pimreghdr));
+ 	if (!MULTICAST(encap->daddr) ||
+-	    ntohs(encap->tot_len) == 0 ||
+-	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) {
+-		kfree_skb(skb);
+-		return -EINVAL;
+-	}
++	    encap->tot_len == 0 ||
++	    ntohs(encap->tot_len) + sizeof(*pim) > skb->len) 
++		goto drop;
+ 
+ 	read_lock(&mrt_lock);
+ 	if (reg_vif_num >= 0)
+@@ -1490,10 +1493,8 @@
+ 		dev_hold(reg_dev);
+ 	read_unlock(&mrt_lock);
+ 
+-	if (reg_dev == NULL) {
+-		kfree_skb(skb);
+-		return -EINVAL;
+-	}
++	if (reg_dev == NULL) 
++		goto drop;
+ 
+ 	skb->mac.raw = skb->nh.raw;
+ 	skb_pull(skb, (u8*)encap - skb->data);
+@@ -1511,6 +1512,9 @@
+ 	netif_rx(skb);
+ 	dev_put(reg_dev);
+ 	return 0;
++ drop:
++	kfree_skb(skb);
++	return 0;
+ }
+ #endif
+ 
+@@ -1723,15 +1727,8 @@
+ #endif	
+ 
+ #ifdef CONFIG_IP_PIMSM_V2
+-struct inet_protocol pim_protocol = 
+-{
+-	pim_rcv,		/* PIM handler		*/
+-	NULL,			/* PIM error control	*/
+-	NULL,			/* next			*/
+-	IPPROTO_PIM,		/* protocol ID		*/
+-	0,			/* copy			*/
+-	NULL,			/* data			*/
+-	"PIM"			/* name			*/
++static struct inet_protocol pim_protocol = {
++	.handler	=	pim_rcv,
+ };
+ #endif
+ 
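
Throughout this patch the positional ip_route_output(&rt, daddr, saddr, tos, oif) calls become ip_route_output_key() over a struct flowi, as in the two ipmr_queue_xmit() lookups above; the flow key also gains a .proto field so the IPsec policy lookup wired up in route.c (further down) has a full selector to match on. A sketch of the conversion, assuming the IPv4 selector lives in the nested nl_u.ip4_u union and that the fl4_dst/fl4_src/fl4_tos names used by route.c below are shorthand macros for those members:

    static int sample_lookup(u32 daddr, u32 saddr, u8 tos, int oif,
    			     struct rtable **rtp)
    {
    	struct flowi fl = {
    		.oif	= oif,				/* was arg 5 */
    		.nl_u	= { .ip4_u =
    			    { .daddr = daddr,		/* was arg 2 */
    			      .saddr = saddr,		/* was arg 3 */
    			      .tos   = RT_TOS(tos) } },	/* was arg 4 */
    		.proto	= IPPROTO_IPIP,	/* new: selector for xfrm */
    	};

    	/* was: ip_route_output(rtp, daddr, saddr, RT_TOS(tos), oif) */
    	return ip_route_output_key(rtp, &fl);
    }
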
+diff -Nru a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c
+--- a/net/ipv4/ipvs/ip_vs_conn.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ipvs/ip_vs_conn.c	2005-02-13 21:25:09 +11:00
+@@ -606,17 +606,25 @@
+ 	struct iphdr  *iph = skb->nh.iph;
+ 	u8     tos = iph->tos;
+ 	int    mtu;
++	struct flowi fl = {
++		.oif = 0,
++		.nl_u = {
++			.ip4_u = {
++				.daddr = iph->daddr,
++				.saddr = 0,
++				.tos = RT_TOS(tos), } },
++	};
+ 
+ 	EnterFunction(10);
+ 
+-	if (ip_route_output(&rt, iph->daddr, 0, RT_TOS(tos), 0)) {
++	if (ip_route_output_key(&rt, &fl)) {
+ 		IP_VS_DBG_RL("ip_vs_bypass_xmit(): ip_route_output error, "
+ 			     "dest: %u.%u.%u.%u\n", NIPQUAD(iph->daddr));
+ 		goto tx_error_icmp;
+ 	}
+ 
+ 	/* MTU checking */
+-	mtu = rt->u.dst.pmtu;
++	mtu = dst_pmtu(&rt->u.dst);
+ 	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ 		ip_rt_put(rt);
+ 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+@@ -642,8 +650,7 @@
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+ #endif /* CONFIG_NETFILTER_DEBUG */
+-	skb->nfcache |= NFC_IPVS_PROPERTY;
+-	ip_send(skb);
++	IP_VS_XMIT(skb, rt);
+ 
+ 	LeaveFunction(10);
+ 	return NF_STOLEN;
+@@ -742,7 +749,7 @@
+ 		goto tx_error_icmp;
+ 
+ 	/* MTU checking */
+-	mtu = rt->u.dst.pmtu;
++	mtu = dst_pmtu(&rt->u.dst);
+ 	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ 		ip_rt_put(rt);
+ 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+@@ -814,8 +821,7 @@
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+ #endif /* CONFIG_NETFILTER_DEBUG */
+-	skb->nfcache |= NFC_IPVS_PROPERTY;
+-	ip_send(skb);
++	IP_VS_XMIT(skb, rt);
+ 
+ 	LeaveFunction(10);
+ 	return NF_STOLEN;
+@@ -870,14 +876,14 @@
+ 
+ 	tdev = rt->u.dst.dev;
+ 
+-	mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
++	mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
+ 	if (mtu < 68) {
+ 		ip_rt_put(rt);
+ 		IP_VS_DBG_RL("ip_vs_tunnel_xmit(): mtu less than 68\n");
+ 		goto tx_error;
+ 	}
+-	if (skb->dst && mtu < skb->dst->pmtu)
+-		skb->dst->pmtu = mtu;
++	if (skb->dst)
++		skb->dst->ops->update_pmtu(skb->dst, mtu);
+ 
+ 	df |= (old_iph->frag_off&__constant_htons(IP_DF));
+ 
+@@ -939,8 +945,7 @@
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+ #endif /* CONFIG_NETFILTER_DEBUG */
+-	skb->nfcache |= NFC_IPVS_PROPERTY;
+-	ip_send(skb);
++	IP_VS_XMIT(skb, rt);
+ 
+ 	LeaveFunction(10);
+ 
+@@ -969,7 +974,7 @@
+ 		goto tx_error_icmp;
+ 
+ 	/* MTU checking */
+-	mtu = rt->u.dst.pmtu;
++	mtu = dst_pmtu(&rt->u.dst);
+ 	if ((iph->frag_off&__constant_htons(IP_DF)) && skb->len > mtu) {
+ 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+ 		ip_rt_put(rt);
+@@ -995,8 +1000,7 @@
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+ #endif /* CONFIG_NETFILTER_DEBUG */
+-	skb->nfcache |= NFC_IPVS_PROPERTY;
+-	ip_send(skb);
++	IP_VS_XMIT(skb, rt);
+ 
+ #if 0000
+ 	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
+diff -Nru a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c
+--- a/net/ipv4/ipvs/ip_vs_core.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/ipvs/ip_vs_core.c	2005-02-13 21:25:09 +11:00
+@@ -953,7 +953,7 @@
+ 		goto tx_error_icmp;
+ 
+ 	/* MTU checking */
+-	mtu = rt->u.dst.pmtu;
++	mtu = dst_pmtu(&rt->u.dst);
+ 	if ((skb->len > mtu) && (iph->frag_off&__constant_htons(IP_DF))) {
+ 		ip_rt_put(rt);
+ 		icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu));
+@@ -1001,7 +1001,7 @@
+ #ifdef CONFIG_NETFILTER_DEBUG
+ 	skb->nf_debug = 1 << NF_IP_LOCAL_OUT;
+ #endif /* CONFIG_NETFILTER_DEBUG */
+-	ip_send(skb);
++	IP_VS_XMIT(skb, rt);
+ 	ip_vs_conn_put(cp);
+ 	return NF_STOLEN;
+ 
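
The ip_send() calls in the IPVS transmitters become IP_VS_XMIT(skb, rt); the macro itself is defined outside these hunks, presumably in the patched ip_vs.h. Judging by the code it replaces -- the nfcache marking plus a local-out send -- and by the dst_output() conversion used elsewhere in the patch, a plausible reconstruction is:

    /* hypothetical reconstruction, not taken from this patch */
    #define IP_VS_XMIT(skb, rt)				\
    do {						\
    	(skb)->nfcache |= NFC_IPVS_PROPERTY;		\
    	NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL,	\
    		(rt)->u.dst.dev, dst_output);		\
    } while (0)
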
+diff -Nru a/net/ipv4/netfilter/ip_conntrack_standalone.c b/net/ipv4/netfilter/ip_conntrack_standalone.c
+--- a/net/ipv4/netfilter/ip_conntrack_standalone.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ip_conntrack_standalone.c	2005-02-13 21:25:09 +11:00
+@@ -204,7 +204,7 @@
+ 	/* Local packets are never produced too large for their
+ 	   interface.  We degfragment them at LOCAL_OUT, however,
+ 	   so we have to refragment them here. */
+-	if ((*pskb)->len > rt->u.dst.pmtu) {
++	if ((*pskb)->len > dst_pmtu(&rt->u.dst)) {
+ 		/* No hook can be after us, so this should be OK. */
+ 		ip_fragment(*pskb, okfn);
+ 		return NF_STOLEN;
+diff -Nru a/net/ipv4/netfilter/ip_fw_compat_masq.c b/net/ipv4/netfilter/ip_fw_compat_masq.c
+--- a/net/ipv4/netfilter/ip_fw_compat_masq.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ip_fw_compat_masq.c	2005-02-13 21:25:09 +11:00
+@@ -69,12 +69,13 @@
+ 	/* Setup the masquerade, if not already */
+ 	if (!info->initialized) {
+ 		u_int32_t newsrc;
++		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = iph->daddr } } };
+ 		struct rtable *rt;
+ 		struct ip_nat_multi_range range;
+ 
+ 		/* Pass 0 instead of saddr, since it's going to be changed
+ 		   anyway. */
+-		if (ip_route_output(&rt, iph->daddr, 0, 0, 0) != 0) {
++		if (ip_route_output_key(&rt, &fl) != 0) {
+ 			DEBUGP("ipnat_rule_masquerade: Can't reroute.\n");
+ 			return NF_DROP;
+ 		}
+diff -Nru a/net/ipv4/netfilter/ip_nat_core.c b/net/ipv4/netfilter/ip_nat_core.c
+--- a/net/ipv4/netfilter/ip_nat_core.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ip_nat_core.c	2005-02-13 21:25:09 +11:00
+@@ -203,10 +203,11 @@
+ static int
+ do_extra_mangle(u_int32_t var_ip, u_int32_t *other_ipp)
+ {
++	struct flowi fl = { .nl_u = { .ip4_u = { .daddr = var_ip } } };
+ 	struct rtable *rt;
+ 
+ 	/* FIXME: IPTOS_TOS(iph->tos) --RR */
+-	if (ip_route_output(&rt, var_ip, 0, 0, 0) != 0) {
++	if (ip_route_output_key(&rt, &fl) != 0) {
+ 		DEBUGP("do_extra_mangle: Can't get route to %u.%u.%u.%u\n",
+ 		       NIPQUAD(var_ip));
+ 		return 0;
+diff -Nru a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c
+--- a/net/ipv4/netfilter/ipt_MASQUERADE.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ipt_MASQUERADE.c	2005-02-13 21:25:09 +11:00
+@@ -69,7 +69,6 @@
+ 	struct ip_nat_multi_range newrange;
+ 	u_int32_t newsrc;
+ 	struct rtable *rt;
+-	struct rt_key key;
+ 
+ 	IP_NF_ASSERT(hooknum == NF_IP_POST_ROUTING);
+ 
+@@ -84,26 +83,29 @@
+ 
+ 	mr = targinfo;
+ 
+-	key.dst = (*pskb)->nh.iph->daddr;
+-	key.src = 0; /* Unknown: that's what we're trying to establish */
+-	key.tos = RT_TOS((*pskb)->nh.iph->tos)|RTO_CONN;
+-	key.oif = 0;
++	{
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = (*pskb)->nh.iph->daddr,
++						.tos = (RT_TOS((*pskb)->nh.iph->tos) |
++							RTO_CONN),
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-	key.fwmark = (*pskb)->nfmark;
++						.fwmark = (*pskb)->nfmark
+ #endif
+-	if (ip_route_output_key(&rt, &key) != 0) {
+-                /* Funky routing can do this. */
+-                if (net_ratelimit())
+-                        printk("MASQUERADE:"
+-                               " No route: Rusty's brain broke!\n");
+-                return NF_DROP;
+-        }
+-        if (rt->u.dst.dev != out) {
+-                if (net_ratelimit())
+-                        printk("MASQUERADE:"
+-                               " Route sent us somewhere else.\n");
++					      } } };
++		if (ip_route_output_key(&rt, &fl) != 0) {
++	                /* Funky routing can do this. */
++	                if (net_ratelimit())
++				printk("MASQUERADE:"
++				       " No route: Rusty's brain broke!\n");
++			return NF_DROP;
++		}
++	        if (rt->u.dst.dev != out) {
++	                if (net_ratelimit())
++	                        printk("MASQUERADE:"
++	                               " Route sent us somewhere else.\n");
+ 			ip_rt_put(rt);
+-		return NF_DROP;
++			return NF_DROP;
++		}
+ 	}
+ 
+ 	newsrc = rt->rt_src;
+diff -Nru a/net/ipv4/netfilter/ipt_MIRROR.c b/net/ipv4/netfilter/ipt_MIRROR.c
+--- a/net/ipv4/netfilter/ipt_MIRROR.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/netfilter/ipt_MIRROR.c	2005-02-13 21:25:10 +11:00
+@@ -44,21 +44,21 @@
+ {
+         struct iphdr *iph = skb->nh.iph;
+ 	struct dst_entry *odst;
+-	struct rt_key key = {};
++	struct flowi fl = {};
+ 	struct rtable *rt;
+ 
+ 	if (local) {
+-		key.dst = iph->saddr;
+-		key.src = iph->daddr;
+-		key.tos = RT_TOS(iph->tos);
++		fl.nl_u.ip4_u.daddr = iph->saddr;
++		fl.nl_u.ip4_u.saddr = iph->daddr;
++		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+ 
+-		if (ip_route_output_key(&rt, &key) != 0)
++		if (ip_route_output_key(&rt, &fl) != 0)
+ 			return NULL;
+ 	} else {
+ 		/* non-local src, find valid iif to satisfy
+ 		 * rp-filter when calling ip_route_input. */
+-		key.dst = iph->daddr;
+-		if (ip_route_output_key(&rt, &key) != 0)
++		fl.nl_u.ip4_u.daddr = iph->daddr;
++		if (ip_route_output_key(&rt, &fl) != 0)
+ 			return NULL;
+ 
+ 		odst = skb->dst;
+diff -Nru a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c
+--- a/net/ipv4/netfilter/ipt_REJECT.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ipt_REJECT.c	2005-02-13 21:25:09 +11:00
+@@ -26,22 +26,22 @@
+ {
+ 	struct iphdr *iph = skb->nh.iph;
+ 	struct dst_entry *odst;
+-	struct rt_key key = {};
++	struct flowi fl = {};
+ 	struct rtable *rt;
+ 
+ 	if (hook != NF_IP_FORWARD) {
+-		key.dst = iph->saddr;
++		fl.nl_u.ip4_u.daddr = iph->saddr;
+ 		if (hook == NF_IP_LOCAL_IN)
+-			key.src = iph->daddr;
+-		key.tos = RT_TOS(iph->tos);
++			fl.nl_u.ip4_u.saddr = iph->daddr;
++		fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
+ 
+-		if (ip_route_output_key(&rt, &key) != 0)
++		if (ip_route_output_key(&rt, &fl) != 0)
+ 			return NULL;
+ 	} else {
+ 		/* non-local src, find valid iif to satisfy
+ 		 * rp-filter when calling ip_route_input. */
+-		key.dst = iph->daddr;
+-		if (ip_route_output_key(&rt, &key) != 0)
++		fl.nl_u.ip4_u.daddr = iph->daddr;
++		if (ip_route_output_key(&rt, &fl) != 0)
+ 			return NULL;
+ 
+ 		odst = skb->dst;
+@@ -172,7 +172,7 @@
+ 					   nskb->nh.iph->ihl);
+ 
+ 	/* "Never happens" */
+-	if (nskb->len > nskb->dst->pmtu)
++	if (nskb->len > dst_pmtu(nskb->dst))
+ 		goto free_nskb;
+ 
+ 	nf_ct_attach(nskb, oldskb);
+@@ -252,14 +252,19 @@
+ 
+ 	tos = (iph->tos & IPTOS_TOS_MASK) | IPTOS_PREC_INTERNETCONTROL;
+ 
+-	if (ip_route_output(&rt, iph->saddr, saddr, RT_TOS(tos), 0))
+-		return;
+-
++	{
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = iph->saddr,
++						.saddr = saddr,
++						.tos = RT_TOS(tos) } } };
++		if (ip_route_output_key(&rt, &fl))
++			return;
++	}
+ 	/* RFC says return as much as we can without exceeding 576 bytes. */
+ 	length = skb_in->len + sizeof(struct iphdr) + sizeof(struct icmphdr);
+ 
+-	if (length > rt->u.dst.pmtu)
+-		length = rt->u.dst.pmtu;
++	if (length > dst_pmtu(&rt->u.dst))
++		length = dst_pmtu(&rt->u.dst);
+ 	if (length > 576)
+ 		length = 576;
+ 
+diff -Nru a/net/ipv4/netfilter/ipt_TCPMSS.c b/net/ipv4/netfilter/ipt_TCPMSS.c
+--- a/net/ipv4/netfilter/ipt_TCPMSS.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ipt_TCPMSS.c	2005-02-13 21:25:09 +11:00
+@@ -85,14 +85,14 @@
+ 			return NF_DROP; /* or IPT_CONTINUE ?? */
+ 		}
+ 
+-		if((*pskb)->dst->pmtu <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
++		if(dst_pmtu((*pskb)->dst) <= (sizeof(struct iphdr) + sizeof(struct tcphdr))) {
+ 			if (net_ratelimit())
+ 				printk(KERN_ERR
+-		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", (*pskb)->dst->pmtu);
++		       			"ipt_tcpmss_target: unknown or invalid path-MTU (%d)\n", dst_pmtu((*pskb)->dst));
+ 			return NF_DROP; /* or IPT_CONTINUE ?? */
+ 		}
+ 
+-		newmss = (*pskb)->dst->pmtu - sizeof(struct iphdr) - sizeof(struct tcphdr);
++		newmss = dst_pmtu((*pskb)->dst) - sizeof(struct iphdr) - sizeof(struct tcphdr);
+ 	} else
+ 		newmss = tcpmssinfo->mss;
+ 
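
As in TCPMSS above, every direct read of dst->pmtu becomes dst_pmtu(), and direct writes become a dst->ops->update_pmtu() call so each protocol can clamp and lock the value itself (see ip_rt_update_pmtu in the route.c hunks below). The read side is presumably a trivial accessor over the new metrics array in the backport's net/dst.h, roughly:

    /* hypothetical sketch; the real helper may differ in detail.
     * Chasing dst->path (set up in the route.c hunks below) lets an
     * IPsec bundle report the innermost route's PMTU. */
    static inline u32 dst_pmtu(struct dst_entry *dst)
    {
    	return dst->path->metrics[RTAX_MTU - 1];
    }

The point of the ops hook on the write side is visible in ipip.c and ip_vs_conn.c above: skb->dst->pmtu = mtu becomes skb->dst->ops->update_pmtu(skb->dst, mtu).
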
+diff -Nru a/net/ipv4/netfilter/ipt_multiport.c b/net/ipv4/netfilter/ipt_multiport.c
+--- a/net/ipv4/netfilter/ipt_multiport.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/netfilter/ipt_multiport.c	2005-02-13 21:25:09 +11:00
+@@ -4,6 +4,7 @@
+ #include <linux/types.h>
+ #include <linux/udp.h>
+ #include <linux/skbuff.h>
++#include <linux/socket.h>
+ 
+ #include <linux/netfilter_ipv4/ipt_multiport.h>
+ #include <linux/netfilter_ipv4/ip_tables.h>
+diff -Nru a/net/ipv4/proc.c b/net/ipv4/proc.c
+--- a/net/ipv4/proc.c	2005-02-13 21:25:08 +11:00
++++ b/net/ipv4/proc.c	2005-02-13 21:25:08 +11:00
+@@ -116,7 +116,6 @@
+  
+ int snmp_get_info(char *buffer, char **start, off_t offset, int length)
+ {
+-	extern int sysctl_ip_default_ttl;
+ 	int len, i;
+ 
+ 	len = sprintf (buffer,
+diff -Nru a/net/ipv4/protocol.c b/net/ipv4/protocol.c
+--- a/net/ipv4/protocol.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/protocol.c	2005-02-13 21:25:09 +11:00
+@@ -48,134 +48,52 @@
+ #include <net/ipip.h>
+ #include <linux/igmp.h>
+ 
+-#define IPPROTO_PREVIOUS NULL
+-
+-#ifdef CONFIG_IP_MULTICAST
+-
+-static struct inet_protocol igmp_protocol = {
+-	handler:	igmp_rcv,
+-	next:		IPPROTO_PREVIOUS,
+-	protocol:	IPPROTO_IGMP,
+-	name:		"IGMP"
+-};
+-
+-#undef  IPPROTO_PREVIOUS
+-#define IPPROTO_PREVIOUS &igmp_protocol
+-
+-#endif
+-
+-static struct inet_protocol tcp_protocol = {
+-	handler:	tcp_v4_rcv,
+-	err_handler:	tcp_v4_err,
+-	next:		IPPROTO_PREVIOUS,
+-	protocol:	IPPROTO_TCP,
+-	name:		"TCP"
+-};
+-
+-#undef  IPPROTO_PREVIOUS
+-#define IPPROTO_PREVIOUS &tcp_protocol
+-
+-static struct inet_protocol udp_protocol = {
+-	handler:	udp_rcv,
+-	err_handler:	udp_err,
+-	next:		IPPROTO_PREVIOUS,
+-	protocol:	IPPROTO_UDP,
+-	name:		"UDP"
+-};
+-
+-#undef  IPPROTO_PREVIOUS
+-#define IPPROTO_PREVIOUS &udp_protocol
+-
+-static struct inet_protocol icmp_protocol = {
+-	handler:	icmp_rcv,
+-	next:		IPPROTO_PREVIOUS,
+-	protocol:	IPPROTO_ICMP,
+-	name:		"ICMP"
+-};
+-
+-#undef  IPPROTO_PREVIOUS
+-#define IPPROTO_PREVIOUS &icmp_protocol
+-
+-
+-struct inet_protocol *inet_protocol_base = IPPROTO_PREVIOUS;
+-
+ struct inet_protocol *inet_protos[MAX_INET_PROTOS];
+ 
+ /*
+  *	Add a protocol handler to the hash tables
+  */
+ 
+-void inet_add_protocol(struct inet_protocol *prot)
++int inet_add_protocol(struct inet_protocol *prot, unsigned char protocol)
+ {
+-	unsigned char hash;
+-	struct inet_protocol *p2;
++	int hash, ret;
++
++	hash = protocol & (MAX_INET_PROTOS - 1);
+ 
+-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
+ 	br_write_lock_bh(BR_NETPROTO_LOCK);
+-	prot ->next = inet_protos[hash];
+-	inet_protos[hash] = prot;
+-	prot->copy = 0;
+-
+-	/*
+-	 *	Set the copy bit if we need to. 
+-	 */
+-	 
+-	p2 = (struct inet_protocol *) prot->next;
+-	while (p2) {
+-		if (p2->protocol == prot->protocol) {
+-			prot->copy = 1;
+-			break;
+-		}
+-		p2 = (struct inet_protocol *) p2->next;
++
++	if (inet_protos[hash]) {
++		ret = -1;
++	} else {
++		inet_protos[hash] = prot;
++		ret = 0;
+ 	}
++
+ 	br_write_unlock_bh(BR_NETPROTO_LOCK);
++
++	return ret;
+ }
+ 
+ /*
+  *	Remove a protocol from the hash tables.
+  */
+  
+-int inet_del_protocol(struct inet_protocol *prot)
++int inet_del_protocol(struct inet_protocol *prot, unsigned char protocol)
+ {
+-	struct inet_protocol *p;
+-	struct inet_protocol *lp = NULL;
+-	unsigned char hash;
+-
+-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
+-	br_write_lock_bh(BR_NETPROTO_LOCK);
+-	if (prot == inet_protos[hash]) {
+-		inet_protos[hash] = (struct inet_protocol *) inet_protos[hash]->next;
+-		br_write_unlock_bh(BR_NETPROTO_LOCK);
+-		return 0;
+-	}
++	int hash, ret;
+ 
+-	p = (struct inet_protocol *) inet_protos[hash];
++	hash = protocol & (MAX_INET_PROTOS - 1);
+ 
+-	if (p != NULL && p->protocol == prot->protocol)
+-		lp = p;
+-
+-	while (p) {
+-		/*
+-		 * We have to worry if the protocol being deleted is
+-		 * the last one on the list, then we may need to reset
+-		 * someone's copied bit.
+-		 */
+-		if (p->next && p->next == prot) {
+-			/*
+-			 * if we are the last one with this protocol and
+-			 * there is a previous one, reset its copy bit.
+-			 */
+-			if (prot->copy == 0 && lp != NULL)
+-				lp->copy = 0;
+-			p->next = prot->next;
+-			br_write_unlock_bh(BR_NETPROTO_LOCK);
+-			return 0;
+-		}
+-		if (p->next != NULL && p->next->protocol == prot->protocol) 
+-			lp = p->next;
++	br_write_lock_bh(BR_NETPROTO_LOCK);
+ 
+-		p = (struct inet_protocol *) p->next;
++	if (inet_protos[hash] == prot) {
++		inet_protos[hash] = NULL;
++		ret = 0;
++	} else {
++		ret = -1;
+ 	}
++
+ 	br_write_unlock_bh(BR_NETPROTO_LOCK);
+-	return -1;
++
++	return ret;
+ }
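
The rewrite above flattens the old per-hash chains (and their copy-bit logic for duplicate handlers) into one inet_protocol per array slot: the protocol number moves from the structure into an explicit argument, and both inet_add_protocol() and inet_del_protocol() return -1 when the slot is held by someone else, so the static TCP/UDP/ICMP/IGMP entries leave this file and every caller must check the result (as ipmr's MRT_PIM path now does). The resulting module boilerplate, with hypothetical sample_* names and IPPROTO_ESP standing in for any protocol number:

    #include <net/protocol.h>

    static int sample_rcv(struct sk_buff *skb);
    static void sample_err(struct sk_buff *skb, u32 info);

    static struct inet_protocol sample_protocol = {
    	.handler	= sample_rcv,
    	.err_handler	= sample_err,
    };

    static int __init sample_init(void)
    {
    	/* slot index is IPPROTO_ESP & (MAX_INET_PROTOS - 1);
    	 * exactly one handler may own it */
    	if (inet_add_protocol(&sample_protocol, IPPROTO_ESP) < 0)
    		return -EAGAIN;
    	return 0;
    }

    static void __exit sample_fini(void)
    {
    	if (inet_del_protocol(&sample_protocol, IPPROTO_ESP) < 0)
    		printk(KERN_INFO "sample: can't remove protocol\n");
    }
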
+diff -Nru a/net/ipv4/raw.c b/net/ipv4/raw.c
+--- a/net/ipv4/raw.c	2005-02-13 21:25:08 +11:00
++++ b/net/ipv4/raw.c	2005-02-13 21:25:08 +11:00
+@@ -64,6 +64,8 @@
+ #include <net/raw.h>
+ #include <net/inet_common.h>
+ #include <net/checksum.h>
++#include <net/xfrm.h>
++#include <linux/netfilter_ipv4.h>
+ 
+ struct sock *raw_v4_htable[RAWV4_HTABLE_SIZE];
+ rwlock_t raw_v4_lock = RW_LOCK_UNLOCKED;
+@@ -132,13 +134,12 @@
+ }
+ 
+ /* IP input processing comes here for RAW socket delivery.
+- * This is fun as to avoid copies we want to make no surplus
+- * copies.
++ * Caller owns SKB, so we must make clones.
+  *
+  * RFC 1122: SHOULD pass TOS value up to the transport layer.
+  * -> It does. And not only TOS, but all IP header.
+  */
+-struct sock *raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
++void raw_v4_input(struct sk_buff *skb, struct iphdr *iph, int hash)
+ {
+ 	struct sock *sk;
+ 
+@@ -150,28 +151,19 @@
+ 			     skb->dev->ifindex);
+ 
+ 	while (sk) {
+-		struct sock *sknext = __raw_v4_lookup(sk->next, iph->protocol,
+-						      iph->saddr, iph->daddr,
+-						      skb->dev->ifindex);
+-		if (iph->protocol != IPPROTO_ICMP ||
+-		    !icmp_filter(sk, skb)) {
+-			struct sk_buff *clone;
++		if (iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) {
++			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
+ 
+-			if (!sknext)
+-				break;
+-			clone = skb_clone(skb, GFP_ATOMIC);
+ 			/* Not releasing hash table! */
+ 			if (clone)
+ 				raw_rcv(sk, clone);
+ 		}
+-		sk = sknext;
++		sk = __raw_v4_lookup(sk->next, iph->protocol,
++				     iph->saddr, iph->daddr,
++				     skb->dev->ifindex);
+ 	}
+ out:
+-	if (sk)
+-		sock_hold(sk);
+ 	read_unlock(&raw_v4_lock);
+-
+-	return sk;
+ }
+ 
+ void raw_err (struct sock *sk, struct sk_buff *skb, u32 info)
+@@ -244,71 +236,137 @@
+ 
+ int raw_rcv(struct sock *sk, struct sk_buff *skb)
+ {
++	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
++		kfree_skb(skb);
++		return NET_RX_DROP;
++	}
++
+ 	skb_push(skb, skb->data - skb->nh.raw);
+ 
+ 	raw_rcv_skb(sk, skb);
+ 	return 0;
+ }
+ 
+-struct rawfakehdr 
+-{
+-	struct	iovec *iov;
+-	u32	saddr;
+-	struct	dst_entry *dst;
+-};
++static int raw_send_hdrinc(struct sock *sk, void *from, int length,
++			struct rtable *rt, 
++			unsigned int flags)
++{
++	struct inet_opt *inet = inet_sk(sk);
++	int hh_len;
++	struct iphdr *iph;
++	struct sk_buff *skb;
++	int err;
+ 
+-/*
+- *	Send a RAW IP packet.
+- */
++	if (length > rt->u.dst.dev->mtu) {
++		ip_local_error(sk, EMSGSIZE, rt->rt_dst, sk->dport,
++			       rt->u.dst.dev->mtu);
++		return -EMSGSIZE;
++	}
++	if (flags&MSG_PROBE)
++		goto out;
+ 
+-/*
+- *	Callback support is trivial for SOCK_RAW
+- */
+-  
+-static int raw_getfrag(const void *p, char *to, unsigned int offset,
+-		       unsigned int fraglen, struct sk_buff *skb)
+-{
+-	struct rawfakehdr *rfh = (struct rawfakehdr *) p;
+-	return memcpy_fromiovecend(to, rfh->iov, offset, fraglen);
+-}
++	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
+ 
+-/*
+- *	IPPROTO_RAW needs extra work.
+- */
+- 
+-static int raw_getrawfrag(const void *p, char *to, unsigned int offset,
+-				unsigned int fraglen, struct sk_buff *skb)
+-{
+-	struct rawfakehdr *rfh = (struct rawfakehdr *) p;
++	skb = sock_alloc_send_skb(sk, length+hh_len+15,
++				  flags&MSG_DONTWAIT, &err);
++	if (skb == NULL)
++		goto error; 
++	skb_reserve(skb, hh_len);
++
++	skb->priority = sk->priority;
++	skb->dst = dst_clone(&rt->u.dst);
++
++	skb->nh.iph = iph = (struct iphdr *)skb_put(skb, length);
+ 
+-	if (memcpy_fromiovecend(to, rfh->iov, offset, fraglen))
+-		return -EFAULT;
++	skb->ip_summed = CHECKSUM_NONE;
+ 
+-	if (!offset) {
+-		struct iphdr *iph = (struct iphdr *)to;
++	skb->h.raw = skb->nh.raw;
++	err = memcpy_fromiovecend((void *)iph, from, 0, length);
++	if (err)
++		goto error_fault;
++
++	/* We don't modify invalid header */
++	if (length >= sizeof(*iph) && iph->ihl * 4 <= length) {
+ 		if (!iph->saddr)
+-			iph->saddr = rfh->saddr;
++			iph->saddr = rt->rt_src;
+ 		iph->check   = 0;
+-		iph->tot_len = htons(fraglen); /* This is right as you can't
+-						  frag RAW packets */
+-		/*
+-	 	 *	Deliberate breach of modularity to keep 
+-	 	 *	ip_build_xmit clean (well less messy).
+-		 */
++		iph->tot_len = htons(length);
+ 		if (!iph->id)
+-			ip_select_ident(iph, rfh->dst, NULL);
++			ip_select_ident(iph, &rt->u.dst, NULL);
++
+ 		iph->check = ip_fast_csum((unsigned char *)iph, iph->ihl);
+ 	}
++
++	err = NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
++		      dst_output);
++	if (err > 0)
++		err = inet->recverr ? net_xmit_errno(err) : 0;
++	if (err)
++		goto error;
++out:
+ 	return 0;
++
++error_fault:
++	err = -EFAULT;
++	kfree_skb(skb);
++error:
++	IP_INC_STATS(IpOutDiscards);
++	return err; 
++}
++
++static void raw_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
++{
++	struct iovec *iov;
++	u8 __user *type = NULL;
++	u8 __user *code = NULL;
++	int probed = 0;
++	int i;
++
++	if (!msg->msg_iov)
++		return;
++
++	for (i = 0; i < msg->msg_iovlen; i++) {
++		iov = &msg->msg_iov[i];
++		if (!iov)
++			continue;
++
++		switch (fl->proto) {
++		case IPPROTO_ICMP:
++			/* check if one-byte field is readable or not. */
++			if (iov->iov_base && iov->iov_len < 1)
++				break;
++
++			if (!type) {
++				type = iov->iov_base;
++				/* check if code field is readable or not. */
++				if (iov->iov_len > 1)
++					code = type + 1;
++			} else if (!code)
++				code = iov->iov_base;
++
++			if (type && code) {
++				get_user(fl->fl_icmp_type, type);
++				__get_user(fl->fl_icmp_code, code);
++				probed = 1;
++			}
++			break;
++		default:
++			probed = 1;
++			break;
++		}
++		if (probed)
++			break;
++	}
+ }
+ 
+ static int raw_sendmsg(struct sock *sk, struct msghdr *msg, int len)
+ {
++	struct inet_opt *inet = inet_sk(sk);
+ 	struct ipcm_cookie ipc;
+-	struct rawfakehdr rfh;
+ 	struct rtable *rt = NULL;
+ 	int free = 0;
+ 	u32 daddr;
++	u32 saddr;
+ 	u8  tos;
+ 	int err;
+ 
+@@ -378,7 +436,7 @@
+ 			free = 1;
+ 	}
+ 
+-	rfh.saddr = ipc.addr;
++	saddr = ipc.addr;
+ 	ipc.addr = daddr;
+ 
+ 	if (!ipc.opt)
+@@ -404,12 +462,22 @@
+ 	if (MULTICAST(daddr)) {
+ 		if (!ipc.oif)
+ 			ipc.oif = sk->protinfo.af_inet.mc_index;
+-		if (!rfh.saddr)
+-			rfh.saddr = sk->protinfo.af_inet.mc_addr;
++		if (!saddr)
++			saddr = sk->protinfo.af_inet.mc_addr;
+ 	}
+ 
+-	err = ip_route_output(&rt, daddr, rfh.saddr, tos, ipc.oif);
++	{
++		struct flowi fl = { .oif = ipc.oif,
++				    .nl_u = { .ip4_u =
++					      { .daddr = daddr,
++						.saddr = saddr,
++						.tos = tos } },
++				    .proto = inet->hdrincl ? IPPROTO_RAW : sk->protocol };
++		if (!inet->hdrincl)
++			raw_probe_proto_opt(&fl, msg);
+ 
++		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
++	}
+ 	if (err)
+ 		goto done;
+ 
+@@ -421,14 +489,22 @@
+ 		goto do_confirm;
+ back_from_confirm:
+ 
+-	rfh.iov		= msg->msg_iov;
+-	rfh.saddr	= rt->rt_src;
+-	rfh.dst		= &rt->u.dst;
+-	if (!ipc.addr)
+-		ipc.addr = rt->rt_dst;
+-	err = ip_build_xmit(sk, sk->protinfo.af_inet.hdrincl ? raw_getrawfrag :
+-		       	    raw_getfrag, &rfh, len, &ipc, rt, msg->msg_flags);
+-
++	if (inet->hdrincl)
++		err = raw_send_hdrinc(sk, msg->msg_iov, len, 
++					rt, msg->msg_flags);
++	
++	 else {
++		if (!ipc.addr)
++			ipc.addr = rt->rt_dst;
++		lock_sock(sk);
++		err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
++					&ipc, rt, msg->msg_flags);
++		if (err)
++			ip_flush_pending_frames(sk);
++		else if (!(msg->msg_flags & MSG_MORE))
++			err = ip_push_pending_frames(sk);
++		release_sock(sk);
++	}
+ done:
+ 	if (free)
+ 		kfree(ipc.opt);
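
raw_sendmsg() above now routes through ip_route_output_flow() with .proto filled in, which (per the route.c hunk below) chains into xfrm_lookup() so IPsec policy also covers raw sockets; for non-hdrincl ICMP sockets, raw_probe_proto_opt() additionally peeks at the type/code bytes in the iovec so policies can select on them. A condensed sketch of the lookup, under the same flowi-layout assumptions as before (sample_* names hypothetical):

    static int sample_raw_route(struct sock *sk, int noblock,
    				u32 daddr, u32 saddr, u8 tos, int oif,
    				struct rtable **rtp)
    {
    	struct inet_opt *inet = inet_sk(sk); /* compat accessor from the backport */
    	struct flowi fl = {
    		.oif	= oif,
    		.nl_u	= { .ip4_u = { .daddr = daddr,
    				       .saddr = saddr,
    				       .tos   = tos } },
    		/* IPPROTO_RAW for hdrincl sockets, else the socket's
    		 * own protocol number */
    		.proto	= inet->hdrincl ? IPPROTO_RAW : sk->protocol,
    	};

    	/* route lookup plus xfrm_lookup() in one call; the last flag
    	 * presumably lets xfrm block while an SA is negotiated */
    	return ip_route_output_flow(rtp, &fl, sk, !noblock);
    }
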
+diff -Nru a/net/ipv4/route.c b/net/ipv4/route.c
+--- a/net/ipv4/route.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv4/route.c	2005-02-13 21:25:10 +11:00
+@@ -95,6 +95,7 @@
+ #include <net/arp.h>
+ #include <net/tcp.h>
+ #include <net/icmp.h>
++#include <net/xfrm.h>
+ #ifdef CONFIG_SYSCTL
+ #include <linux/sysctl.h>
+ #endif
+@@ -132,11 +133,10 @@
+  */
+ 
+ static struct dst_entry *ipv4_dst_check(struct dst_entry *dst, u32 cookie);
+-static struct dst_entry *ipv4_dst_reroute(struct dst_entry *dst,
+-					   struct sk_buff *skb);
+ static void		 ipv4_dst_destroy(struct dst_entry *dst);
+ static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst);
+ static void		 ipv4_link_failure(struct sk_buff *skb);
++static void		 ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+ static int rt_garbage_collect(void);
+ 
+ 
+@@ -145,10 +145,10 @@
+ 	protocol:		__constant_htons(ETH_P_IP),
+ 	gc:			rt_garbage_collect,
+ 	check:			ipv4_dst_check,
+-	reroute:		ipv4_dst_reroute,
+ 	destroy:		ipv4_dst_destroy,
+ 	negative_advice:	ipv4_negative_advice,
+ 	link_failure:		ipv4_link_failure,
++	update_pmtu:		ip_rt_update_pmtu,
+ 	entry_size:		sizeof(struct rtable),
+ };
+ 
+@@ -248,11 +248,12 @@
+ 				r->u.dst.__use,
+ 				0,
+ 				(unsigned long)r->rt_src,
+-				(r->u.dst.advmss ?
+-				 (int) r->u.dst.advmss + 40 : 0),
+-				r->u.dst.window,
+-				(int)((r->u.dst.rtt >> 3) + r->u.dst.rttvar),
+-				r->key.tos,
++				(dst_metric(&r->u.dst, RTAX_ADVMSS) ?
++				 (int) dst_metric(&r->u.dst, RTAX_ADVMSS) + 40 : 0),
++				dst_metric(&r->u.dst, RTAX_WINDOW),
++				(int)((dst_metric(&r->u.dst, RTAX_RTT) >> 3)
++				      + dst_metric(&r->u.dst, RTAX_RTTVAR)),
++				r->fl.fl4_tos,
+ 				r->u.dst.hh ?
+ 					atomic_read(&r->u.dst.hh->hh_refcnt) :
+ 					-1,
+@@ -338,7 +339,7 @@
+ 	/* Kill broadcast/multicast entries very aggresively, if they
+ 	   collide in hash table with more useful entries */
+ 	return (rth->rt_flags & (RTCF_BROADCAST | RTCF_MULTICAST)) &&
+-		rth->key.iif && rth->u.rt_next;
++		rth->fl.iif && rth->u.rt_next;
+ }
+ 
+ static __inline__ int rt_valuable(struct rtable *rth)
+@@ -383,7 +384,7 @@
+ 	if (rt_valuable(rt))
+ 		score |= (1<<31);
+ 
+-	if (!rt->key.iif ||
++	if (!rt->fl.iif ||
+ 	    !(rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL)))
+ 		score |= (1<<30);
+ 
+@@ -648,6 +649,13 @@
+ out:	return 0;
+ }
+ 
++static inline int compare_keys(struct flowi *fl1, struct flowi *fl2)
++{
++	return memcmp(&fl1->nl_u.ip4_u, &fl2->nl_u.ip4_u, sizeof(fl1->nl_u.ip4_u)) == 0 &&
++	       fl1->oif     == fl2->oif &&
++	       fl1->iif     == fl2->iif;
++}
++
+ static int rt_intern_hash(unsigned hash, struct rtable *rt, struct rtable **rp)
+ {
+ 	struct rtable	*rth, **rthp;
+@@ -668,7 +676,7 @@
+ 
+ 	write_lock_bh(&rt_hash_table[hash].lock);
+ 	while ((rth = *rthp) != NULL) {
+-		if (memcmp(&rth->key, &rt->key, sizeof(rt->key)) == 0) {
++		if (compare_keys(&rth->fl, &rt->fl)) {
+ 			/* Put it first */
+ 			*rthp = rth->u.rt_next;
+ 			rth->u.rt_next = rt_hash_table[hash].chain;
+@@ -715,7 +723,7 @@
+ 	/* Try to bind route to arp only if it is output
+ 	   route or unicast forwarding path.
+ 	 */
+-	if (rt->rt_type == RTN_UNICAST || rt->key.iif == 0) {
++	if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) {
+ 		int err = arp_bind_neighbour(&rt->u.dst);
+ 		if (err) {
+ 			write_unlock_bh(&rt_hash_table[hash].lock);
+@@ -878,11 +886,11 @@
+ 			while ((rth = *rthp) != NULL) {
+ 				struct rtable *rt;
+ 
+-				if (rth->key.dst != daddr ||
+-				    rth->key.src != skeys[i] ||
+-				    rth->key.tos != tos ||
+-				    rth->key.oif != ikeys[k] ||
+-				    rth->key.iif != 0) {
++				if (rth->fl.fl4_dst != daddr ||
++				    rth->fl.fl4_src != skeys[i] ||
++				    rth->fl.fl4_tos != tos ||
++				    rth->fl.oif != ikeys[k] ||
++				    rth->fl.iif != 0) {
+ 					rthp = &rth->u.rt_next;
+ 					continue;
+ 				}
+@@ -908,12 +916,15 @@
+ 				*rt = *rth;
+ 				rt->u.dst.__use		= 1;
+ 				atomic_set(&rt->u.dst.__refcnt, 1);
++				rt->u.dst.child		= NULL;
+ 				if (rt->u.dst.dev)
+ 					dev_hold(rt->u.dst.dev);
++				rt->u.dst.obsolete	= 0;
+ 				rt->u.dst.lastuse	= jiffies;
++				rt->u.dst.path		= &rt->u.dst;
+ 				rt->u.dst.neighbour	= NULL;
+ 				rt->u.dst.hh		= NULL;
+-				rt->u.dst.obsolete	= 0;
++				rt->u.dst.xfrm		= NULL;
+ 
+ 				rt->rt_flags		|= RTCF_REDIRECTED;
+ 
+@@ -973,14 +984,14 @@
+ 			ret = NULL;
+ 		} else if ((rt->rt_flags & RTCF_REDIRECTED) ||
+ 			   rt->u.dst.expires) {
+-			unsigned hash = rt_hash_code(rt->key.dst,
+-						     rt->key.src ^
+-							(rt->key.oif << 5),
+-						     rt->key.tos);
++			unsigned hash = rt_hash_code(rt->fl.fl4_dst,
++						     rt->fl.fl4_src ^
++							(rt->fl.oif << 5),
++						     rt->fl.fl4_tos);
+ #if RT_CACHE_DEBUG >= 1
+ 			printk(KERN_DEBUG "ip_rt_advice: redirect to "
+ 					  "%u.%u.%u.%u/%02x dropped\n",
+-				NIPQUAD(rt->rt_dst), rt->key.tos);
++				NIPQUAD(rt->rt_dst), rt->fl.fl4_tos);
+ #endif
+ 			rt_del(hash, rt);
+ 			ret = NULL;
+@@ -1125,34 +1136,34 @@
+ 		read_lock(&rt_hash_table[hash].lock);
+ 		for (rth = rt_hash_table[hash].chain; rth;
+ 		     rth = rth->u.rt_next) {
+-			if (rth->key.dst == daddr &&
+-			    rth->key.src == skeys[i] &&
++			if (rth->fl.fl4_dst == daddr &&
++			    rth->fl.fl4_src == skeys[i] &&
+ 			    rth->rt_dst  == daddr &&
+ 			    rth->rt_src  == iph->saddr &&
+-			    rth->key.tos == tos &&
+-			    rth->key.iif == 0 &&
+-			    !(rth->u.dst.mxlock & (1 << RTAX_MTU))) {
++			    rth->fl.fl4_tos == tos &&
++			    rth->fl.iif == 0 &&
++			    !(dst_metric_locked(&rth->u.dst, RTAX_MTU))) {
+ 				unsigned short mtu = new_mtu;
+ 
+ 				if (new_mtu < 68 || new_mtu >= old_mtu) {
+ 
+ 					/* BSD 4.2 compatibility hack :-( */
+ 					if (mtu == 0 &&
+-					    old_mtu >= rth->u.dst.pmtu &&
++					    old_mtu >= rth->u.dst.metrics[RTAX_MTU-1] &&
+ 					    old_mtu >= 68 + (iph->ihl << 2))
+ 						old_mtu -= iph->ihl << 2;
+ 
+ 					mtu = guess_mtu(old_mtu);
+ 				}
+-				if (mtu <= rth->u.dst.pmtu) {
+-					if (mtu < rth->u.dst.pmtu) { 
++				if (mtu <= rth->u.dst.metrics[RTAX_MTU-1]) {
++					if (mtu < rth->u.dst.metrics[RTAX_MTU-1]) { 
+ 						dst_confirm(&rth->u.dst);
+ 						if (mtu < ip_rt_min_pmtu) {
+ 							mtu = ip_rt_min_pmtu;
+-							rth->u.dst.mxlock |=
++							rth->u.dst.metrics[RTAX_LOCK-1] |=
+ 								(1 << RTAX_MTU);
+ 						}
+-						rth->u.dst.pmtu = mtu;
++						rth->u.dst.metrics[RTAX_MTU-1] = mtu;
+ 						dst_set_expires(&rth->u.dst,
+ 							ip_rt_mtu_expires);
+ 					}
+@@ -1165,15 +1176,15 @@
+ 	return est_mtu ? : new_mtu;
+ }
+ 
+-void ip_rt_update_pmtu(struct dst_entry *dst, unsigned mtu)
++static void ip_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
+ {
+-	if (dst->pmtu > mtu && mtu >= 68 &&
+-	    !(dst->mxlock & (1 << RTAX_MTU))) {
++	if (dst->metrics[RTAX_MTU-1] > mtu && mtu >= 68 &&
++	    !(dst_metric_locked(dst, RTAX_MTU))) {
+ 		if (mtu < ip_rt_min_pmtu) {
+ 			mtu = ip_rt_min_pmtu;
+-			dst->mxlock |= (1 << RTAX_MTU);
++			dst->metrics[RTAX_LOCK-1] |= (1 << RTAX_MTU);
+ 		}
+-		dst->pmtu = mtu;
++		dst->metrics[RTAX_MTU-1] = mtu;
+ 		dst_set_expires(dst, ip_rt_mtu_expires);
+ 	}
+ }
+@@ -1184,12 +1195,6 @@
+ 	return NULL;
+ }
+ 
+-static struct dst_entry *ipv4_dst_reroute(struct dst_entry *dst,
+-					  struct sk_buff *skb)
+-{
+-	return NULL;
+-}
+-
+ static void ipv4_dst_destroy(struct dst_entry *dst)
+ {
+ 	struct rtable *rt = (struct rtable *) dst;
+@@ -1235,9 +1240,9 @@
+ 	u32 src;
+ 	struct fib_result res;
+ 
+-	if (rt->key.iif == 0)
++	if (rt->fl.iif == 0)
+ 		src = rt->rt_src;
+-	else if (fib_lookup(&rt->key, &res) == 0) {
++	else if (fib_lookup(&rt->fl, &res) == 0) {
+ #ifdef CONFIG_IP_ROUTE_NAT
+ 		if (res.type == RTN_NAT)
+ 			src = inet_select_addr(rt->u.dst.dev, rt->rt_gateway,
+@@ -1270,28 +1275,30 @@
+ 		if (FIB_RES_GW(*res) &&
+ 		    FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK)
+ 			rt->rt_gateway = FIB_RES_GW(*res);
+-		memcpy(&rt->u.dst.mxlock, fi->fib_metrics,
+-			sizeof(fi->fib_metrics));
++		memcpy(rt->u.dst.metrics, fi->fib_metrics,
++		       sizeof(rt->u.dst.metrics));
+ 		if (fi->fib_mtu == 0) {
+-			rt->u.dst.pmtu = rt->u.dst.dev->mtu;
+-			if (rt->u.dst.mxlock & (1 << RTAX_MTU) &&
++			rt->u.dst.metrics[RTAX_MTU-1] = rt->u.dst.dev->mtu;
++			if (rt->u.dst.metrics[RTAX_LOCK-1] & (1 << RTAX_MTU) &&
+ 			    rt->rt_gateway != rt->rt_dst &&
+-			    rt->u.dst.pmtu > 576)
+-				rt->u.dst.pmtu = 576;
++			    rt->u.dst.dev->mtu > 576)
++				rt->u.dst.metrics[RTAX_MTU-1] = 576;
+ 		}
+ #ifdef CONFIG_NET_CLS_ROUTE
+ 		rt->u.dst.tclassid = FIB_RES_NH(*res).nh_tclassid;
+ #endif
+ 	} else
+-		rt->u.dst.pmtu	= rt->u.dst.dev->mtu;
++		rt->u.dst.metrics[RTAX_MTU-1]= rt->u.dst.dev->mtu;
+ 
+-	if (rt->u.dst.pmtu > IP_MAX_MTU)
+-		rt->u.dst.pmtu = IP_MAX_MTU;
+-	if (rt->u.dst.advmss == 0)
+-		rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
++	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0)
++		rt->u.dst.metrics[RTAX_HOPLIMIT-1] = sysctl_ip_default_ttl;
++	if (rt->u.dst.metrics[RTAX_MTU-1] > IP_MAX_MTU)
++		rt->u.dst.metrics[RTAX_MTU-1] = IP_MAX_MTU;
++	if (rt->u.dst.metrics[RTAX_ADVMSS-1] == 0)
++		rt->u.dst.metrics[RTAX_ADVMSS-1] = max_t(unsigned int, rt->u.dst.dev->mtu - 40,
+ 				       ip_rt_min_advmss);
+-	if (rt->u.dst.advmss > 65535 - 40)
+-		rt->u.dst.advmss = 65535 - 40;
++	if (rt->u.dst.metrics[RTAX_ADVMSS-1] > 65535 - 40)
++		rt->u.dst.metrics[RTAX_ADVMSS-1] = 65535 - 40;
+ 
+ #ifdef CONFIG_NET_CLS_ROUTE
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+@@ -1336,13 +1343,15 @@
+ 
+ 	atomic_set(&rth->u.dst.__refcnt, 1);
+ 	rth->u.dst.flags= DST_HOST;
+-	rth->key.dst	= daddr;
++	if (in_dev->cnf.no_policy)
++		rth->u.dst.flags |= DST_NOPOLICY;
++	rth->fl.fl4_dst	= daddr;
+ 	rth->rt_dst	= daddr;
+-	rth->key.tos	= tos;
++	rth->fl.fl4_tos	= tos;
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-	rth->key.fwmark	= skb->nfmark;
++	rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
+-	rth->key.src	= saddr;
++	rth->fl.fl4_src	= saddr;
+ 	rth->rt_src	= saddr;
+ #ifdef CONFIG_IP_ROUTE_NAT
+ 	rth->rt_dst_map	= daddr;
+@@ -1352,10 +1361,10 @@
+ 	rth->u.dst.tclassid = itag;
+ #endif
+ 	rth->rt_iif	=
+-	rth->key.iif	= dev->ifindex;
++	rth->fl.iif	= dev->ifindex;
+ 	rth->u.dst.dev	= &loopback_dev;
+ 	dev_hold(rth->u.dst.dev);
+-	rth->key.oif	= 0;
++	rth->fl.oif	= 0;
+ 	rth->rt_gateway	= daddr;
+ 	rth->rt_spec_dst= spec_dst;
+ 	rth->rt_type	= RTN_MULTICAST;
+@@ -1397,10 +1406,19 @@
+ int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr,
+ 			u8 tos, struct net_device *dev)
+ {
+-	struct rt_key	key;
+ 	struct fib_result res;
+ 	struct in_device *in_dev = in_dev_get(dev);
+ 	struct in_device *out_dev = NULL;
++	struct flowi fl = { .nl_u = { .ip4_u =
++				      { .daddr = daddr,
++					.saddr = saddr,
++					.tos = tos,
++					.scope = RT_SCOPE_UNIVERSE,
++#ifdef CONFIG_IP_ROUTE_FWMARK
++					.fwmark = skb->nfmark
++#endif
++				      } },
++			    .iif = dev->ifindex };
+ 	unsigned	flags = 0;
+ 	u32		itag = 0;
+ 	struct rtable * rth;
+@@ -1414,17 +1432,7 @@
+ 	if (!in_dev)
+ 		goto out;
+ 
+-	key.dst		= daddr;
+-	key.src		= saddr;
+-	key.tos		= tos;
+-#ifdef CONFIG_IP_ROUTE_FWMARK
+-	key.fwmark	= skb->nfmark;
+-#endif
+-	key.iif		= dev->ifindex;
+-	key.oif		= 0;
+-	key.scope	= RT_SCOPE_UNIVERSE;
+-
+-	hash = rt_hash_code(daddr, saddr ^ (key.iif << 5), tos);
++	hash = rt_hash_code(daddr, saddr ^ (fl.iif << 5), tos);
+ 
+ 	/* Check for the most weird martians, which can be not detected
+ 	   by fib_lookup.
+@@ -1448,7 +1456,7 @@
+ 	/*
+ 	 *	Now we are ready to route packet.
+ 	 */
+-	if ((err = fib_lookup(&key, &res)) != 0) {
++	if ((err = fib_lookup(&fl, &res)) != 0) {
+ 		if (!IN_DEV_FORWARD(in_dev))
+ 			goto e_inval;
+ 		goto no_route;
+@@ -1468,17 +1476,17 @@
+ 			src_map = fib_rules_policy(saddr, &res, &flags);
+ 
+ 		if (res.type == RTN_NAT) {
+-			key.dst = fib_rules_map_destination(daddr, &res);
++			fl.fl4_dst = fib_rules_map_destination(daddr, &res);
+ 			fib_res_put(&res);
+ 			free_res = 0;
+-			if (fib_lookup(&key, &res))
++			if (fib_lookup(&fl, &res))
+ 				goto e_inval;
+ 			free_res = 1;
+ 			if (res.type != RTN_UNICAST)
+ 				goto e_inval;
+ 			flags |= RTCF_DNAT;
+ 		}
+-		key.src = src_map;
++		fl.fl4_src = src_map;
+ 	}
+ #endif
+ 
+@@ -1504,8 +1512,8 @@
+ 		goto martian_destination;
+ 
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+-	if (res.fi->fib_nhs > 1 && key.oif == 0)
+-		fib_select_multipath(&key, &res);
++	if (res.fi->fib_nhs > 1 && fl.oif == 0)
++		fib_select_multipath(&fl, &res);
+ #endif
+ 	out_dev = in_dev_get(FIB_RES_DEV(res));
+ 	if (out_dev == NULL) {
+@@ -1542,26 +1550,30 @@
+ 
+ 	atomic_set(&rth->u.dst.__refcnt, 1);
+ 	rth->u.dst.flags= DST_HOST;
+-	rth->key.dst	= daddr;
++	if (in_dev->cnf.no_policy)
++		rth->u.dst.flags |= DST_NOPOLICY;
++	if (in_dev->cnf.no_xfrm)
++		rth->u.dst.flags |= DST_NOXFRM;
++	rth->fl.fl4_dst	= daddr;
+ 	rth->rt_dst	= daddr;
+-	rth->key.tos	= tos;
++	rth->fl.fl4_tos	= tos;
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-	rth->key.fwmark	= skb->nfmark;
++	rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
+-	rth->key.src	= saddr;
++	rth->fl.fl4_src	= saddr;
+ 	rth->rt_src	= saddr;
+ 	rth->rt_gateway	= daddr;
+ #ifdef CONFIG_IP_ROUTE_NAT
+-	rth->rt_src_map	= key.src;
+-	rth->rt_dst_map	= key.dst;
++	rth->rt_src_map	= fl.fl4_src;
++	rth->rt_dst_map	= fl.fl4_dst;
+ 	if (flags&RTCF_DNAT)
+-		rth->rt_gateway	= key.dst;
++		rth->rt_gateway	= fl.fl4_dst;
+ #endif
+ 	rth->rt_iif 	=
+-	rth->key.iif	= dev->ifindex;
++	rth->fl.iif	= dev->ifindex;
+ 	rth->u.dst.dev	= out_dev->dev;
+ 	dev_hold(rth->u.dst.dev);
+-	rth->key.oif 	= 0;
++	rth->fl.oif 	= 0;
+ 	rth->rt_spec_dst= spec_dst;
+ 
+ 	rth->u.dst.input = ip_forward;
+@@ -1619,26 +1631,27 @@
+ 
+ 	atomic_set(&rth->u.dst.__refcnt, 1);
+ 	rth->u.dst.flags= DST_HOST;
+-	rth->key.dst	= daddr;
++	if (in_dev->cnf.no_policy)
++		rth->u.dst.flags |= DST_NOPOLICY;
++	rth->fl.fl4_dst	= daddr;
+ 	rth->rt_dst	= daddr;
+-	rth->key.tos	= tos;
++	rth->fl.fl4_tos	= tos;
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-	rth->key.fwmark	= skb->nfmark;
++	rth->fl.fl4_fwmark= skb->nfmark;
+ #endif
+-	rth->key.src	= saddr;
++	rth->fl.fl4_src	= saddr;
+ 	rth->rt_src	= saddr;
+ #ifdef CONFIG_IP_ROUTE_NAT
+-	rth->rt_dst_map	= key.dst;
+-	rth->rt_src_map	= key.src;
++	rth->rt_dst_map	= fl.fl4_dst;
++	rth->rt_src_map	= fl.fl4_src;
+ #endif
+ #ifdef CONFIG_NET_CLS_ROUTE
+ 	rth->u.dst.tclassid = itag;
+ #endif
+ 	rth->rt_iif	=
+-	rth->key.iif	= dev->ifindex;
++	rth->fl.iif	= dev->ifindex;
+ 	rth->u.dst.dev	= &loopback_dev;
+ 	dev_hold(rth->u.dst.dev);
+-	rth->key.oif 	= 0;
+ 	rth->rt_gateway	= daddr;
+ 	rth->rt_spec_dst= spec_dst;
+ 	rth->u.dst.input= ip_local_deliver;
+@@ -1716,14 +1729,14 @@
+ 
+ 	read_lock(&rt_hash_table[hash].lock);
+ 	for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
+-		if (rth->key.dst == daddr &&
+-		    rth->key.src == saddr &&
+-		    rth->key.iif == iif &&
+-		    rth->key.oif == 0 &&
++		if (rth->fl.fl4_dst == daddr &&
++		    rth->fl.fl4_src == saddr &&
++		    rth->fl.iif == iif &&
++		    rth->fl.oif == 0 &&
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-		    rth->key.fwmark == skb->nfmark &&
++		    rth->fl.fl4_fwmark == skb->nfmark &&
+ #endif
+-		    rth->key.tos == tos) {
++		    rth->fl.fl4_tos == tos) {
+ 			rth->u.dst.lastuse = jiffies;
+ 			dst_hold(&rth->u.dst);
+ 			rth->u.dst.__use++;
+@@ -1773,43 +1786,45 @@
+  * Major route resolver routine.
+  */
+ 
+-int ip_route_output_slow(struct rtable **rp, const struct rt_key *oldkey)
++int ip_route_output_slow(struct rtable **rp, const struct flowi *oldflp)
+ {
+-	struct rt_key key;
++	u32 tos	= oldflp->fl4_tos & (IPTOS_RT_MASK | RTO_ONLINK);
++	struct flowi fl = { .nl_u = { .ip4_u =
++				      { .daddr = oldflp->fl4_dst,
++					.saddr = oldflp->fl4_src,
++					.tos = tos & IPTOS_RT_MASK,
++					.scope = ((tos & RTO_ONLINK) ?
++						  RT_SCOPE_LINK :
++						  RT_SCOPE_UNIVERSE),
++#ifdef CONFIG_IP_ROUTE_FWMARK
++					.fwmark = oldflp->fl4_fwmark
++#endif
++				      } },
++			    .iif = loopback_dev.ifindex,
++			    .oif = oldflp->oif };
+ 	struct fib_result res;
+ 	unsigned flags = 0;
+ 	struct rtable *rth;
+ 	struct net_device *dev_out = NULL;
++	struct in_device *in_dev = NULL;
+ 	unsigned hash;
+ 	int free_res = 0;
+ 	int err;
+-	u32 tos;
+ 
+-	tos		= oldkey->tos & (IPTOS_RT_MASK | RTO_ONLINK);
+-	key.dst		= oldkey->dst;
+-	key.src		= oldkey->src;
+-	key.tos		= tos & IPTOS_RT_MASK;
+-	key.iif		= loopback_dev.ifindex;
+-	key.oif		= oldkey->oif;
+-#ifdef CONFIG_IP_ROUTE_FWMARK
+-	key.fwmark	= oldkey->fwmark;
+-#endif
+-	key.scope	= (tos & RTO_ONLINK) ? RT_SCOPE_LINK :
+-						RT_SCOPE_UNIVERSE;
+ 	res.fi		= NULL;
+ #ifdef CONFIG_IP_MULTIPLE_TABLES
+ 	res.r		= NULL;
+ #endif
+ 
+-	if (oldkey->src) {
++	if (oldflp->fl4_src) {
+ 		err = -EINVAL;
+-		if (MULTICAST(oldkey->src) ||
+-		    BADCLASS(oldkey->src) ||
+-		    ZERONET(oldkey->src))
++		if (MULTICAST(oldflp->fl4_src) ||
++		    BADCLASS(oldflp->fl4_src) ||
++		    ZERONET(oldflp->fl4_src))
+ 			goto out;
+ 
+ 		/* It is equivalent to inet_addr_type(saddr) == RTN_LOCAL */
+-		dev_out = ip_dev_find(oldkey->src);
++		dev_out = ip_dev_find(oldflp->fl4_src);
+ 		if (dev_out == NULL)
+ 			goto out;
+ 
+@@ -1821,8 +1836,8 @@
+ 		      of another iface. --ANK
+ 		 */
+ 
+-		if (oldkey->oif == 0
+-		    && (MULTICAST(oldkey->dst) || oldkey->dst == 0xFFFFFFFF)) {
++		if (oldflp->oif == 0
++		    && (MULTICAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF)) {
+ 			/* Special hack: user can direct multicasts
+ 			   and limited broadcast via necessary interface
+ 			   without fiddling with IP_MULTICAST_IF or IP_PKTINFO.
+@@ -1838,15 +1853,15 @@
+ 			   Luckily, this hack is good workaround.
+ 			 */
+ 
+-			key.oif = dev_out->ifindex;
++			fl.oif = dev_out->ifindex;
+ 			goto make_route;
+ 		}
+ 		if (dev_out)
+ 			dev_put(dev_out);
+ 		dev_out = NULL;
+ 	}
+-	if (oldkey->oif) {
+-		dev_out = dev_get_by_index(oldkey->oif);
++	if (oldflp->oif) {
++		dev_out = dev_get_by_index(oldflp->oif);
+ 		err = -ENODEV;
+ 		if (dev_out == NULL)
+ 			goto out;
+@@ -1855,39 +1870,39 @@
+ 			goto out;	/* Wrong error code */
+ 		}
+ 
+-		if (LOCAL_MCAST(oldkey->dst) || oldkey->dst == 0xFFFFFFFF) {
+-			if (!key.src)
+-				key.src = inet_select_addr(dev_out, 0,
+-								RT_SCOPE_LINK);
++		if (LOCAL_MCAST(oldflp->fl4_dst) || oldflp->fl4_dst == 0xFFFFFFFF) {
++			if (!fl.fl4_src)
++				fl.fl4_src = inet_select_addr(dev_out, 0,
++							      RT_SCOPE_LINK);
+ 			goto make_route;
+ 		}
+-		if (!key.src) {
+-			if (MULTICAST(oldkey->dst))
+-				key.src = inet_select_addr(dev_out, 0,
+-								key.scope);
+-			else if (!oldkey->dst)
+-				key.src = inet_select_addr(dev_out, 0,
+-								RT_SCOPE_HOST);
++		if (!fl.fl4_src) {
++			if (MULTICAST(oldflp->fl4_dst))
++				fl.fl4_src = inet_select_addr(dev_out, 0,
++							      fl.fl4_scope);
++			else if (!oldflp->fl4_dst)
++				fl.fl4_src = inet_select_addr(dev_out, 0,
++							      RT_SCOPE_HOST);
+ 		}
+ 	}
+ 
+-	if (!key.dst) {
+-		key.dst = key.src;
+-		if (!key.dst)
+-			key.dst = key.src = htonl(INADDR_LOOPBACK);
++	if (!fl.fl4_dst) {
++		fl.fl4_dst = fl.fl4_src;
++		if (!fl.fl4_dst)
++			fl.fl4_dst = fl.fl4_src = htonl(INADDR_LOOPBACK);
+ 		if (dev_out)
+ 			dev_put(dev_out);
+ 		dev_out = &loopback_dev;
+ 		dev_hold(dev_out);
+-		key.oif = loopback_dev.ifindex;
++		fl.oif = loopback_dev.ifindex;
+ 		res.type = RTN_LOCAL;
+ 		flags |= RTCF_LOCAL;
+ 		goto make_route;
+ 	}
+ 
+-	if (fib_lookup(&key, &res)) {
++	if (fib_lookup(&fl, &res)) {
+ 		res.fi = NULL;
+-		if (oldkey->oif) {
++		if (oldflp->oif) {
+ 			/* Apparently, routing tables are wrong. Assume,
+ 			   that the destination is on link.
+ 
+@@ -1906,9 +1921,9 @@
+ 			   likely IPv6, but we do not.
+ 			 */
+ 
+-			if (key.src == 0)
+-				key.src = inet_select_addr(dev_out, 0,
+-							   RT_SCOPE_LINK);
++			if (fl.fl4_src == 0)
++				fl.fl4_src = inet_select_addr(dev_out, 0,
++							      RT_SCOPE_LINK);
+ 			res.type = RTN_UNICAST;
+ 			goto make_route;
+ 		}
+@@ -1923,13 +1938,13 @@
+ 		goto e_inval;
+ 
+ 	if (res.type == RTN_LOCAL) {
+-		if (!key.src)
+-			key.src = key.dst;
++		if (!fl.fl4_src)
++			fl.fl4_src = fl.fl4_dst;
+ 		if (dev_out)
+ 			dev_put(dev_out);
+ 		dev_out = &loopback_dev;
+ 		dev_hold(dev_out);
+-		key.oif = dev_out->ifindex;
++		fl.oif = dev_out->ifindex;
+ 		if (res.fi)
+ 			fib_info_put(res.fi);
+ 		res.fi = NULL;
+@@ -1938,36 +1953,40 @@
+ 	}
+ 
+ #ifdef CONFIG_IP_ROUTE_MULTIPATH
+-	if (res.fi->fib_nhs > 1 && key.oif == 0)
+-		fib_select_multipath(&key, &res);
++	if (res.fi->fib_nhs > 1 && fl.oif == 0)
++		fib_select_multipath(&fl, &res);
+ 	else
+ #endif
+-	if (!res.prefixlen && res.type == RTN_UNICAST && !key.oif)
+-		fib_select_default(&key, &res);
++	if (!res.prefixlen && res.type == RTN_UNICAST && !fl.oif)
++		fib_select_default(&fl, &res);
+ 
+-	if (!key.src)
+-		key.src = FIB_RES_PREFSRC(res);
++	if (!fl.fl4_src)
++		fl.fl4_src = FIB_RES_PREFSRC(res);
+ 
+ 	if (dev_out)
+ 		dev_put(dev_out);
+ 	dev_out = FIB_RES_DEV(res);
+ 	dev_hold(dev_out);
+-	key.oif = dev_out->ifindex;
++	fl.oif = dev_out->ifindex;
+ 
+ make_route:
+-	if (LOOPBACK(key.src) && !(dev_out->flags&IFF_LOOPBACK))
++	if (LOOPBACK(fl.fl4_src) && !(dev_out->flags&IFF_LOOPBACK))
+ 		goto e_inval;
+ 
+-	if (key.dst == 0xFFFFFFFF)
++	if (fl.fl4_dst == 0xFFFFFFFF)
+ 		res.type = RTN_BROADCAST;
+-	else if (MULTICAST(key.dst))
++	else if (MULTICAST(fl.fl4_dst))
+ 		res.type = RTN_MULTICAST;
+-	else if (BADCLASS(key.dst) || ZERONET(key.dst))
++	else if (BADCLASS(fl.fl4_dst) || ZERONET(fl.fl4_dst))
+ 		goto e_inval;
+ 
+ 	if (dev_out->flags & IFF_LOOPBACK)
+ 		flags |= RTCF_LOCAL;
+ 
++	in_dev = in_dev_get(dev_out);
++	if (!in_dev)
++		goto e_inval;
++
+ 	if (res.type == RTN_BROADCAST) {
+ 		flags |= RTCF_BROADCAST | RTCF_LOCAL;
+ 		if (res.fi) {
+@@ -1976,11 +1995,8 @@
+ 		}
+ 	} else if (res.type == RTN_MULTICAST) {
+ 		flags |= RTCF_MULTICAST|RTCF_LOCAL;
+-		read_lock(&inetdev_lock);
+-		if (!__in_dev_get(dev_out) ||
+-		    !ip_check_mc(__in_dev_get(dev_out),oldkey->dst,oldkey->src))
++		if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src))
+ 			flags &= ~RTCF_LOCAL;
+-		read_unlock(&inetdev_lock);
+ 		/* If multicast route do not exist use
+ 		   default one, but do not gateway in this case.
+ 		   Yes, it is hack.
+@@ -1997,25 +2013,28 @@
+ 
+ 	atomic_set(&rth->u.dst.__refcnt, 1);
+ 	rth->u.dst.flags= DST_HOST;
+-	rth->key.dst	= oldkey->dst;
+-	rth->key.tos	= tos;
+-	rth->key.src	= oldkey->src;
+-	rth->key.iif	= 0;
+-	rth->key.oif	= oldkey->oif;
++	if (in_dev->cnf.no_xfrm)
++		rth->u.dst.flags |= DST_NOXFRM;
++	if (in_dev->cnf.no_policy)
++		rth->u.dst.flags |= DST_NOPOLICY;
++	rth->fl.fl4_dst	= oldflp->fl4_dst;
++	rth->fl.fl4_tos	= tos;
++	rth->fl.fl4_src	= oldflp->fl4_src;
++	rth->fl.oif	= oldflp->oif;
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-	rth->key.fwmark	= oldkey->fwmark;
++	rth->fl.fl4_fwmark= oldflp->fl4_fwmark;
+ #endif
+-	rth->rt_dst	= key.dst;
+-	rth->rt_src	= key.src;
++	rth->rt_dst	= fl.fl4_dst;
++	rth->rt_src	= fl.fl4_src;
+ #ifdef CONFIG_IP_ROUTE_NAT
+-	rth->rt_dst_map	= key.dst;
+-	rth->rt_src_map	= key.src;
++	rth->rt_dst_map	= fl.fl4_dst;
++	rth->rt_src_map	= fl.fl4_src;
+ #endif
+-	rth->rt_iif	= oldkey->oif ? : dev_out->ifindex;
++	rth->rt_iif	= oldflp->oif ? : dev_out->ifindex;
+ 	rth->u.dst.dev	= dev_out;
+ 	dev_hold(dev_out);
+-	rth->rt_gateway = key.dst;
+-	rth->rt_spec_dst= key.src;
++	rth->rt_gateway = fl.fl4_dst;
++	rth->rt_spec_dst= fl.fl4_src;
+ 
+ 	rth->u.dst.output=ip_output;
+ 
+@@ -2023,40 +2042,39 @@
+ 
+ 	if (flags & RTCF_LOCAL) {
+ 		rth->u.dst.input = ip_local_deliver;
+-		rth->rt_spec_dst = key.dst;
++		rth->rt_spec_dst = fl.fl4_dst;
+ 	}
+ 	if (flags & (RTCF_BROADCAST | RTCF_MULTICAST)) {
+-		rth->rt_spec_dst = key.src;
++		rth->rt_spec_dst = fl.fl4_src;
+ 		if (flags & RTCF_LOCAL && !(dev_out->flags & IFF_LOOPBACK)) {
+ 			rth->u.dst.output = ip_mc_output;
+ 			rt_cache_stat[smp_processor_id()].out_slow_mc++;
+ 		}
+ #ifdef CONFIG_IP_MROUTE
+ 		if (res.type == RTN_MULTICAST) {
+-			struct in_device *in_dev = in_dev_get(dev_out);
+-			if (in_dev) {
+-				if (IN_DEV_MFORWARD(in_dev) &&
+-				    !LOCAL_MCAST(oldkey->dst)) {
+-					rth->u.dst.input = ip_mr_input;
+-					rth->u.dst.output = ip_mc_output;
+-				}
+-				in_dev_put(in_dev);
++			if (IN_DEV_MFORWARD(in_dev) &&
++			    !LOCAL_MCAST(oldflp->fl4_dst)) {
++				rth->u.dst.input = ip_mr_input;
++				rth->u.dst.output = ip_mc_output;
+ 			}
+ 		}
+ #endif
+ 	}
+ 
+ 	rt_set_nexthop(rth, &res, 0);
++	
+ 
+ 	rth->rt_flags = flags;
+ 
+-	hash = rt_hash_code(oldkey->dst, oldkey->src ^ (oldkey->oif << 5), tos);
++	hash = rt_hash_code(oldflp->fl4_dst, oldflp->fl4_src ^ (oldflp->oif << 5), tos);
+ 	err = rt_intern_hash(hash, rth, rp);
+ done:
+ 	if (free_res)
+ 		fib_res_put(&res);
+ 	if (dev_out)
+ 		dev_put(dev_out);
++	if (in_dev)
++		in_dev_put(in_dev);
+ out:	return err;
+ 
+ e_inval:
+@@ -2067,23 +2085,23 @@
+ 	goto done;
+ }
+ 
+-int ip_route_output_key(struct rtable **rp, const struct rt_key *key)
++int __ip_route_output_key(struct rtable **rp, const struct flowi *flp)
+ {
+ 	unsigned hash;
+ 	struct rtable *rth;
+ 
+-	hash = rt_hash_code(key->dst, key->src ^ (key->oif << 5), key->tos);
++	hash = rt_hash_code(flp->fl4_dst, flp->fl4_src ^ (flp->oif << 5), flp->fl4_tos);
+ 
+ 	read_lock_bh(&rt_hash_table[hash].lock);
+ 	for (rth = rt_hash_table[hash].chain; rth; rth = rth->u.rt_next) {
+-		if (rth->key.dst == key->dst &&
+-		    rth->key.src == key->src &&
+-		    rth->key.iif == 0 &&
+-		    rth->key.oif == key->oif &&
++		if (rth->fl.fl4_dst == flp->fl4_dst &&
++		    rth->fl.fl4_src == flp->fl4_src &&
++		    rth->fl.iif == 0 &&
++		    rth->fl.oif == flp->oif &&
+ #ifdef CONFIG_IP_ROUTE_FWMARK
+-		    rth->key.fwmark == key->fwmark &&
++		    rth->fl.fl4_fwmark == flp->fl4_fwmark &&
+ #endif
+-		    !((rth->key.tos ^ key->tos) &
++		    !((rth->fl.fl4_tos ^ flp->fl4_tos) &
+ 			    (IPTOS_RT_MASK | RTO_ONLINK))) {
+ 			rth->u.dst.lastuse = jiffies;
+ 			dst_hold(&rth->u.dst);
+@@ -2097,8 +2115,31 @@
+ 	}
+ 	read_unlock_bh(&rt_hash_table[hash].lock);
+ 
+-	return ip_route_output_slow(rp, key);
+-}	
++	return ip_route_output_slow(rp, flp);
++}
++
++int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags)
++{
++	int err;
++
++	if ((err = __ip_route_output_key(rp, flp)) != 0)
++		return err;
++
++	if (flp->proto) {
++		if (!flp->fl4_src)
++			flp->fl4_src = (*rp)->rt_src;
++		if (!flp->fl4_dst)
++			flp->fl4_dst = (*rp)->rt_dst;
++		return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
++	}
++
++	return 0;
++}
++
++int ip_route_output_key(struct rtable **rp, struct flowi *flp)
++{
++	return ip_route_output_flow(rp, flp, NULL, 0);
++}
+ 
+ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event,
+ 			int nowait)
+@@ -2117,7 +2158,7 @@
+ 	r->rtm_family	 = AF_INET;
+ 	r->rtm_dst_len	= 32;
+ 	r->rtm_src_len	= 0;
+-	r->rtm_tos	= rt->key.tos;
++	r->rtm_tos	= rt->fl.fl4_tos;
+ 	r->rtm_table	= RT_TABLE_MAIN;
+ 	r->rtm_type	= rt->rt_type;
+ 	r->rtm_scope	= RT_SCOPE_UNIVERSE;
+@@ -2126,9 +2167,9 @@
+ 	if (rt->rt_flags & RTCF_NOTIFY)
+ 		r->rtm_flags |= RTM_F_NOTIFY;
+ 	RTA_PUT(skb, RTA_DST, 4, &rt->rt_dst);
+-	if (rt->key.src) {
++	if (rt->fl.fl4_src) {
+ 		r->rtm_src_len = 32;
+-		RTA_PUT(skb, RTA_SRC, 4, &rt->key.src);
++		RTA_PUT(skb, RTA_SRC, 4, &rt->fl.fl4_src);
+ 	}
+ 	if (rt->u.dst.dev)
+ 		RTA_PUT(skb, RTA_OIF, sizeof(int), &rt->u.dst.dev->ifindex);
+@@ -2136,13 +2177,13 @@
+ 	if (rt->u.dst.tclassid)
+ 		RTA_PUT(skb, RTA_FLOW, 4, &rt->u.dst.tclassid);
+ #endif
+-	if (rt->key.iif)
++	if (rt->fl.iif)
+ 		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_spec_dst);
+-	else if (rt->rt_src != rt->key.src)
++	else if (rt->rt_src != rt->fl.fl4_src)
+ 		RTA_PUT(skb, RTA_PREFSRC, 4, &rt->rt_src);
+ 	if (rt->rt_dst != rt->rt_gateway)
+ 		RTA_PUT(skb, RTA_GATEWAY, 4, &rt->rt_gateway);
+-	if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
++	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+ 		goto rtattr_failure;
+ 	ci.rta_lastuse	= jiffies - rt->u.dst.lastuse;
+ 	ci.rta_used	= rt->u.dst.__use;
+@@ -2164,7 +2205,7 @@
+ 	eptr = (struct rtattr*)skb->tail;
+ #endif
+ 	RTA_PUT(skb, RTA_CACHEINFO, sizeof(ci), &ci);
+-	if (rt->key.iif) {
++	if (rt->fl.iif) {
+ #ifdef CONFIG_IP_MROUTE
+ 		u32 dst = rt->rt_dst;
+ 
+@@ -2184,7 +2225,7 @@
+ 			}
+ 		} else
+ #endif
+-			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->key.iif);
++			RTA_PUT(skb, RTA_IIF, sizeof(int), &rt->fl.iif);
+ 	}
+ 
+ 	nlh->nlmsg_len = skb->tail - b;
+@@ -2238,10 +2279,14 @@
+ 		if (!err && rt->u.dst.error)
+ 			err = -rt->u.dst.error;
+ 	} else {
++		struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst,
++							 .saddr = src,
++							 .tos = rtm->rtm_tos } } };
+ 		int oif = 0;
+ 		if (rta[RTA_OIF - 1])
+ 			memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int));
+-		err = ip_route_output(&rt, dst, src, rtm->rtm_tos, oif);
++		fl.oif = oif;
++		err = ip_route_output_key(&rt, &fl);
+ 	}
+ 	if (err)
+ 		goto out_free;
+@@ -2630,5 +2675,9 @@
+ 				rt_cache_stat_get_info);
+ #ifdef CONFIG_NET_CLS_ROUTE
+ 	create_proc_read_entry("net/rt_acct", 0, 0, ip_rt_acct_read, NULL);
++#endif
++#ifdef CONFIG_XFRM
++	xfrm_init();
++	xfrm4_init();
+ #endif
+ }
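
The route.c hunks above replace the old struct rt_key lookup key with the generic struct flowi, and layer ip_route_output_flow() over __ip_route_output_key() so that any flow carrying a protocol is also run through xfrm_lookup() for IPsec policy resolution. A minimal caller-side sketch of the new interface, mirroring the pattern used throughout the rest of this patch (kernel context assumed; the wrapper name is illustrative, not part of the patch):

    /* Hedged sketch: resolve an IPv4 output route via the flowi-based
     * API introduced above.  example_route_lookup is illustrative. */
    static int example_route_lookup(u32 daddr, u32 saddr, int oif,
                                    struct rtable **rtp)
    {
            struct flowi fl = { .oif = oif,
                                .nl_u = { .ip4_u = { .daddr = daddr,
                                                     .saddr = saddr,
                                                     .tos   = 0 } },
                                .proto = IPPROTO_UDP };

            /* With fl.proto set, ip_route_output_key() (now a thin
             * wrapper around ip_route_output_flow()) also consults
             * xfrm_lookup() and may hand back an IPsec bundle. */
            return ip_route_output_key(rtp, &fl);
    }
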
+diff -Nru a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c
+--- a/net/ipv4/syncookies.c	2005-02-13 21:25:08 +11:00
++++ b/net/ipv4/syncookies.c	2005-02-13 21:25:08 +11:00
+@@ -169,18 +169,25 @@
+ 	 * hasn't changed since we received the original syn, but I see
+ 	 * no easy way to do this. 
+ 	 */
+-	if (ip_route_output(&rt,
+-			    opt && 
+-			    opt->srr ? opt->faddr : req->af.v4_req.rmt_addr,
+-			    req->af.v4_req.loc_addr,
+-			    RT_CONN_FLAGS(sk),
+-			    0)) { 
+-		tcp_openreq_free(req);
+-		goto out; 
++	{
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = ((opt && opt->srr) ?
++							  opt->faddr :
++							  req->af.v4_req.rmt_addr),
++						.saddr = req->af.v4_req.loc_addr,
++						.tos = RT_CONN_FLAGS(sk) } },
++				    .proto = IPPROTO_TCP,
++				    .uli_u = { .ports =
++					       { .sport = skb->h.th->dest,
++						 .dport = skb->h.th->source } } };
++		if (ip_route_output_key(&rt, &fl)) {
++			tcp_openreq_free(req);
++			goto out; 
++		}
+ 	}
+ 
+ 	/* Try to redo what tcp_v4_send_synack did. */
+-	req->window_clamp = rt->u.dst.window;  
++	req->window_clamp = dst_metric(&rt->u.dst, RTAX_WINDOW);
+ 	tcp_select_initial_window(tcp_full_space(sk), req->mss,
+ 				  &req->rcv_wnd, &req->window_clamp, 
+ 				  0, &rcv_wscale);
+diff -Nru a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
+--- a/net/ipv4/sysctl_net_ipv4.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/sysctl_net_ipv4.c	2005-02-13 21:25:09 +11:00
+@@ -82,14 +82,39 @@
+ 			 void *newval, size_t newlen, 
+ 			 void **context)
+ {
++	int *valp = table->data;
+ 	int new;
++
++	if (!newval || !newlen)
++		return 0;
++
+ 	if (newlen != sizeof(int))
+ 		return -EINVAL;
+-	if (get_user(new,(int *)newval))
+-		return -EFAULT; 
+-	if (new != ipv4_devconf.forwarding) 
+-		inet_forward_change(new); 
+-	return 0; /* caller does change again and handles handles oldval */ 
++
++	if (get_user(new, (int *)newval))
++		return -EFAULT;
++
++	if (new == *valp)
++		return 0;
++
++	if (oldval && oldlenp) {
++		size_t len;
++
++		if (get_user(len, oldlenp))
++			return -EFAULT;
++
++		if (len) {
++			if (len > table->maxlen)
++				len = table->maxlen;
++			if (copy_to_user(oldval, valp, len))
++				return -EFAULT;
++			if (put_user(len, oldlenp))
++				return -EFAULT;
++		}
++	}
++
++	inet_forward_change(new);
++	return 1;
+ }
+ 
+ ctl_table ipv4_table[] = {
+@@ -110,7 +135,7 @@
+          &ipv4_sysctl_forward,&ipv4_sysctl_forward_strategy},
+         {NET_IPV4_DEFAULT_TTL, "ip_default_ttl",
+          &sysctl_ip_default_ttl, sizeof(int), 0644, NULL,
+-         &proc_dointvec},
++         &ipv4_doint_and_flush, &ipv4_doint_and_flush_strategy},
+         {NET_IPV4_AUTOCONFIG, "ip_autoconfig",
+          &ipv4_config.autoconfig, sizeof(int), 0644, NULL,
+          &proc_dointvec},
+diff -Nru a/net/ipv4/tcp.c b/net/ipv4/tcp.c
+--- a/net/ipv4/tcp.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/tcp.c	2005-02-13 21:25:09 +11:00
+@@ -204,6 +204,8 @@
+  *		Andi Kleen 	:	Make poll agree with SIGIO
+  *	Salvatore Sanfilippo	:	Support SO_LINGER with linger == 1 and
+  *					lingertime == 0 (RFC 793 ABORT Call)
++ *	Hirokazu Takahashi	:	Use copy_from_user() instead of
++ *					csum_and_copy_from_user() if possible.
+  *					
+  *		This program is free software; you can redistribute it and/or
+  *		modify it under the terms of the GNU General Public License
+@@ -256,6 +258,7 @@
+ 
+ #include <net/icmp.h>
+ #include <net/tcp.h>
++#include <net/xfrm.h>
+ 
+ #include <asm/uaccess.h>
+ #include <asm/ioctls.h>
+@@ -955,8 +958,8 @@
+ 	return res;
+ }
+ 
+-#define TCP_PAGE(sk)	(sk->tp_pinfo.af_tcp.sndmsg_page)
+-#define TCP_OFF(sk)	(sk->tp_pinfo.af_tcp.sndmsg_off)
++#define TCP_PAGE(sk)	(inet_sk(sk)->sndmsg_page)
++#define TCP_OFF(sk)	(inet_sk(sk)->sndmsg_off)
+ 
+ static inline int
+ tcp_copy_to_page(struct sock *sk, char *from, struct sk_buff *skb,
+@@ -965,18 +968,22 @@
+ 	int err = 0;
+ 	unsigned int csum;
+ 
+-	csum = csum_and_copy_from_user(from, page_address(page)+off,
++	if (skb->ip_summed == CHECKSUM_NONE) {
++		csum = csum_and_copy_from_user(from, page_address(page) + off,
+ 				       copy, 0, &err);
+-	if (!err) {
+-		if (skb->ip_summed == CHECKSUM_NONE)
+-			skb->csum = csum_block_add(skb->csum, csum, skb->len);
+-		skb->len += copy;
+-		skb->data_len += copy;
+-		skb->truesize += copy;
+-		sk->wmem_queued += copy;
+-		sk->forward_alloc -= copy;
++		if (err) return err;
++		skb->csum = csum_block_add(skb->csum, csum, skb->len);
++	} else {
++		if (copy_from_user(page_address(page) + off, from, copy))
++			return -EFAULT;
+ 	}
+-	return err;
++
++	skb->len += copy;
++	skb->data_len += copy;
++	skb->truesize += copy;
++	sk->wmem_queued += copy;
++	sk->forward_alloc -= copy;
++	return 0;
+ }
+ 
+ static inline int
+@@ -986,11 +993,16 @@
+ 	unsigned int csum;
+ 	int off = skb->len;
+ 
+-	csum = csum_and_copy_from_user(from, skb_put(skb, copy),
++	if (skb->ip_summed == CHECKSUM_NONE) {
++		csum = csum_and_copy_from_user(from, skb_put(skb, copy),
+ 				       copy, 0, &err);
+-	if (!err) {
+-		skb->csum = csum_block_add(skb->csum, csum, off);
+-		return 0;
++		if (!err) {
++			skb->csum = csum_block_add(skb->csum, csum, off);
++			return 0;
++		}
++	} else {
++		if (!copy_from_user(skb_put(skb, copy), from, copy))
++			return 0;
+ 	}
+ 
+ 	__skb_trim(skb, off);
+@@ -1072,6 +1084,12 @@
+ 				if (skb == NULL)
+ 					goto wait_for_memory;
+ 
++				/*
++				 * Check whether we can use HW checksum.
++				 */
++				if (sk->route_caps & (NETIF_F_IP_CSUM|NETIF_F_NO_CSUM|NETIF_F_HW_CSUM))
++					skb->ip_summed = CHECKSUM_HW;
++
+ 				skb_entail(sk, tp, skb);
+ 				copy = mss_now;
+ 			}
+@@ -1896,6 +1914,8 @@
+ 	sk->prot->destroy(sk);
+ 
+ 	tcp_kill_sk_queues(sk);
++
++	xfrm_sk_free_policy(sk);
+ 
+ #ifdef INET_REFCNT_DEBUG
+ 	if (atomic_read(&sk->refcnt) != 1) {
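
The tcp.c changes above let the sendmsg copy paths skip software checksumming when the device can checksum in hardware: a freshly allocated skb is marked CHECKSUM_HW whenever the cached route advertises offload, after which tcp_copy_to_page() and its sibling copy helper use a plain copy_from_user() instead of csum_and_copy_from_user(). The capability test, isolated as a sketch (kernel context assumed; the helper name is not from the patch):

    /* Hedged sketch of the offload test the hunk adds before
     * skb_entail(). */
    static inline int example_can_offload_csum(struct sock *sk)
    {
            return (sk->route_caps &
                    (NETIF_F_IP_CSUM | NETIF_F_NO_CSUM | NETIF_F_HW_CSUM)) != 0;
    }
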
+diff -Nru a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
+--- a/net/ipv4/tcp_input.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/tcp_input.c	2005-02-13 21:25:09 +11:00
+@@ -727,25 +727,25 @@
+ 			 * Probably, no packets returned in time.
+ 			 * Reset our results.
+ 			 */
+-			if (!(dst->mxlock&(1<<RTAX_RTT)))
+-				dst->rtt = 0;
++			if (!(dst_metric_locked(dst, RTAX_RTT)))
++				dst->metrics[RTAX_RTT-1] = 0;
+ 			return;
+ 		}
+ 
+-		m = dst->rtt - tp->srtt;
++		m = dst_metric(dst, RTAX_RTT) - tp->srtt;
+ 
+ 		/* If newly calculated rtt larger than stored one,
+ 		 * store new one. Otherwise, use EWMA. Remember,
+ 		 * rtt overestimation is always better than underestimation.
+ 		 */
+-		if (!(dst->mxlock&(1<<RTAX_RTT))) {
++		if (!(dst_metric_locked(dst, RTAX_RTT))) {
+ 			if (m <= 0)
+-				dst->rtt = tp->srtt;
++				dst->metrics[RTAX_RTT-1] = tp->srtt;
+ 			else
+-				dst->rtt -= (m>>3);
++				dst->metrics[RTAX_RTT-1] -= (m>>3);
+ 		}
+ 
+-		if (!(dst->mxlock&(1<<RTAX_RTTVAR))) {
++		if (!(dst_metric_locked(dst, RTAX_RTTVAR))) {
+ 			if (m < 0)
+ 				m = -m;
+ 
+@@ -754,67 +754,61 @@
+ 			if (m < tp->mdev)
+ 				m = tp->mdev;
+ 
+-			if (m >= dst->rttvar)
+-				dst->rttvar = m;
++			if (m >= dst_metric(dst, RTAX_RTTVAR))
++				dst->metrics[RTAX_RTTVAR-1] = m;
+ 			else
+-				dst->rttvar -= (dst->rttvar - m)>>2;
++				dst->metrics[RTAX_RTTVAR-1] -=
++					(dst->metrics[RTAX_RTTVAR-1] - m)>>2;
+ 		}
+ 
+ 		if (tp->snd_ssthresh >= 0xFFFF) {
+ 			/* Slow start still did not finish. */
+-			if (dst->ssthresh &&
+-			    !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
+-			    (tp->snd_cwnd>>1) > dst->ssthresh)
+-				dst->ssthresh = (tp->snd_cwnd>>1);
+-			if (!(dst->mxlock&(1<<RTAX_CWND)) &&
+-			    tp->snd_cwnd > dst->cwnd)
+-				dst->cwnd = tp->snd_cwnd;
++			if (dst_metric(dst, RTAX_SSTHRESH) &&
++			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
++			    (tp->snd_cwnd >> 1) > dst_metric(dst, RTAX_SSTHRESH))
++				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_cwnd >> 1;
++			if (!dst_metric_locked(dst, RTAX_CWND) &&
++			    tp->snd_cwnd > dst_metric(dst, RTAX_CWND))
++				dst->metrics[RTAX_CWND-1] = tp->snd_cwnd;
+ 		} else if (tp->snd_cwnd > tp->snd_ssthresh &&
+ 			   tp->ca_state == TCP_CA_Open) {
+ 			/* Cong. avoidance phase, cwnd is reliable. */
+-			if (!(dst->mxlock&(1<<RTAX_SSTHRESH)))
+-				dst->ssthresh = max(tp->snd_cwnd>>1, tp->snd_ssthresh);
+-			if (!(dst->mxlock&(1<<RTAX_CWND)))
+-				dst->cwnd = (dst->cwnd + tp->snd_cwnd)>>1;
++			if (!dst_metric_locked(dst, RTAX_SSTHRESH))
++				dst->metrics[RTAX_SSTHRESH-1] =
++					max(tp->snd_cwnd >> 1, tp->snd_ssthresh);
++			if (!dst_metric_locked(dst, RTAX_CWND))
++				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_cwnd) >> 1;
+ 		} else {
+ 			/* Else slow start did not finish, cwnd is non-sense,
+ 			   ssthresh may be also invalid.
+ 			 */
+-			if (!(dst->mxlock&(1<<RTAX_CWND)))
+-				dst->cwnd = (dst->cwnd + tp->snd_ssthresh)>>1;
+-			if (dst->ssthresh &&
+-			    !(dst->mxlock&(1<<RTAX_SSTHRESH)) &&
+-			    tp->snd_ssthresh > dst->ssthresh)
+-				dst->ssthresh = tp->snd_ssthresh;
++			if (!dst_metric_locked(dst, RTAX_CWND))
++				dst->metrics[RTAX_CWND-1] = (dst->metrics[RTAX_CWND-1] + tp->snd_ssthresh) >> 1;
++			if (dst->metrics[RTAX_SSTHRESH-1] &&
++			    !dst_metric_locked(dst, RTAX_SSTHRESH) &&
++			    tp->snd_ssthresh > dst->metrics[RTAX_SSTHRESH-1])
++				dst->metrics[RTAX_SSTHRESH-1] = tp->snd_ssthresh;
+ 		}
+ 
+-		if (!(dst->mxlock&(1<<RTAX_REORDERING))) {
+-			if (dst->reordering < tp->reordering &&
++		if (!dst_metric_locked(dst, RTAX_REORDERING)) {
++			if (dst->metrics[RTAX_REORDERING-1] < tp->reordering &&
+ 			    tp->reordering != sysctl_tcp_reordering)
+-				dst->reordering = tp->reordering;
++				dst->metrics[RTAX_REORDERING-1] = tp->reordering;
+ 		}
+ 	}
+ }
+ 
+-/* Increase initial CWND conservatively: if estimated
+- * RTT is low enough (<20msec) or if we have some preset ssthresh.
+- *
+- * Numbers are taken from RFC2414.
+- */
+-__u32 tcp_init_cwnd(struct tcp_opt *tp)
++/* Numbers are taken from RFC2414.  */
++__u32 tcp_init_cwnd(struct tcp_opt *tp, struct dst_entry *dst)
+ {
+-	__u32 cwnd;
+-
+-	if (tp->mss_cache > 1460)
+-		return 2;
+-
+-	cwnd = (tp->mss_cache > 1095) ? 3 : 4;
+-
+-	if (!tp->srtt || (tp->snd_ssthresh >= 0xFFFF && tp->srtt > ((HZ/50)<<3)))
+-		cwnd = 2;
+-	else if (cwnd > tp->snd_ssthresh)
+-		cwnd = tp->snd_ssthresh;
++	__u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0);
+ 
++	if (!cwnd) {
++		if (tp->mss_cache > 1460)
++			cwnd = 2;
++		else
++			cwnd = (tp->mss_cache > 1095) ? 3 : 4;
++	}
+ 	return min_t(__u32, cwnd, tp->snd_cwnd_clamp);
+ }
+ 
+@@ -830,22 +824,23 @@
+ 
+ 	dst_confirm(dst);
+ 
+-	if (dst->mxlock&(1<<RTAX_CWND))
+-		tp->snd_cwnd_clamp = dst->cwnd;
+-	if (dst->ssthresh) {
+-		tp->snd_ssthresh = dst->ssthresh;
++	if (dst_metric_locked(dst, RTAX_CWND))
++		tp->snd_cwnd_clamp = dst_metric(dst, RTAX_CWND);
++	if (dst_metric(dst, RTAX_SSTHRESH)) {
++		tp->snd_ssthresh = dst_metric(dst, RTAX_SSTHRESH);
+ 		if (tp->snd_ssthresh > tp->snd_cwnd_clamp)
+ 			tp->snd_ssthresh = tp->snd_cwnd_clamp;
+ 	}
+-	if (dst->reordering && tp->reordering != dst->reordering) {
++	if (dst_metric(dst, RTAX_REORDERING) &&
++	    tp->reordering != dst_metric(dst, RTAX_REORDERING)) {
+ 		tp->sack_ok &= ~2;
+-		tp->reordering = dst->reordering;
++		tp->reordering = dst_metric(dst, RTAX_REORDERING);
+ 	}
+ 
+-	if (dst->rtt == 0)
++	if (dst_metric(dst, RTAX_RTT) == 0)
+ 		goto reset;
+ 
+-	if (!tp->srtt && dst->rtt < (TCP_TIMEOUT_INIT<<3))
++	if (!tp->srtt && dst_metric(dst, RTAX_RTT) < (TCP_TIMEOUT_INIT << 3))
+ 		goto reset;
+ 
+ 	/* Initial rtt is determined from SYN,SYN-ACK.
+@@ -862,19 +857,19 @@
+ 	 * to low value, and then abruptly stops to do it and starts to delay
+ 	 * ACKs, wait for troubles.
+ 	 */
+-	if (dst->rtt > tp->srtt) {
+-		tp->srtt = dst->rtt;
++	if (dst_metric(dst, RTAX_RTT) > tp->srtt) {
++		tp->srtt = dst_metric(dst, RTAX_RTT);
+ 		tp->rtt_seq = tp->snd_nxt;
+ 	}
+-	if (dst->rttvar > tp->mdev) {
+-		tp->mdev = dst->rttvar;
++	if (dst_metric(dst, RTAX_RTTVAR) > tp->mdev) {
++		tp->mdev = dst_metric(dst, RTAX_RTTVAR);
+ 		tp->mdev_max = tp->rttvar = max(tp->mdev, TCP_RTO_MIN);
+ 	}
+ 	tcp_set_rto(tp);
+ 	tcp_bound_rto(tp);
+ 	if (tp->rto < TCP_TIMEOUT_INIT && !tp->saw_tstamp)
+ 		goto reset;
+-	tp->snd_cwnd = tcp_init_cwnd(tp);
++	tp->snd_cwnd = tcp_init_cwnd(tp, dst);
+ 	tp->snd_cwnd_stamp = tcp_time_stamp;
+ 	return;
+ 
+@@ -4430,7 +4425,24 @@
+ 
+ 		tcp_sync_mss(sk, tp->pmtu_cookie);
+ 		tcp_initialize_rcv_mss(sk);
++
++		/* Remember, tcp_poll() does not lock socket!
++		 * Change state from SYN-SENT only after copied_seq
++		 * is initialized. */
++		tp->copied_seq = tp->rcv_nxt;
++		mb();
++		tcp_set_state(sk, TCP_ESTABLISHED);
++
++		/* Make sure socket is routed, for correct metrics.  */
++		tp->af_specific->rebuild_header(sk);
++
+ 		tcp_init_metrics(sk);
++
++		/* Prevent spurious tcp_cwnd_restart() on first data
++		 * packet.
++		 */
++		tp->lsndtime = tcp_time_stamp;
++
+ 		tcp_init_buffer_space(sk);
+ 
+ 		if (sk->keepopen)
+@@ -4441,13 +4453,6 @@
+ 		else
+ 			tp->pred_flags = 0;
+ 
+-		/* Remember, tcp_poll() does not lock socket!
+-		 * Change state from SYN-SENT only after copied_seq
+-		 * is initialized. */
+-		tp->copied_seq = tp->rcv_nxt;
+-		mb();
+-		tcp_set_state(sk, TCP_ESTABLISHED);
+-
+ 		if(!sk->dead) {
+ 			sk->state_change(sk);
+ 			sk_wake_async(sk, 0, POLL_OUT);
+@@ -4695,7 +4700,18 @@
+ 				if (tp->tstamp_ok)
+ 					tp->advmss -= TCPOLEN_TSTAMP_ALIGNED;
+ 
++				/* Make sure socket is routed, for
++				 * correct metrics.
++				 */
++				tp->af_specific->rebuild_header(sk);
++
+ 				tcp_init_metrics(sk);
++
++				/* Prevent spurious tcp_cwnd_restart() on
++				 * first data packet.
++				 */
++				tp->lsndtime = tcp_time_stamp;
++
+ 				tcp_initialize_rcv_mss(sk);
+ 				tcp_init_buffer_space(sk);
+ 				tcp_fast_path_on(tp);
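
The tcp_input.c conversion above stops reading route metrics through dedicated fields (dst->rtt, dst->cwnd, dst->mxlock and friends) and goes through the metrics[] array via dst_metric() and dst_metric_locked(). Those accessors are defined in a header outside this excerpt; a plausible reconstruction, inferred purely from how the hunks index the array (hypothetical, for orientation only):

    /* Hypothetical reconstruction of the accessors used above; the
     * authoritative definitions live in include/net/dst.h. */
    static inline u32 dst_metric(struct dst_entry *dst, int metric)
    {
            return dst->metrics[metric - 1];   /* RTAX_* values are 1-based */
    }

    static inline int dst_metric_locked(struct dst_entry *dst, int metric)
    {
            /* The RTAX_LOCK slot carries the bitmask of locked metrics
             * that the old dst->mxlock tests expressed directly. */
            return dst_metric(dst, RTAX_LOCK) & (1 << metric);
    }
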
+diff -Nru a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
+--- a/net/ipv4/tcp_ipv4.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/tcp_ipv4.c	2005-02-13 21:25:09 +11:00
+@@ -63,13 +63,12 @@
+ #include <net/tcp.h>
+ #include <net/ipv6.h>
+ #include <net/inet_common.h>
++#include <net/xfrm.h>
+ 
+ #include <linux/inet.h>
+ #include <linux/stddef.h>
+-#include <linux/ipsec.h>
+ 
+ extern int sysctl_ip_dynaddr;
+-extern int sysctl_ip_default_ttl;
+ int sysctl_tcp_tw_reuse = 0;
+ int sysctl_tcp_low_latency = 0;
+ 
+@@ -785,7 +784,9 @@
+ 	}
+ 
+ 	tmp = ip_route_connect(&rt, nexthop, sk->saddr,
+-			       RT_CONN_FLAGS(sk), sk->bound_dev_if);
++			       RT_CONN_FLAGS(sk), sk->bound_dev_if,
++			       IPPROTO_TCP,
++			       sk->sport, usin->sin_port, sk);
+ 	if (tmp < 0)
+ 		return tmp;
+ 
+@@ -794,9 +795,6 @@
+ 		return -ENETUNREACH;
+ 	}
+ 
+-	__sk_dst_set(sk, &rt->u.dst);
+-	sk->route_caps = rt->u.dst.dev->features;
+-
+ 	if (!sk->protinfo.af_inet.opt || !sk->protinfo.af_inet.opt->srr)
+ 		daddr = rt->rt_dst;
+ 
+@@ -846,6 +844,15 @@
+ 	if (err)
+ 		goto failure;
+ 
++	err = ip_route_newports(&rt, sk->sport, sk->dport, sk);
++	if (err)
++		goto failure;
++
++	/* OK, now commit destination to socket.  */
++	__sk_dst_set(sk, &rt->u.dst);
++	sk->route_caps = rt->u.dst.dev->features;
++	tp->ext2_header_len = rt->u.dst.header_len;
++
+ 	if (!tp->write_seq)
+ 		tp->write_seq = secure_tcp_sequence_number(sk->saddr, sk->daddr,
+ 							   sk->sport, usin->sin_port);
+@@ -853,14 +860,16 @@
+ 	sk->protinfo.af_inet.id = tp->write_seq^jiffies;
+ 
+ 	err = tcp_connect(sk);
++	rt = NULL;
+ 	if (err)
+ 		goto failure;
+ 
+ 	return 0;
+ 
+ failure:
++	/* This unhashes the socket and releases the local port, if necessary. */
+ 	tcp_set_state(sk, TCP_CLOSE);
+-	__sk_dst_reset(sk);
++	ip_rt_put(rt);
+ 	sk->route_caps = 0;
+ 	sk->dport = 0;
+ 	return err;
+@@ -922,7 +931,7 @@
+ /* 
+  * This routine does path mtu discovery as defined in RFC1191.
+  */
+-static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, unsigned mtu)
++static inline void do_pmtu_discovery(struct sock *sk, struct iphdr *ip, u32 mtu)
+ {
+ 	struct dst_entry *dst;
+ 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+@@ -943,17 +952,19 @@
+ 	if ((dst = __sk_dst_check(sk, 0)) == NULL)
+ 		return;
+ 
+-	ip_rt_update_pmtu(dst, mtu);
++	dst->ops->update_pmtu(dst, mtu);
+ 
+ 	/* Something is about to be wrong... Remember soft error
+ 	 * for the case, if this connection will not able to recover.
+ 	 */
+-	if (mtu < dst->pmtu && ip_dont_fragment(sk, dst))
++	if (mtu < dst_pmtu(dst) && ip_dont_fragment(sk, dst))
+ 		sk->err_soft = EMSGSIZE;
+ 
++	mtu = dst_pmtu(dst);
++
+ 	if (sk->protinfo.af_inet.pmtudisc != IP_PMTUDISC_DONT &&
+-	    tp->pmtu_cookie > dst->pmtu) {
+-		tcp_sync_mss(sk, dst->pmtu);
++	    tp->pmtu_cookie > mtu) {
++		tcp_sync_mss(sk, mtu);
+ 
+ 		/* Resend the TCP packet because it's  
+ 		 * clear that the old packet has been
+@@ -1187,10 +1198,8 @@
+ 				      sizeof(struct tcphdr),
+ 				      IPPROTO_TCP,
+ 				      0); 
+-	arg.n_iov = 1;
+ 	arg.csumoffset = offsetof(struct tcphdr, check) / 2; 
+ 
+-	tcp_socket->sk->protinfo.af_inet.ttl = sysctl_ip_default_ttl;
+ 	ip_send_reply(tcp_socket->sk, skb, &arg, sizeof rth);
+ 
+ 	TCP_INC_STATS_BH(TcpOutSegs);
+@@ -1215,7 +1224,6 @@
+ 
+ 	arg.iov[0].iov_base = (unsigned char *)&rep; 
+ 	arg.iov[0].iov_len  = sizeof(rep.th);
+-	arg.n_iov = 1;
+ 	if (ts) {
+ 		rep.tsopt[0] = htonl((TCPOPT_NOP << 24) |
+ 				     (TCPOPT_NOP << 16) |
+@@ -1266,14 +1274,20 @@
+ static struct dst_entry* tcp_v4_route_req(struct sock *sk, struct open_request *req)
+ {
+ 	struct rtable *rt;
+-	struct ip_options *opt;
++	struct ip_options *opt = req->af.v4_req.opt;
++	struct flowi fl = { .oif = sk->bound_dev_if,
++			    .nl_u = { .ip4_u =
++				      { .daddr = ((opt && opt->srr) ?
++						  opt->faddr :
++						  req->af.v4_req.rmt_addr),
++					.saddr = req->af.v4_req.loc_addr,
++					.tos = RT_CONN_FLAGS(sk) } },
++			    .proto = IPPROTO_TCP,
++			    .uli_u = { .ports =
++				       { .sport = sk->sport,
++					 .dport = req->rmt_port } } };
+ 
+-	opt = req->af.v4_req.opt;
+-	if(ip_route_output(&rt, ((opt && opt->srr) ?
+-				 opt->faddr :
+-				 req->af.v4_req.rmt_addr),
+-			   req->af.v4_req.loc_addr,
+-			   RT_CONN_FLAGS(sk), sk->bound_dev_if)) {
++	if (ip_route_output_flow(&rt, &fl, sk, 0)) {
+ 		IP_INC_STATS_BH(IpOutNoRoutes);
+ 		return NULL;
+ 	}
+@@ -1496,7 +1510,7 @@
+ 			 (sysctl_max_syn_backlog - tcp_synq_len(sk)
+ 			  < (sysctl_max_syn_backlog>>2)) &&
+ 			 (!peer || !peer->tcp_ts_stamp) &&
+-			 (!dst || !dst->rtt)) {
++			 (!dst || !dst_metric(dst, RTAX_RTT))) {
+ 			/* Without syncookies last quarter of
+ 			 * backlog is filled with destinations, proven to be alive.
+ 			 * It means that we continue to communicate
+@@ -1568,10 +1582,11 @@
+ 	newtp->ext_header_len = 0;
+ 	if (newsk->protinfo.af_inet.opt)
+ 		newtp->ext_header_len = newsk->protinfo.af_inet.opt->optlen;
++	newtp->ext2_header_len = dst->header_len;
+ 	newsk->protinfo.af_inet.id = newtp->write_seq^jiffies;
+ 
+-	tcp_sync_mss(newsk, dst->pmtu);
+-	newtp->advmss = dst->advmss;
++	tcp_sync_mss(newsk, dst_pmtu(dst));
++	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ 	tcp_initialize_rcv_mss(newsk);
+ 
+ 	__tcp_v4_hash(newsk, 0);
+@@ -1756,12 +1771,12 @@
+ 		goto no_tcp_socket;
+ 
+ process:
+-	if(!ipsec_sk_policy(sk,skb))
+-		goto discard_and_relse;
+-
+ 	if (sk->state == TCP_TIME_WAIT)
+ 		goto do_time_wait;
+ 
++	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
++		goto discard_and_relse;
++
+ 	if (sk_filter(sk, skb, 0))
+ 		goto discard_and_relse;
+ 
+@@ -1781,6 +1796,9 @@
+ 	return ret;
+ 
+ no_tcp_socket:
++	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
++		goto discard_it;
++
+ 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ bad_packet:
+ 		TCP_INC_STATS_BH(TcpInErrs);
+@@ -1798,6 +1816,9 @@
+ 	goto discard_it;
+ 
+ do_time_wait:
++	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
++		goto discard_and_relse;
++
+ 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ 		TCP_INC_STATS_BH(TcpInErrs);
+ 		tcp_tw_put((struct tcp_tw_bucket *) sk);
+@@ -1852,12 +1873,15 @@
+ 	/* Query new route. */
+ 	err = ip_route_connect(&rt, daddr, 0,
+ 			       RT_TOS(sk->protinfo.af_inet.tos)|sk->localroute,
+-			       sk->bound_dev_if);
++			       sk->bound_dev_if,
++			       IPPROTO_TCP,
++			       sk->sport, sk->dport, sk);
+ 	if (err)
+ 		return err;
+ 
+ 	__sk_dst_set(sk, &rt->u.dst);
+ 	sk->route_caps = rt->u.dst.dev->features;
++	tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
+ 
+ 	new_saddr = rt->rt_src;
+ 
+@@ -1900,11 +1924,23 @@
+ 	if(sk->protinfo.af_inet.opt && sk->protinfo.af_inet.opt->srr)
+ 		daddr = sk->protinfo.af_inet.opt->faddr;
+ 
+-	err = ip_route_output(&rt, daddr, sk->saddr,
+-			      RT_CONN_FLAGS(sk), sk->bound_dev_if);
++	{
++		struct flowi fl = { .oif = sk->bound_dev_if,
++				    .nl_u = { .ip4_u =
++					      { .daddr = daddr,
++						.saddr = sk->saddr,
++						.tos = RT_CONN_FLAGS(sk) } },
++				    .proto = IPPROTO_TCP,
++				    .uli_u = { .ports =
++					       { .sport = sk->sport,
++						 .dport = sk->dport } } };
++						
++		err = ip_route_output_flow(&rt, &fl, sk, 0);
++	}
+ 	if (!err) {
+ 		__sk_dst_set(sk, &rt->u.dst);
+ 		sk->route_caps = rt->u.dst.dev->features;
++		tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
+ 		return 0;
+ 	}
+ 
+@@ -2066,8 +2102,8 @@
+ 		tcp_put_port(sk);
+ 
+ 	/* If sendmsg cached page exists, toss it. */
+-	if (tp->sndmsg_page != NULL)
+-		__free_page(tp->sndmsg_page);
++	if (inet_sk(sk)->sndmsg_page)
++		__free_page(inet_sk(sk)->sndmsg_page);
+ 
+ 	atomic_dec(&tcp_sockets_allocated);
+ 
+@@ -2325,7 +2361,7 @@
+ 	if ((err=ops->create(tcp_socket, IPPROTO_TCP))<0)
+ 		panic("Failed to create the TCP control socket.\n");
+ 	tcp_socket->sk->allocation=GFP_ATOMIC;
+-	tcp_socket->sk->protinfo.af_inet.ttl = MAXTTL;
++	tcp_socket->sk->protinfo.af_inet.uc_ttl = -1;
+ 
+ 	/* Unhash it so that IP input processing does not even
+ 	 * see it, we do not wish this socket to see incoming
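
tcp_v4_connect() above is reordered so the route is committed to the socket only once the full 4-tuple is known: ip_route_connect() resolves with whatever ports are available at call time, the local port is then bound (that step sits between the two hunks), and ip_route_newports() re-keys the cached flow so port-sensitive IPsec selectors see the final ports. In outline (hedged sketch; error handling trimmed, the function name is illustrative):

    /* Illustrative ordering only; not a function from the patch. */
    static int example_connect_route(struct sock *sk, u32 nexthop, u16 dport)
    {
            struct rtable *rt;
            int err;

            err = ip_route_connect(&rt, nexthop, sk->saddr, RT_CONN_FLAGS(sk),
                                   sk->bound_dev_if, IPPROTO_TCP,
                                   sk->sport, dport, sk);
            if (err < 0)
                    return err;

            /* ... local port selection happens here in the real code ... */

            err = ip_route_newports(&rt, sk->sport, dport, sk);
            if (err)
                    return err;

            /* Only now is the destination committed to the socket. */
            __sk_dst_set(sk, &rt->u.dst);
            sk->route_caps = rt->u.dst.dev->features;
            tcp_sk(sk)->ext2_header_len = rt->u.dst.header_len;
            return 0;
    }
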
+diff -Nru a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
+--- a/net/ipv4/tcp_minisocks.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/tcp_minisocks.c	2005-02-13 21:25:09 +11:00
+@@ -25,6 +25,7 @@
+ #include <linux/sysctl.h>
+ #include <net/tcp.h>
+ #include <net/inet_common.h>
++#include <net/xfrm.h>
+ 
+ #ifdef CONFIG_SYSCTL
+ #define SYNC_INIT 0 /* let the user enable it */
+@@ -683,6 +684,13 @@
+ 		if ((filter = newsk->filter) != NULL)
+ 			sk_filter_charge(newsk, filter);
+ #endif
++		if (unlikely(xfrm_sk_clone_policy(newsk))) {
++			/* It is still raw copy of parent, so invalidate
++			 * destructor and make plain sk_free() */
++			newsk->destruct = NULL;
++			sk_free(newsk);
++			return NULL;
++		}
+ 
+ 		/* Now setup tcp_opt */
+ 		newtp = &(newsk->tp_pinfo.af_tcp);
+diff -Nru a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
+--- a/net/ipv4/tcp_output.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/tcp_output.c	2005-02-13 21:25:09 +11:00
+@@ -89,8 +89,8 @@
+ 	struct dst_entry *dst = __sk_dst_get(sk);
+ 	int mss = tp->advmss;
+ 
+-	if (dst && dst->advmss < mss) {
+-		mss = dst->advmss;
++	if (dst && dst_metric(dst, RTAX_ADVMSS) < mss) {
++		mss = dst_metric(dst, RTAX_ADVMSS);
+ 		tp->advmss = mss;
+ 	}
+ 
+@@ -99,10 +99,10 @@
+ 
+ /* RFC2861. Reset CWND after idle period longer RTO to "restart window".
+  * This is the first part of cwnd validation mechanism. */
+-static void tcp_cwnd_restart(struct tcp_opt *tp)
++static void tcp_cwnd_restart(struct tcp_opt *tp, struct dst_entry *dst)
+ {
+ 	s32 delta = tcp_time_stamp - tp->lsndtime;
+-	u32 restart_cwnd = tcp_init_cwnd(tp);
++	u32 restart_cwnd = tcp_init_cwnd(tp, dst);
+ 	u32 cwnd = tp->snd_cwnd;
+ 
+ 	if (tcp_is_vegas(tp)) 
+@@ -118,12 +118,12 @@
+ 	tp->snd_cwnd_used = 0;
+ }
+ 
+-static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb)
++static __inline__ void tcp_event_data_sent(struct tcp_opt *tp, struct sk_buff *skb, struct sock *sk)
+ {
+ 	u32 now = tcp_time_stamp;
+ 
+ 	if (!tp->packets_out && (s32)(now - tp->lsndtime) > tp->rto)
+-		tcp_cwnd_restart(tp);
++		tcp_cwnd_restart(tp, __sk_dst_get(sk));
+ 
+ 	tp->lsndtime = now;
+ 
+@@ -287,7 +287,7 @@
+ 			tcp_event_ack_sent(sk);
+ 
+ 		if (skb->len != tcp_header_size)
+-			tcp_event_data_sent(tp, skb);
++			tcp_event_data_sent(tp, skb, sk);
+ 
+ 		TCP_INC_STATS(TcpOutSegs);
+ 
+@@ -518,13 +518,16 @@
+ 
+ int tcp_sync_mss(struct sock *sk, u32 pmtu)
+ {
+-	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
++	struct tcp_opt *tp = tcp_sk(sk);
++	struct dst_entry *dst = __sk_dst_get(sk);
+ 	int mss_now;
+ 
++	if (dst && dst->ops->get_mss)
++		pmtu = dst->ops->get_mss(dst, pmtu);
++
+ 	/* Calculate base mss without TCP options:
+ 	   It is MMS_S - sizeof(tcphdr) of rfc1122
+ 	 */
+-
+ 	mss_now = pmtu - tp->af_specific->net_header_len - sizeof(struct tcphdr);
+ 
+ 	/* Clamp it (mss_clamp does not include tcp options) */
+@@ -532,7 +535,7 @@
+ 		mss_now = tp->mss_clamp;
+ 
+ 	/* Now subtract optional transport overhead */
+-	mss_now -= tp->ext_header_len;
++	mss_now -= tp->ext_header_len + tp->ext2_header_len;
+ 
+ 	/* Then reserve room for full set of TCP options and 8 bytes of data */
+ 	if (mss_now < 48)
+@@ -1147,10 +1150,10 @@
+ 	if (req->rcv_wnd == 0) { /* ignored for retransmitted syns */
+ 		__u8 rcv_wscale; 
+ 		/* Set this up on the first call only */
+-		req->window_clamp = tp->window_clamp ? : dst->window;
++		req->window_clamp = tp->window_clamp ? : dst_metric(dst, RTAX_WINDOW);
+ 		/* tcp_full_space because it is guaranteed to be the first packet */
+ 		tcp_select_initial_window(tcp_full_space(sk), 
+-			dst->advmss - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
++			dst_metric(dst, RTAX_ADVMSS) - (req->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0),
+ 			&req->rcv_wnd,
+ 			&req->window_clamp,
+ 			req->wscale_ok,
+@@ -1162,7 +1165,7 @@
+ 	th->window = htons(req->rcv_wnd);
+ 
+ 	TCP_SKB_CB(skb)->when = tcp_time_stamp;
+-	tcp_syn_build_options((__u32 *)(th + 1), dst->advmss, req->tstamp_ok,
++	tcp_syn_build_options((__u32 *)(th + 1), dst_metric(dst, RTAX_ADVMSS), req->tstamp_ok,
+ 			      req->sack_ok, req->wscale_ok, req->rcv_wscale,
+ 			      TCP_SKB_CB(skb)->when,
+ 			      req->ts_recent);
+@@ -1191,11 +1194,11 @@
+ 	if (tp->user_mss)
+ 		tp->mss_clamp = tp->user_mss;
+ 	tp->max_window = 0;
+-	tcp_sync_mss(sk, dst->pmtu);
++	tcp_sync_mss(sk, dst_pmtu(dst));
+ 
+ 	if (!tp->window_clamp)
+-		tp->window_clamp = dst->window;
+-	tp->advmss = dst->advmss;
++		tp->window_clamp = dst_metric(dst, RTAX_WINDOW);
++	tp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ 	tcp_initialize_rcv_mss(sk);
+ 	tcp_ca_init(tp);
+ 
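
tcp_sync_mss() above now also subtracts ext2_header_len, the per-route IPsec overhead that the connect paths copy out of dst->header_len, and gives dst->ops->get_mss() first claim on the path MTU. Back-of-the-envelope, with assumed figures (the 44-byte ESP cost is an illustration, not a number from the patch):

    #include <stdio.h>

    int main(void)
    {
            int pmtu            = 1500; /* assumed path MTU */
            int net_header_len  = 20;   /* IPv4 header */
            int tcp_header_len  = 20;   /* base TCP header */
            int ext_header_len  = 0;    /* IP options, none here */
            int ext2_header_len = 44;   /* dst->header_len, assumed ESP cost */

            int mss_now = pmtu - net_header_len - tcp_header_len
                               - ext_header_len - ext2_header_len;

            printf("mss_now = %d\n", mss_now);  /* prints 1416 */
            return 0;
    }
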
+diff -Nru a/net/ipv4/udp.c b/net/ipv4/udp.c
+--- a/net/ipv4/udp.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv4/udp.c	2005-02-13 21:25:09 +11:00
+@@ -11,6 +11,7 @@
+  *		Fred N. van Kempen, <waltje at uWalt.NL.Mugnet.ORG>
+  *		Arnt Gulbrandsen, <agulbra at nvg.unit.no>
+  *		Alan Cox, <Alan.Cox at linux.org>
++ *		Hirokazu Takahashi, <taka at valinux.co.jp>
+  *
+  * Fixes:
+  *		Alan Cox	:	verify_area() calls
+@@ -64,6 +65,10 @@
+  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
+  *	Alexey Kuznetsov:		allow both IPv4 and IPv6 sockets to bind
+  *					a single port at the same time.
++ *	Hirokazu Takahashi	:	HW checksumming for outgoing UDP
++ *					datagrams.
++ *	Hirokazu Takahashi	:	sendfile() on UDP works now.
++ *	Derek Atkins <derek at ihtfp.com>: Add Encapsulation Support
+  *
+  *
+  *		This program is free software; you can redistribute it and/or
+@@ -97,6 +102,7 @@
+ #include <net/route.h>
+ #include <net/inet_common.h>
+ #include <net/checksum.h>
++#include <net/xfrm.h>
+ 
+ /*
+  *	Snmp MIB for the UDP layer
+@@ -371,81 +377,119 @@
+ 	sock_put(sk);
+ }
+ 
+-static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
+-{
+-	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
+-}
+-
+-struct udpfakehdr 
+-{
+-	struct udphdr uh;
+-	u32 saddr;
+-	u32 daddr;
+-	struct iovec *iov;
+-	u32 wcheck;
+-};
+-
+ /*
+- *	Copy and checksum a UDP packet from user space into a buffer.
++ * Throw away all pending data and cancel the corking. Socket is locked.
+  */
+- 
+-static int udp_getfrag(const void *p, char * to, unsigned int offset,
+-                       unsigned int fraglen, struct sk_buff *skb)
++static void udp_flush_pending_frames(struct sock *sk)
+ {
+-	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
+-	if (offset==0) {
+-		if (csum_partial_copy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
+-						   fraglen-sizeof(struct udphdr), &ufh->wcheck))
+-			return -EFAULT;
+- 		ufh->wcheck = csum_partial((char *)ufh, sizeof(struct udphdr),
+-					   ufh->wcheck);
+-		ufh->uh.check = csum_tcpudp_magic(ufh->saddr, ufh->daddr, 
+-					  ntohs(ufh->uh.len),
+-					  IPPROTO_UDP, ufh->wcheck);
+-		if (ufh->uh.check == 0)
+-			ufh->uh.check = -1;
+-		memcpy(to, ufh, sizeof(struct udphdr));
+-		return 0;
++	struct udp_opt *up = udp_sk(sk);
++
++	if (up->pending) {
++		up->len = 0;
++		up->pending = 0;
++		ip_flush_pending_frames(sk);
+ 	}
+-	if (csum_partial_copy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
+-					   fraglen, &ufh->wcheck))
+-		return -EFAULT;
+-	return 0;
+ }
+ 
+ /*
+- *	Copy a UDP packet from user space into a buffer without checksumming.
++ * Push out all pending data as one UDP datagram. Socket is locked.
+  */
+- 
+-static int udp_getfrag_nosum(const void *p, char * to, unsigned int offset,
+-                             unsigned int fraglen, struct sk_buff *skb) 
++static int udp_push_pending_frames(struct sock *sk, struct udp_opt *up)
+ {
+-	struct udpfakehdr *ufh = (struct udpfakehdr *)p;
++	struct sk_buff *skb;
++	struct udphdr *uh;
++	int err = 0;
++
++	/* Grab the skbuff where UDP header space exists. */
++	if ((skb = skb_peek(&sk->write_queue)) == NULL)
++		goto out;
+ 
+-	if (offset==0) {
+-		memcpy(to, ufh, sizeof(struct udphdr));
+-		return memcpy_fromiovecend(to+sizeof(struct udphdr), ufh->iov, offset,
+-					   fraglen-sizeof(struct udphdr));
++	/*
++	 * Create a UDP header
++	 */
++	uh = skb->h.uh;
++	uh->source = up->sport;
++	uh->dest = up->dport;
++	uh->len = htons(up->len);
++	uh->check = 0;
++
++	if (sk->no_check == UDP_CSUM_NOXMIT) {
++		skb->ip_summed = CHECKSUM_NONE;
++		goto send;
+ 	}
+-	return memcpy_fromiovecend(to, ufh->iov, offset-sizeof(struct udphdr),
+-				   fraglen);
++
++	if (skb_queue_len(&sk->write_queue) == 1) {
++		/*
++		 * Only one fragment on the socket.
++		 */
++		if (skb->ip_summed == CHECKSUM_HW) {
++			skb->csum = offsetof(struct udphdr, check);
++			uh->check = ~csum_tcpudp_magic(up->saddr, up->daddr,
++					up->len, IPPROTO_UDP, 0);
++		} else {
++			skb->csum = csum_partial((char *)uh,
++					sizeof(struct udphdr), skb->csum);
++			uh->check = csum_tcpudp_magic(up->saddr, up->daddr,
++					up->len, IPPROTO_UDP, skb->csum);
++			if (uh->check == 0)
++				uh->check = -1;
++		}
++	} else {
++		unsigned int csum = 0;
++		/*
++		 * HW-checksum won't work as there are two or more 
++		 * fragments on the socket so that all csums of sk_buffs
++		 * should be together.
++		 */
++		if (skb->ip_summed == CHECKSUM_HW) {
++			int offset = (unsigned char *)uh - skb->data;
++			skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
++
++			skb->ip_summed = CHECKSUM_NONE;
++		} else {
++			skb->csum = csum_partial((char *)uh,
++					sizeof(struct udphdr), skb->csum);
++		}
++
++		skb_queue_walk(&sk->write_queue, skb) {
++			csum = csum_add(csum, skb->csum);
++		}
++		uh->check = csum_tcpudp_magic(up->saddr, up->daddr,
++				up->len, IPPROTO_UDP, csum);
++		if (uh->check == 0)
++			uh->check = -1;
++	}
++send:
++	err = ip_push_pending_frames(sk);
++out:
++	up->len = 0;
++	up->pending = 0;
++	return err;
++}
++
++
++static unsigned short udp_check(struct udphdr *uh, int len, unsigned long saddr, unsigned long daddr, unsigned long base)
++{
++	return(csum_tcpudp_magic(saddr, daddr, len, IPPROTO_UDP, base));
+ }
+ 
+ int udp_sendmsg(struct sock *sk, struct msghdr *msg, int len)
+ {
+-	int ulen = len + sizeof(struct udphdr);
++	struct udp_opt *up = udp_sk(sk);
++	int ulen = len;
+ 	struct ipcm_cookie ipc;
+-	struct udpfakehdr ufh;
+ 	struct rtable *rt = NULL;
+ 	int free = 0;
+ 	int connected = 0;
+-	u32 daddr;
++	u32 daddr, faddr, saddr;
++	u16 dport;
+ 	u8  tos;
+ 	int err;
++	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ 
+ 	/* This check is ONLY to check for arithmetic overflow
+ 	   on integer(!) len. Not more! Real check will be made
+-	   in ip_build_xmit --ANK
++	   in ip_append_* --ANK
+ 
+ 	   BTW socket.c -> af_*.c -> ... make multiple
+ 	   invalid conversions size_t -> int. We MUST repair it f.e.
+@@ -464,10 +508,23 @@
+ 	if (msg->msg_flags&MSG_OOB)	/* Mirror BSD error message compatibility */
+ 		return -EOPNOTSUPP;
+ 
++	ipc.opt = NULL;
++
++	if (up->pending) {
++		/*
++		 * There are pending frames.
++	 	 * The socket lock must be held while it's corked.
++		 */
++		lock_sock(sk);
++		if (likely(up->pending))
++ 			goto do_append_data;
++		release_sock(sk);
++	}
++	ulen += sizeof(struct udphdr);
++
+ 	/*
+ 	 *	Get and verify the address. 
+ 	 */
+-	 
+ 	if (msg->msg_name) {
+ 		struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
+ 		if (msg->msg_namelen < sizeof(*usin))
+@@ -477,24 +534,22 @@
+ 				return -EINVAL;
+ 		}
+ 
+-		ufh.daddr = usin->sin_addr.s_addr;
+-		ufh.uh.dest = usin->sin_port;
+-		if (ufh.uh.dest == 0)
++		daddr = usin->sin_addr.s_addr;
++		dport = usin->sin_port;
++		if (dport == 0)
+ 			return -EINVAL;
+ 	} else {
+ 		if (sk->state != TCP_ESTABLISHED)
+ 			return -EDESTADDRREQ;
+-		ufh.daddr = sk->daddr;
+-		ufh.uh.dest = sk->dport;
++		daddr = sk->daddr;
++		dport = sk->dport;
+ 		/* Open fast path for connected socket.
+ 		   Route will not be used, if at least one option is set.
+ 		 */
+ 		connected = 1;
+   	}
+ 	ipc.addr = sk->saddr;
+-	ufh.uh.source = sk->sport;
+ 
+-	ipc.opt = NULL;
+ 	ipc.oif = sk->bound_dev_if;
+ 	if (msg->msg_controllen) {
+ 		err = ip_cmsg_send(msg, &ipc);
+@@ -507,13 +562,13 @@
+ 	if (!ipc.opt)
+ 		ipc.opt = sk->protinfo.af_inet.opt;
+ 
+-	ufh.saddr = ipc.addr;
+-	ipc.addr = daddr = ufh.daddr;
++	saddr = ipc.addr;
++	ipc.addr = faddr = daddr;
+ 
+ 	if (ipc.opt && ipc.opt->srr) {
+ 		if (!daddr)
+ 			return -EINVAL;
+-		daddr = ipc.opt->faddr;
++		faddr = ipc.opt->faddr;
+ 		connected = 0;
+ 	}
+ 	tos = RT_TOS(sk->protinfo.af_inet.tos);
+@@ -526,8 +581,8 @@
+ 	if (MULTICAST(daddr)) {
+ 		if (!ipc.oif)
+ 			ipc.oif = sk->protinfo.af_inet.mc_index;
+-		if (!ufh.saddr)
+-			ufh.saddr = sk->protinfo.af_inet.mc_addr;
++		if (!saddr)
++			saddr = sk->protinfo.af_inet.mc_addr;
+ 		connected = 0;
+ 	}
+ 
+@@ -535,7 +590,16 @@
+ 		rt = (struct rtable*)sk_dst_check(sk, 0);
+ 
+ 	if (rt == NULL) {
+-		err = ip_route_output(&rt, daddr, ufh.saddr, tos, ipc.oif);
++		struct flowi fl = { .oif = ipc.oif,
++				    .nl_u = { .ip4_u =
++					      { .daddr = faddr,
++						.saddr = saddr,
++						.tos = tos } },
++				    .proto = IPPROTO_UDP,
++				    .uli_u = { .ports =
++					       { .sport = sk->sport,
++						 .dport = dport } } };
++		err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT));
+ 		if (err)
+ 			goto out;
+ 
+@@ -550,23 +614,39 @@
+ 		goto do_confirm;
+ back_from_confirm:
+ 
+-	ufh.saddr = rt->rt_src;
++	saddr = rt->rt_src;
+ 	if (!ipc.addr)
+-		ufh.daddr = ipc.addr = rt->rt_dst;
+-	ufh.uh.len = htons(ulen);
+-	ufh.uh.check = 0;
+-	ufh.iov = msg->msg_iov;
+-	ufh.wcheck = 0;
+-
+-	/* RFC1122: OK.  Provides the checksumming facility (MUST) as per */
+-	/* 4.1.3.4. It's configurable by the application via setsockopt() */
+-	/* (MAY) and it defaults to on (MUST). */
+-
+-	err = ip_build_xmit(sk,
+-			    (sk->no_check == UDP_CSUM_NOXMIT ?
+-			     udp_getfrag_nosum :
+-			     udp_getfrag),
+-			    &ufh, ulen, &ipc, rt, msg->msg_flags);
++		daddr = ipc.addr = rt->rt_dst;
++
++	lock_sock(sk);
++	if (unlikely(up->pending)) {
++		/* The socket is already corked while preparing it. */
++		/* ... which is an evident application bug. --ANK */
++		release_sock(sk);
++
++		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
++		err = -EINVAL;
++		goto out;
++	}
++	/*
++	 *	Now cork the socket to pend data.
++	 */
++	up->daddr = daddr;
++	up->dport = dport;
++	up->saddr = saddr;
++	up->sport = sk->sport;
++	up->pending = 1;
++
++do_append_data:
++	up->len += ulen;
++	err = ip_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, 
++			sizeof(struct udphdr), &ipc, rt, 
++			corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
++	if (err)
++		udp_flush_pending_frames(sk);
++	else if (!corkreq)
++		err = udp_push_pending_frames(sk, up);
++	release_sock(sk);
+ 
+ out:
+ 	ip_rt_put(rt);
+@@ -586,6 +666,52 @@
+ 	goto out;
+ }
+ 
++int udp_sendpage(struct sock *sk, struct page *page, int offset, size_t size, int flags)
++{
++	struct udp_opt *up = udp_sk(sk);
++	int ret;
++
++	if (!up->pending) {
++		struct msghdr msg = {	.msg_flags = flags|MSG_MORE };
++
++		/* Call udp_sendmsg to specify destination address which
++		 * sendpage interface can't pass.
++		 * This will succeed only when the socket is connected.
++		 */
++		ret = udp_sendmsg(sk, &msg, 0);
++		if (ret < 0)
++			return ret;
++	}
++
++	lock_sock(sk);
++
++	if (unlikely(!up->pending)) {
++		release_sock(sk);
++
++		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 3\n"));
++		return -EINVAL;
++	}
++
++	ret = ip_append_page(sk, page, offset, size, flags);
++	if (ret == -EOPNOTSUPP) {
++		release_sock(sk);
++		return sock_no_sendpage(sk->socket, page, offset, size, flags);
++	}
++	if (ret < 0) {
++		udp_flush_pending_frames(sk);
++		goto out;
++	}
++
++	up->len += size;
++	if (!(up->corkflag || (flags&MSG_MORE)))
++		ret = udp_push_pending_frames(sk, up);
++	if (!ret)
++		ret = size;
++out:
++	release_sock(sk);
++	return ret;
++}
++
+ /*
+  *	IOCTL requests applicable to the UDP protocol
+  */
+@@ -807,7 +933,9 @@
+ 			saddr = sk->protinfo.af_inet.mc_addr;
+ 	}
+ 	err = ip_route_connect(&rt, usin->sin_addr.s_addr, saddr,
+-			       RT_CONN_FLAGS(sk), oif);
++			       RT_CONN_FLAGS(sk), oif,
++			       IPPROTO_UDP,
++			       sk->sport, usin->sin_port, sk);
+ 	if (err)
+ 		return err;
+ 	if ((rt->rt_flags&RTCF_BROADCAST) && !sk->broadcast) {
+@@ -858,11 +986,138 @@
+ 	inet_sock_release(sk);
+ }
+ 
++/* return:
++ * 	1  if the UDP system should process it
++ *	0  if we should drop this packet
++ * 	-1 if it should get processed by xfrm4_rcv_encap
++ */
++static int udp_encap_rcv(struct sock * sk, struct sk_buff *skb)
++{
++#ifndef CONFIG_XFRM
++	return 1; 
++#else
++	struct udp_opt *up = udp_sk(sk);
++  	struct udphdr *uh = skb->h.uh;
++	struct iphdr *iph;
++	int iphlen, len;
++  
++	__u8 *udpdata = (__u8 *)uh + sizeof(struct udphdr);
++	__u32 *udpdata32 = (__u32 *)udpdata;
++	__u16 encap_type = up->encap_type;
++
++	/* if we're overly short, let UDP handle it */
++	if (udpdata > skb->tail)
++		return 1;
++
++	/* if this is not encapsulated socket, then just return now */
++	if (!encap_type)
++		return 1;
++
++	len = skb->tail - udpdata;
++
++	switch (encap_type) {
++	default:
++	case UDP_ENCAP_ESPINUDP:
++		/* Check if this is a keepalive packet.  If so, eat it. */
++		if (len == 1 && udpdata[0] == 0xff) {
++			return 0;
++		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0 ) {
++			/* ESP Packet without Non-ESP header */
++			len = sizeof(struct udphdr);
++		} else
++			/* Must be an IKE packet.. pass it through */
++			return 1;
++		break;
++	case UDP_ENCAP_ESPINUDP_NON_IKE:
++		/* Check if this is a keepalive packet.  If so, eat it. */
++		if (len == 1 && udpdata[0] == 0xff) {
++			return 0;
++		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
++			   udpdata32[0] == 0 && udpdata32[1] == 0) {
++			
++			/* ESP Packet with Non-IKE marker */
++			len = sizeof(struct udphdr) + 2 * sizeof(u32);
++		} else
++			/* Must be an IKE packet.. pass it through */
++			return 1;
++		break;
++	}
++
++	/* At this point we are sure that this is an ESPinUDP packet,
++	 * so we need to remove 'len' bytes from the packet (the UDP
++	 * header and optional ESP marker bytes) and then modify the
++	 * protocol to ESP, and then call into the transform receiver.
++	 */
++
++	/* Now we can update and verify the packet length... */
++	iph = skb->nh.iph;
++	iphlen = iph->ihl << 2;
++	iph->tot_len = htons(ntohs(iph->tot_len) - len);
++	if (skb->len < iphlen + len) {
++		/* packet is too small!?! */
++		return 0;
++	}
++
++	/* pull the data buffer up to the ESP header and set the
++	 * transport header to point to ESP.  Keep UDP on the stack
++	 * for later.
++	 */
++	skb->h.raw = skb_pull(skb, len);
++
++	/* modify the protocol (it's ESP!) */
++	iph->protocol = IPPROTO_ESP;
++
++	/* and let the caller know to send this into the ESP processor... */
++	return -1;
++#endif
++}
++
++/* returns:
++ *  -1: error
++ *   0: success
++ *  >0: "udp encap" protocol resubmission
++ *
++ * Note that in the success and error cases, the skb is assumed to
++ * have either been requeued or freed.
++ */
+ static int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+ {
++	struct udp_opt *up = udp_sk(sk);
++
+ 	/*
+ 	 *	Charge it to the socket, dropping if the queue is full.
+ 	 */
++	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
++		kfree_skb(skb);
++		return -1;
++	}
++
++	if (up->encap_type) {
++		/*
++		 * This is an encapsulation socket, so let's see if this is
++		 * an encapsulated packet.
++		 * If it's a keepalive packet, then just eat it.
++		 * If it's an encapsulated packet, then pass it to the
++		 * IPsec xfrm input and return the response
++		 * appropriately.  Otherwise, just fall through and
++		 * pass this up the UDP socket.
++		 */
++		int ret;
++
++		ret = udp_encap_rcv(sk, skb);
++		if (ret == 0) {
++			/* Eat the packet .. */
++			kfree_skb(skb);
++			return 0;
++		}
++		if (ret < 0) {
++			/* process the ESP packet */
++			ret = xfrm4_rcv_encap(skb, up->encap_type);
++			UDP_INC_STATS_BH(UdpInDatagrams);
++			return -ret;
++		}
++		/* FALLTHROUGH -- it's a UDP Packet */
++	}
+ 
+ #if defined(CONFIG_FILTER)
+ 	if (sk->filter && skb->ip_summed != CHECKSUM_UNNECESSARY) {
+@@ -915,8 +1170,13 @@
+ 			if(sknext)
+ 				skb1 = skb_clone(skb, GFP_ATOMIC);
+ 
+-			if(skb1)
+-				udp_queue_rcv_skb(sk, skb1);
++			if(skb1) {
++				int ret = udp_queue_rcv_skb(sk, skb1);
++				if (ret > 0)
++					/* we should probably re-process instead
++					 * of dropping packets here. */
++					kfree_skb(skb1);
++			}
+ 			sk = sknext;
+ 		} while(sknext);
+ 	} else
+@@ -991,11 +1251,20 @@
+ 	sk = udp_v4_lookup(saddr, uh->source, daddr, uh->dest, skb->dev->ifindex);
+ 
+ 	if (sk != NULL) {
+-		udp_queue_rcv_skb(sk, skb);
++		int ret = udp_queue_rcv_skb(sk, skb);
+ 		sock_put(sk);
++
++		/* a return value > 0 means to resubmit the input, but
++		 * it wants the return to be -protocol, or 0
++		 */
++		if (ret > 0)
++			return -ret;
+ 		return 0;
+ 	}
+ 
++	if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
++		goto drop;
++
+ 	/* No socket. Drop packet silently, if checksum is wrong */
+ 	if (udp_checksum_complete(skb))
+ 		goto csum_error;
+@@ -1036,6 +1305,7 @@
+ 			NIPQUAD(daddr),
+ 			ntohs(uh->dest),
+ 			ulen));
++drop:
+ 	UDP_INC_STATS_BH(UdpInErrors);
+ 	kfree_skb(skb);
+ 	return(0);
+@@ -1100,16 +1370,116 @@
+ 	return len;
+ }
+ 
++static int udp_destroy_sock(struct sock *sk)
++{
++	lock_sock(sk);
++	udp_flush_pending_frames(sk);
++	release_sock(sk);
++	return 0;
++}
++
++/*
++ *	Socket option code for UDP
++ */
++static int udp_setsockopt(struct sock *sk, int level, int optname, 
++			  char *optval, int optlen)
++{
++	struct udp_opt *up = udp_sk(sk);
++	int val;
++	int err = 0;
++
++	if (level != SOL_UDP)
++		return ip_setsockopt(sk, level, optname, optval, optlen);
++
++	if(optlen<sizeof(int))
++		return -EINVAL;
++
++	if (get_user(val, (int *)optval))
++		return -EFAULT;
++
++	switch(optname) {
++	case UDP_CORK:
++		if (val != 0) {
++			up->corkflag = 1;
++		} else {
++			up->corkflag = 0;
++			lock_sock(sk);
++			udp_push_pending_frames(sk, up);
++			release_sock(sk);
++		}
++		break;
++		
++	case UDP_ENCAP:
++		switch (val) {
++		case 0:
++		case UDP_ENCAP_ESPINUDP:
++		case UDP_ENCAP_ESPINUDP_NON_IKE:
++			up->encap_type = val;
++			break;
++		default:
++			err = -ENOPROTOOPT;
++			break;
++		}
++		break;
++
++	default:
++		err = -ENOPROTOOPT;
++		break;
++	};
++
++	return err;
++}
++
++static int udp_getsockopt(struct sock *sk, int level, int optname, 
++			  char *optval, int *optlen)
++{
++	struct udp_opt *up = udp_sk(sk);
++	int val, len;
++
++	if (level != SOL_UDP)
++		return ip_getsockopt(sk, level, optname, optval, optlen);
++
++	if(get_user(len,optlen))
++		return -EFAULT;
++
++	len = min_t(unsigned int, len, sizeof(int));
++	
++	if(len < 0)
++		return -EINVAL;
++
++	switch(optname) {
++	case UDP_CORK:
++		val = up->corkflag;
++		break;
++
++	case UDP_ENCAP:
++		val = up->encap_type;
++		break;
++
++	default:
++		return -ENOPROTOOPT;
++	};
++
++  	if(put_user(len, optlen))
++  		return -EFAULT;
++	if(copy_to_user(optval, &val,len))
++		return -EFAULT;
++  	return 0;
++}
++
++
+ struct proto udp_prot = {
+  	name:		"UDP",
+ 	close:		udp_close,
+ 	connect:	udp_connect,
+ 	disconnect:	udp_disconnect,
+ 	ioctl:		udp_ioctl,
+-	setsockopt:	ip_setsockopt,
+-	getsockopt:	ip_getsockopt,
++	destroy:	udp_destroy_sock,
++	setsockopt:	udp_setsockopt,
++	getsockopt:	udp_getsockopt,
+ 	sendmsg:	udp_sendmsg,
+ 	recvmsg:	udp_recvmsg,
++	sendpage:	udp_sendpage,
+ 	backlog_rcv:	udp_queue_rcv_skb,
+ 	hash:		udp_v4_hash,
+ 	unhash:		udp_v4_unhash,
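
The udp.c rework above gives UDP the cork-and-append machinery: while UDP_CORK is set, successive writes accumulate on the socket's write queue and udp_push_pending_frames() emits them as one datagram when the cork is released; UDP_ENCAP likewise lets an IKE daemon mark its NAT-T socket as UDP_ENCAP_ESPINUDP. A minimal userspace sketch of the cork option (assumes a kernel carrying this patch and headers exposing UDP_CORK; 127.0.0.1:4500 is an arbitrary demo target):

    #include <string.h>
    #include <sys/socket.h>
    #include <netinet/in.h>
    #include <netinet/udp.h>    /* UDP_CORK -- may need patched headers */

    int main(void)
    {
            int s = socket(AF_INET, SOCK_DGRAM, 0);
            struct sockaddr_in dst;
            int on = 1, off = 0;

            memset(&dst, 0, sizeof(dst));
            dst.sin_family = AF_INET;
            dst.sin_port = htons(4500);
            dst.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
            connect(s, (struct sockaddr *)&dst, sizeof(dst));

            /* Two sends coalesced into a single datagram by the cork. */
            setsockopt(s, IPPROTO_UDP, UDP_CORK, &on, sizeof(on));
            send(s, "hello ", 6, 0);
            send(s, "world", 5, 0);
            setsockopt(s, IPPROTO_UDP, UDP_CORK, &off, sizeof(off));
            return 0;
    }
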
+diff -Nru a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/xfrm4_input.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,159 @@
++/*
++ * xfrm4_input.c
++ *
++ * Changes:
++ *	YOSHIFUJI Hideaki @USAGI
++ *		Split up af-specific portion
++ *	Derek Atkins <derek at ihtfp.com>
++ *		Add Encapsulation support
++ * 	
++ */
++
++#include <linux/module.h>
++#include <linux/string.h>
++#include <net/inet_ecn.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++
++int xfrm4_rcv(struct sk_buff *skb)
++{
++	return xfrm4_rcv_encap(skb, 0);
++}
++
++EXPORT_SYMBOL(xfrm4_rcv);
++
++static inline void ipip_ecn_decapsulate(struct sk_buff *skb)
++{
++	struct iphdr *outer_iph = skb->nh.iph;
++	struct iphdr *inner_iph = skb->h.ipiph;
++
++	if (INET_ECN_is_ce(outer_iph->tos) &&
++	    INET_ECN_is_not_ce(inner_iph->tos))
++		IP_ECN_set_ce(inner_iph);
++}
++
++static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
++{
++	switch (nexthdr) {
++	case IPPROTO_IPIP:
++		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
++			return -EINVAL;
++		*spi = skb->nh.iph->saddr;
++		*seq = 0;
++		return 0;
++	}
++
++	return xfrm_parse_spi(skb, nexthdr, spi, seq);
++}
++
++int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type)
++{
++	int err;
++	u32 spi, seq;
++	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
++	struct xfrm_state *x;
++	int xfrm_nr = 0;
++	int decaps = 0;
++
++	if ((err = xfrm4_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) != 0)
++		goto drop;
++
++	do {
++		struct iphdr *iph = skb->nh.iph;
++
++		if (xfrm_nr == XFRM_MAX_DEPTH)
++			goto drop;
++
++		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, iph->protocol, AF_INET);
++		if (x == NULL)
++			goto drop;
++
++		spin_lock(&x->lock);
++		if (unlikely(x->km.state != XFRM_STATE_VALID))
++			goto drop_unlock;
++
++		if (x->props.replay_window && xfrm_replay_check(x, seq))
++			goto drop_unlock;
++
++		if (xfrm_state_check_expire(x))
++			goto drop_unlock;
++
++		xfrm_vec[xfrm_nr].decap.decap_type = encap_type;
++		if (x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb))
++			goto drop_unlock;
++
++		/* only the first xfrm gets the encap type */
++		encap_type = 0;
++
++		if (x->props.replay_window)
++			xfrm_replay_advance(x, seq);
++
++		x->curlft.bytes += skb->len;
++		x->curlft.packets++;
++
++		spin_unlock(&x->lock);
++
++		xfrm_vec[xfrm_nr++].xvec = x;
++
++		iph = skb->nh.iph;
++
++		if (x->props.mode) {
++			if (iph->protocol != IPPROTO_IPIP)
++				goto drop;
++			if (!pskb_may_pull(skb, sizeof(struct iphdr)))
++				goto drop;
++			if (skb_cloned(skb) &&
++			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++				goto drop;
++			if (!(x->props.flags & XFRM_STATE_NOECN))
++				ipip_ecn_decapsulate(skb);
++			skb->mac.raw = memmove(skb->data - skb->mac_len,
++					       skb->mac.raw, skb->mac_len);
++			skb->nh.raw = skb->data;
++			memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
++			decaps = 1;
++			break;
++		}
++
++		if ((err = xfrm_parse_spi(skb, skb->nh.iph->protocol, &spi, &seq)) < 0)
++			goto drop;
++	} while (!err);
++
++	/* Allocate new secpath or COW existing one. */
++
++	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
++		struct sec_path *sp;
++		sp = secpath_dup(skb->sp);
++		if (!sp)
++			goto drop;
++		if (skb->sp)
++			secpath_put(skb->sp);
++		skb->sp = sp;
++	}
++	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
++		goto drop;
++
++	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
++	skb->sp->len += xfrm_nr;
++
++	if (decaps) {
++		if (!(skb->dev->flags&IFF_LOOPBACK)) {
++			dst_release(skb->dst);
++			skb->dst = NULL;
++		}
++		netif_rx(skb);
++		return 0;
++	} else {
++		return -skb->nh.iph->protocol;
++	}
++
++drop_unlock:
++	spin_unlock(&x->lock);
++	xfrm_state_put(x);
++drop:
++	while (--xfrm_nr >= 0)
++		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
++
++	kfree_skb(skb);
++	return 0;
++}
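
xfrm4_rcv_encap() above peels nested transforms until the innermost protocol is reached, while its companion udp_encap_rcv() in the udp.c hunk classifies traffic on an encap socket by the bytes after the UDP header: a lone 0xff byte is a NAT-T keepalive and is eaten, a zero first word is the non-ESP marker of an IKE message and goes up to userspace, and anything else is treated as ESP and fed back into xfrm4_rcv_encap(). The keepalive the kernel swallows is trivial to emit from userspace (sketch; assumes a UDP socket already connected to the peer's NAT-T port):

    #include <sys/socket.h>

    /* Hedged sketch: send the one-byte 0xff keepalive that
     * udp_encap_rcv() silently discards on an ESPINUDP socket. */
    int send_natt_keepalive(int sock)
    {
            unsigned char ka = 0xff;
            return send(sock, &ka, 1, 0) == 1 ? 0 : -1;
    }
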
+diff -Nru a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/xfrm4_output.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,138 @@
++/*
++ * xfrm4_output.c - Common IPsec encapsulation code for IPv4.
++ * Copyright (c) 2004 Herbert Xu <herbert at gondor.apana.org.au>
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <linux/skbuff.h>
++#include <linux/spinlock.h>
++#include <net/inet_ecn.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/icmp.h>
++
++/* Add encapsulation header.
++ *
++ * In transport mode, the IP header will be moved forward to make space
++ * for the encapsulation header.
++ *
++ * In tunnel mode, the top IP header will be constructed per RFC 2401.
++ * The following fields in it shall be filled in by x->type->output:
++ *	tot_len
++ *	check
++ *
++ * On exit, skb->h will be set to the start of the payload to be processed
++ * by x->type->output and skb->nh will be set to the top IP header.
++ */
++static void xfrm4_encap(struct sk_buff *skb)
++{
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x = dst->xfrm;
++	struct iphdr *iph, *top_iph;
++
++	iph = skb->nh.iph;
++	skb->h.ipiph = iph;
++
++	skb->nh.raw = skb_push(skb, x->props.header_len);
++	top_iph = skb->nh.iph;
++
++	if (!x->props.mode) {
++		skb->h.raw += iph->ihl*4;
++		memmove(top_iph, iph, iph->ihl*4);
++		return;
++	}
++
++	top_iph->ihl = 5;
++	top_iph->version = 4;
++
++	/* DS disclosed */
++	top_iph->tos = INET_ECN_encapsulate(iph->tos, iph->tos);
++	if (x->props.flags & XFRM_STATE_NOECN)
++		IP_ECN_clear(top_iph);
++
++	top_iph->frag_off = iph->frag_off & htons(IP_DF);
++	if (!top_iph->frag_off)
++		__ip_select_ident(top_iph, dst);
++
++	top_iph->ttl = dst_path_metric(dst, RTAX_HOPLIMIT);
++
++	top_iph->saddr = x->props.saddr.a4;
++	top_iph->daddr = x->id.daddr.a4;
++	top_iph->protocol = IPPROTO_IPIP;
++
++	memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
++}
++
++static int xfrm4_tunnel_check_size(struct sk_buff *skb)
++{
++	int mtu, ret = 0;
++	struct dst_entry *dst;
++	struct iphdr *iph = skb->nh.iph;
++
++	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
++		goto out;
++
++	IPCB(skb)->flags |= IPSKB_XFRM_TUNNEL_SIZE;
++	
++	if (!(iph->frag_off & htons(IP_DF)))
++		goto out;
++
++	dst = skb->dst;
++	mtu = dst_pmtu(dst) - dst->header_len - dst->trailer_len;
++	if (skb->len > mtu) {
++		icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
++		ret = -EMSGSIZE;
++	}
++out:
++	return ret;
++}
++
++int xfrm4_output(struct sk_buff *skb)
++{
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x = dst->xfrm;
++	int err;
++	
++	if (skb->ip_summed == CHECKSUM_HW)
++		skb_checksum_help(skb);
++
++	spin_lock_bh(&x->lock);
++	err = xfrm_state_check(x, skb);
++	if (err)
++		goto error;
++
++	if (x->props.mode) {
++		err = xfrm4_tunnel_check_size(skb);
++		if (err)
++			goto error;
++	}
++
++	xfrm4_encap(skb);
++
++	err = x->type->output(skb);
++	if (err)
++		goto error;
++
++	x->curlft.bytes += skb->len;
++	x->curlft.packets++;
++
++	spin_unlock_bh(&x->lock);
++	
++	if (!(skb->dst = dst_pop(dst))) {
++		err = -EHOSTUNREACH;
++		goto error_nolock;
++	}
++	err = NET_XMIT_BYPASS;
++
++out_exit:
++	return err;
++error:
++	spin_unlock_bh(&x->lock);
++error_nolock:
++	kfree_skb(skb);
++	goto out_exit;
++}
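
xfrm4_tunnel_check_size() above enforces path MTU for tunnel mode: for DF-marked packets it compares skb->len against the path MTU minus the bundle's accumulated header_len and trailer_len, answering oversized packets with ICMP_FRAG_NEEDED. With assumed ESP overheads (illustrative figures only, not values from the patch):

    #include <stdio.h>

    int main(void)
    {
            int path_mtu    = 1500;         /* assumed */
            int header_len  = 20 + 8 + 8;   /* outer IPv4 + ESP header + IV */
            int trailer_len = 2 + 12;       /* pad length/next header + ICV */

            /* Largest DF-marked inner packet that escapes ICMP_FRAG_NEEDED. */
            int mtu = path_mtu - header_len - trailer_len;
            printf("effective tunnel MTU = %d\n", mtu);  /* prints 1450 */
            return 0;
    }
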
+diff -Nru a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/xfrm4_policy.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,288 @@
++/* 
++ * xfrm4_policy.c
++ *
++ * Changes:
++ *	Kazunori MIYAZAWA @USAGI
++ * 	YOSHIFUJI Hideaki @USAGI
++ *		Split up af-specific portion
++ * 	
++ */
++
++#include <linux/config.h>
++#include <net/xfrm.h>
++#include <net/ip.h>
++
++static struct dst_ops xfrm4_dst_ops;
++static struct xfrm_policy_afinfo xfrm4_policy_afinfo;
++
++static struct xfrm_type_map xfrm4_type_map = { .lock = RW_LOCK_UNLOCKED };
++
++static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
++{
++	return __ip_route_output_key((struct rtable**)dst, fl);
++}
++
++/* Check that the bundle accepts the flow and its components are
++ * still valid.
++ */
++
++static int __xfrm4_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl)
++{
++	do {
++		if (xdst->u.dst.ops != &xfrm4_dst_ops)
++			return 1;
++
++		if (!xfrm_selector_match(&xdst->u.dst.xfrm->sel, fl, AF_INET))
++			return 0;
++		if (xdst->u.dst.xfrm->km.state != XFRM_STATE_VALID ||
++		    xdst->u.dst.path->obsolete > 0)
++			return 0;
++		xdst = (struct xfrm_dst*)xdst->u.dst.child;
++	} while (xdst);
++	return 0;
++}
++
++static struct dst_entry *
++__xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
++{
++	struct dst_entry *dst;
++
++	read_lock_bh(&policy->lock);
++	for (dst = policy->bundles; dst; dst = dst->next) {
++		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
++		if (xdst->u.rt.fl.oif == fl->oif &&	/*XXX*/
++		    xdst->u.rt.fl.fl4_dst == fl->fl4_dst &&
++	    	    xdst->u.rt.fl.fl4_src == fl->fl4_src &&
++		    __xfrm4_bundle_ok(xdst, fl)) {
++			dst_clone(dst);
++			break;
++		}
++	}
++	read_unlock_bh(&policy->lock);
++	return dst;
++}
++
++/* Allocate chain of dst_entry's, attach known xfrm's, calculate
++ * all the metrics... Shortly, bundle a bundle.
++ */
++
++static int
++__xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
++		      struct flowi *fl, struct dst_entry **dst_p)
++{
++	struct dst_entry *dst, *dst_prev;
++	struct rtable *rt0 = (struct rtable*)(*dst_p);
++	struct rtable *rt = rt0;
++	u32 remote = fl->fl4_dst;
++	u32 local  = fl->fl4_src;
++	int i;
++	int err;
++	int header_len = 0;
++	int trailer_len = 0;
++
++	dst = dst_prev = NULL;
++
++	for (i = 0; i < nx; i++) {
++		struct dst_entry *dst1 = dst_alloc(&xfrm4_dst_ops);
++
++		if (unlikely(dst1 == NULL)) {
++			err = -ENOBUFS;
++			goto error;
++		}
++
++		if (!dst)
++			dst = dst1;
++		else {
++			dst_prev->child = dst1;
++			dst1->flags |= DST_NOHASH;
++			dst_clone(dst1);
++		}
++		dst_prev = dst1;
++		if (xfrm[i]->props.mode) {
++			remote = xfrm[i]->id.daddr.a4;
++			local  = xfrm[i]->props.saddr.a4;
++		}
++		header_len += xfrm[i]->props.header_len;
++		trailer_len += xfrm[i]->props.trailer_len;
++	}
++
++	if (remote != fl->fl4_dst) {
++		struct flowi fl_tunnel = { .nl_u = { .ip4_u =
++						     { .daddr = remote,
++						       .saddr = local }
++					           }
++				         };
++		err = xfrm_dst_lookup((struct xfrm_dst**)&rt, &fl_tunnel, AF_INET);
++		if (err)
++			goto error;
++	} else {
++		dst_hold(&rt->u.dst);
++	}
++	dst_prev->child = &rt->u.dst;
++	i = 0;
++	for (dst_prev = dst; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
++		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
++		x->u.rt.fl = *fl;
++
++		dst_prev->xfrm = xfrm[i++];
++		dst_prev->dev = rt->u.dst.dev;
++		if (rt->u.dst.dev)
++			dev_hold(rt->u.dst.dev);
++		dst_prev->obsolete	= -1;
++		dst_prev->flags	       |= DST_HOST;
++		dst_prev->lastuse	= jiffies;
++		dst_prev->header_len	= header_len;
++		dst_prev->trailer_len	= trailer_len;
++		memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics));
++		dst_prev->path		= &rt->u.dst;
++
++		/* Copy neighbour for reachability confirmation */
++		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
++		dst_prev->input		= rt->u.dst.input;
++		dst_prev->output	= xfrm4_output;
++		if (rt->peer)
++			atomic_inc(&rt->peer->refcnt);
++		x->u.rt.peer = rt->peer;
++		/* Sheit... I remember I did this right. Apparently,
++		 * it was magically lost, so this code needs audit */
++		x->u.rt.rt_flags = rt0->rt_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL);
++		x->u.rt.rt_type = rt->rt_type;
++		x->u.rt.rt_src = rt0->rt_src;
++		x->u.rt.rt_dst = rt0->rt_dst;
++		x->u.rt.rt_gateway = rt->rt_gateway;
++		x->u.rt.rt_spec_dst = rt0->rt_spec_dst;
++		header_len -= x->u.dst.xfrm->props.header_len;
++		trailer_len -= x->u.dst.xfrm->props.trailer_len;
++	}
++	*dst_p = dst;
++	return 0;
++
++error:
++	if (dst)
++		dst_free(dst);
++	return err;
++}
++
++static void
++_decode_session4(struct sk_buff *skb, struct flowi *fl)
++{
++	struct iphdr *iph = skb->nh.iph;
++	u8 *xprth = skb->nh.raw + iph->ihl*4;
++
++	memset(fl, 0, sizeof(struct flowi));
++	if (!(iph->frag_off & htons(IP_MF | IP_OFFSET))) {
++		switch (iph->protocol) {
++		case IPPROTO_UDP:
++		case IPPROTO_TCP:
++		case IPPROTO_SCTP:
++			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
++				u16 *ports = (u16 *)xprth;
++
++				fl->fl_ip_sport = ports[0];
++				fl->fl_ip_dport = ports[1];
++			}
++			break;
++
++		case IPPROTO_ICMP:
++			if (pskb_may_pull(skb, xprth + 2 - skb->data)) {
++				u8 *icmp = xprth;
++
++				fl->fl_icmp_type = icmp[0];
++				fl->fl_icmp_code = icmp[1];
++			}
++			break;
++
++		case IPPROTO_ESP:
++			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
++				u32 *ehdr = (u32 *)xprth;
++
++				fl->fl_ipsec_spi = ehdr[0];
++			}
++			break;
++
++		case IPPROTO_AH:
++			if (pskb_may_pull(skb, xprth + 8 - skb->data)) {
++				u32 *ah_hdr = (u32*)xprth;
++
++				fl->fl_ipsec_spi = ah_hdr[1];
++			}
++			break;
++
++		case IPPROTO_COMP:
++			if (pskb_may_pull(skb, xprth + 4 - skb->data)) {
++				u16 *ipcomp_hdr = (u16 *)xprth;
++
++				fl->fl_ipsec_spi = ntohl(ntohs(ipcomp_hdr[1]));
++			}
++			break;
++		default:
++			fl->fl_ipsec_spi = 0;
++			break;
++		};
++	}
++	fl->proto = iph->protocol;
++	fl->fl4_dst = iph->daddr;
++	fl->fl4_src = iph->saddr;
++}
++
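For example, a TCP segment from 10.0.0.1:1025 to 10.0.0.2:80 is decoded into the equivalent of (illustrative; note the ports stay in network byte order, exactly as pulled from the header):

	struct flowi fl = {
		.proto		= IPPROTO_TCP,
		.fl4_src	= htonl(0x0a000001),	/* 10.0.0.1 */
		.fl4_dst	= htonl(0x0a000002),	/* 10.0.0.2 */
		.fl_ip_sport	= htons(1025),
		.fl_ip_dport	= htons(80),
	};
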
++static inline int xfrm4_garbage_collect(void)
++{
++	read_lock(&xfrm4_policy_afinfo.lock);
++	xfrm4_policy_afinfo.garbage_collect();
++	read_unlock(&xfrm4_policy_afinfo.lock);
++	return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2);
++}
++
++static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu)
++{
++	struct dst_entry *path = dst->path;
++
++	if (mtu < 68 + dst->header_len)
++		return;
++
++	path->ops->update_pmtu(path, mtu);
++}
++
++static struct dst_ops xfrm4_dst_ops = {
++	.family =		AF_INET,
++	.protocol =		__constant_htons(ETH_P_IP),
++	.gc =			xfrm4_garbage_collect,
++	.update_pmtu =		xfrm4_update_pmtu,
++	.gc_thresh =		1024,
++	.entry_size =		sizeof(struct xfrm_dst),
++};
++
++static struct xfrm_policy_afinfo xfrm4_policy_afinfo = {
++	.family = 		AF_INET,
++	.lock = 		RW_LOCK_UNLOCKED,
++	.type_map = 		&xfrm4_type_map,
++	.dst_ops =		&xfrm4_dst_ops,
++	.dst_lookup =		xfrm4_dst_lookup,
++	.find_bundle = 		__xfrm4_find_bundle,
++	.bundle_create =	__xfrm4_bundle_create,
++	.decode_session =	_decode_session4,
++};
++
++static void __init xfrm4_policy_init(void)
++{
++	xfrm_policy_register_afinfo(&xfrm4_policy_afinfo);
++}
++
++static void __exit xfrm4_policy_fini(void)
++{
++	xfrm_policy_unregister_afinfo(&xfrm4_policy_afinfo);
++}
++
++void __init xfrm4_init(void)
++{
++	xfrm4_state_init();
++	xfrm4_policy_init();
++}
++
++void __exit xfrm4_fini(void)
++{
++	//xfrm4_input_fini();
++	xfrm4_policy_fini();
++	xfrm4_state_fini();
++}
++
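The bundles built above are chains of xfrm_dst entries: ->child points one transform inward and ->path points at the plain rtable terminating the chain. A hypothetical debug helper (not in the patch) walking such a bundle:

	static void dump_bundle(struct dst_entry *dst)
	{
		/* the terminating rtable has dst->xfrm == NULL */
		for (; dst->xfrm; dst = dst->child)
			printk(KERN_DEBUG "xfrm proto %u mode %u spi 0x%08x\n",
			       dst->xfrm->id.proto, dst->xfrm->props.mode,
			       ntohl(dst->xfrm->id.spi));
	}
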
+diff -Nru a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/xfrm4_state.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,126 @@
++/*
++ * xfrm4_state.c
++ *
++ * Changes:
++ * 	YOSHIFUJI Hideaki @USAGI
++ * 		Split up af-specific portion
++ *
++ */
++
++#include <net/xfrm.h>
++#include <linux/pfkeyv2.h>
++#include <linux/ipsec.h>
++
++extern struct xfrm_state_afinfo xfrm4_state_afinfo;
++
++static void
++__xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl,
++		     struct xfrm_tmpl *tmpl,
++		     xfrm_address_t *daddr, xfrm_address_t *saddr)
++{
++	x->sel.daddr.a4 = fl->fl4_dst;
++	x->sel.saddr.a4 = fl->fl4_src;
++	x->sel.dport = fl->fl_ip_dport;
++	x->sel.dport_mask = ~0;
++	x->sel.sport = fl->fl_ip_sport;
++	x->sel.sport_mask = ~0;
++	x->sel.prefixlen_d = 32;
++	x->sel.prefixlen_s = 32;
++	x->sel.proto = fl->proto;
++	x->sel.ifindex = fl->oif;
++	x->id = tmpl->id;
++	if (x->id.daddr.a4 == 0)
++		x->id.daddr.a4 = daddr->a4;
++	x->props.saddr = tmpl->saddr;
++	if (x->props.saddr.a4 == 0)
++		x->props.saddr.a4 = saddr->a4;
++	x->props.mode = tmpl->mode;
++	x->props.reqid = tmpl->reqid;
++	x->props.family = AF_INET;
++}
++
++static struct xfrm_state *
++__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
++{
++	unsigned h = __xfrm4_spi_hash(daddr, spi, proto);
++	struct xfrm_state *x;
++
++	list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) {
++		if (x->props.family == AF_INET &&
++		    spi == x->id.spi &&
++		    daddr->a4 == x->id.daddr.a4 &&
++		    proto == x->id.proto) {
++			xfrm_state_hold(x);
++			return x;
++		}
++	}
++	return NULL;
++}
++
++static struct xfrm_state *
++__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, 
++		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
++		 int create)
++{
++	struct xfrm_state *x, *x0;
++	unsigned h = __xfrm4_dst_hash(daddr);
++
++	x0 = NULL;
++
++	list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) {
++		if (x->props.family == AF_INET &&
++		    daddr->a4 == x->id.daddr.a4 &&
++		    mode == x->props.mode &&
++		    proto == x->id.proto &&
++		    saddr->a4 == x->props.saddr.a4 &&
++		    reqid == x->props.reqid &&
++		    x->km.state == XFRM_STATE_ACQ &&
++		    !x->id.spi) {
++			    x0 = x;
++			    break;
++		    }
++	}
++	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
++		x0->sel.daddr.a4 = daddr->a4;
++		x0->sel.saddr.a4 = saddr->a4;
++		x0->sel.prefixlen_d = 32;
++		x0->sel.prefixlen_s = 32;
++		x0->props.saddr.a4 = saddr->a4;
++		x0->km.state = XFRM_STATE_ACQ;
++		x0->id.daddr.a4 = daddr->a4;
++		x0->id.proto = proto;
++		x0->props.family = AF_INET;
++		x0->props.mode = mode;
++		x0->props.reqid = reqid;
++		x0->props.family = AF_INET;
++		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
++		xfrm_state_hold(x0);
++		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
++		add_timer(&x0->timer);
++		xfrm_state_hold(x0);
++		list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h);
++		wake_up(&km_waitq);
++	}
++	if (x0)
++		xfrm_state_hold(x0);
++	return x0;
++}
++
++static struct xfrm_state_afinfo xfrm4_state_afinfo = {
++	.family			= AF_INET,
++	.lock			= RW_LOCK_UNLOCKED,
++	.init_tempsel		= __xfrm4_init_tempsel,
++	.state_lookup		= __xfrm4_state_lookup,
++	.find_acq		= __xfrm4_find_acq,
++};
++
++void __init xfrm4_state_init(void)
++{
++	xfrm_state_register_afinfo(&xfrm4_state_afinfo);
++}
++
++void __exit xfrm4_state_fini(void)
++{
++	xfrm_state_unregister_afinfo(&xfrm4_state_afinfo);
++}
++
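A hedged sketch of how the receive path reaches __xfrm4_state_lookup() through the generic wrapper (this mirrors xfrm4_rcv() elsewhere in the patch; esph and the error handling are assumed context):

	struct xfrm_state *x;

	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr,
			      esph->spi, IPPROTO_ESP, AF_INET);
	if (x != NULL) {
		/* ... run x->type->input() and update x->curlft ... */
		xfrm_state_put(x);	/* drop the reference the lookup took */
	}
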
+diff -Nru a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv4/xfrm4_tunnel.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,144 @@
++/* xfrm4_tunnel.c: Generic IP tunnel transformer.
++ *
++ * Copyright (C) 2003 David S. Miller (davem at redhat.com)
++ */
++
++#include <linux/skbuff.h>
++#include <linux/module.h>
++#include <net/xfrm.h>
++#include <net/ip.h>
++#include <net/protocol.h>
++
++static int ipip_output(struct sk_buff *skb)
++{
++	struct iphdr *iph;
++	
++	iph = skb->nh.iph;
++	iph->tot_len = htons(skb->len);
++	ip_send_check(iph);
++
++	return 0;
++}
++
++static int ipip_xfrm_rcv(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	return 0;
++}
++
++static struct xfrm_tunnel *ipip_handler;
++static DECLARE_MUTEX(xfrm4_tunnel_sem);
++
++int xfrm4_tunnel_register(struct xfrm_tunnel *handler)
++{
++	int ret;
++
++	down(&xfrm4_tunnel_sem);
++	ret = 0;
++	if (ipip_handler != NULL)
++		ret = -EINVAL;
++	if (!ret)
++		ipip_handler = handler;
++	up(&xfrm4_tunnel_sem);
++
++	return ret;
++}
++
++EXPORT_SYMBOL(xfrm4_tunnel_register);
++
++int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler)
++{
++	int ret;
++
++	down(&xfrm4_tunnel_sem);
++	ret = 0;
++	if (ipip_handler != handler)
++		ret = -EINVAL;
++	if (!ret)
++		ipip_handler = NULL;
++	up(&xfrm4_tunnel_sem);
++
++	synchronize_net();
++
++	return ret;
++}
++
++EXPORT_SYMBOL(xfrm4_tunnel_deregister);
++
++static int ipip_rcv(struct sk_buff *skb)
++{
++	struct xfrm_tunnel *handler = ipip_handler;
++
++	/* Tunnel devices take precedence.  */
++	if (handler && handler->handler(skb) == 0)
++		return 0;
++
++	return xfrm4_rcv(skb);
++}
++
++static void ipip_err(struct sk_buff *skb, u32 info)
++{
++	struct xfrm_tunnel *handler = ipip_handler;
++	u32 arg = info;
++
++	if (handler)
++		handler->err_handler(skb, &arg);
++}
++
++static int ipip_init_state(struct xfrm_state *x, void *args)
++{
++	if (!x->props.mode)
++		return -EINVAL;
++
++	if (x->encap)
++		return -EINVAL;
++
++	x->props.header_len = sizeof(struct iphdr);
++
++	return 0;
++}
++
++static void ipip_destroy(struct xfrm_state *x)
++{
++}
++
++static struct xfrm_type ipip_type = {
++	.description	= "IPIP",
++	.owner		= THIS_MODULE,
++	.proto	     	= IPPROTO_IPIP,
++	.init_state	= ipip_init_state,
++	.destructor	= ipip_destroy,
++	.input		= ipip_xfrm_rcv,
++	.output		= ipip_output
++};
++
++static struct inet_protocol ipip_protocol = {
++	.handler	=	ipip_rcv,
++	.err_handler	=	ipip_err,
++	.no_policy	=	1,
++};
++
++static int __init ipip_init(void)
++{
++	if (xfrm_register_type(&ipip_type, AF_INET) < 0) {
++		printk(KERN_INFO "ipip init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet_add_protocol(&ipip_protocol, IPPROTO_IPIP) < 0) {
++		printk(KERN_INFO "ipip init: can't add protocol\n");
++		xfrm_unregister_type(&ipip_type, AF_INET);
++		return -EAGAIN;
++	}
++	return 0;
++}
++
++static void __exit ipip_fini(void)
++{
++	if (inet_del_protocol(&ipip_protocol, IPPROTO_IPIP) < 0)
++		printk(KERN_INFO "ipip close: can't remove protocol\n");
++	if (xfrm_unregister_type(&ipip_type, AF_INET) < 0)
++		printk(KERN_INFO "ipip close: can't remove xfrm type\n");
++}
++
++module_init(ipip_init);
++module_exit(ipip_fini);
++MODULE_LICENSE("GPL");
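Only one struct xfrm_tunnel may be registered at a time; the ipip tunnel device driver is the intended client. A minimal sketch of a hypothetical consumer (my_rcv/my_err are placeholders):

	static struct xfrm_tunnel my_tunnel = {
		.handler	= my_rcv,	/* return 0 to consume the skb */
		.err_handler	= my_err,
	};

	if (xfrm4_tunnel_register(&my_tunnel) < 0)
		printk(KERN_ERR "xfrm4 tunnel slot already claimed\n");
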
+diff -Nru a/net/ipv6/Config.in b/net/ipv6/Config.in
+--- a/net/ipv6/Config.in	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/Config.in	2005-02-13 21:25:09 +11:00
+@@ -2,9 +2,23 @@
+ # IPv6 configuration
+ # 
+ 
+-#bool '    IPv6: flow policy support' CONFIG_RT6_POLICY
+-#bool '    IPv6: firewall support' CONFIG_IPV6_FIREWALL
++bool 'IPv6: Privacy Extensions (RFC 3041) support' CONFIG_IPV6_PRIVACY
+ 
+ if [ "$CONFIG_NETFILTER" != "n" ]; then
+    source net/ipv6/netfilter/Config.in
++fi
++
++tristate 'IPv6: AH transformation' CONFIG_INET6_AH
++tristate 'IPv6: ESP transformation' CONFIG_INET6_ESP
++tristate 'IPv6: IPComp transformation' CONFIG_INET6_IPCOMP
++
++tristate 'IPv6: IPv6-in-IPv6 tunnel' CONFIG_IPV6_TUNNEL
++if [ "$CONFIG_IPV6_TUNNEL" = "y" -o "$CONFIG_INET6_IPCOMP" = "y" ]; then
++   define_tristate CONFIG_INET6_TUNNEL y
++else
++   if [ "$CONFIG_IPV6_TUNNEL" = "m" -o "$CONFIG_INET6_IPCOMP" = "m" ]; then
++      define_tristate CONFIG_INET6_TUNNEL m
++   else
++      tristate 'IPv6: tunnel transformation' CONFIG_INET6_TUNNEL
++   fi
+ fi
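Spelled out, the dependency logic above resolves as follows (illustrative):

	# IPV6_TUNNEL=y or INET6_IPCOMP=y  ->  INET6_TUNNEL forced to y
	# otherwise either of them =m      ->  INET6_TUNNEL forced to m
	# both =n                          ->  the user is prompted
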
+diff -Nru a/net/ipv6/Makefile b/net/ipv6/Makefile
+--- a/net/ipv6/Makefile	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/Makefile	2005-02-13 21:25:09 +11:00
+@@ -9,16 +9,45 @@
+ 
+ O_TARGET := ipv6.o
+ 
+-obj-y :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
++mod-subdirs := netfilter
++
++ifeq ($(CONFIG_IPV6),m)
++obj-m += ipv6.o
++endif
++
++ipv6-objs :=	af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \
+ 		route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o raw.o \
+ 		protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
+ 		exthdrs.o sysctl_net_ipv6.o datagram.o proc.o \
+ 		ip6_flowlabel.o ipv6_syms.o
+ 
+-export-objs := ipv6_syms.o
++export-objs := ipv6_syms.o xfrm6_input.o xfrm6_tunnel.o
++
++ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \
++	xfrm6_output.o
++ipv6-objs += $(ipv6-y)
++
++obj-$(CONFIG_INET6_AH) += ah6.o
++obj-$(CONFIG_INET6_ESP) += esp6.o
++obj-$(CONFIG_INET6_IPCOMP) += ipcomp6.o
++obj-$(CONFIG_INET6_TUNNEL) += xfrm6_tunnel.o 
+ 
+-obj-m  := $(O_TARGET)
++obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
+ 
+-#obj-$(CONFIG_IPV6_FIREWALL) += ip6_fw.o
++subdir-$(CONFIG_NETFILTER) += netfilter
++
++ifeq ($(CONFIG_NETFILTER),y)
++obj-y += netfilter/netfilter.o
++endif
++
++ifeq ($(CONFIG_IPV6),y)
++obj-y += $(ipv6-objs)
++endif
+ 
+ include $(TOPDIR)/Rules.make
++
++
++ifeq ($(CONFIG_IPV6),m)
++ipv6.o: $(ipv6-objs)
++	$(LD) -r -o $@ $(ipv6-objs)
++endif
+diff -Nru a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
+--- a/net/ipv6/addrconf.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv6/addrconf.c	2005-02-13 21:25:10 +11:00
+@@ -28,6 +28,8 @@
+  *						packets.
+  *	YOSHIFUJI Hideaki @USAGI	:	improved accuracy of
+  *						address validation timer.
++ *	YOSHIFUJI Hideaki @USAGI	:	Privacy Extensions (RFC3041)
++ *						support.
+  *	Yuji SEKIYA @USAGI		:	Don't assign a same IPv6
+  *						address on a same interface.
+  *	YOSHIFUJI Hideaki @USAGI	:	ARCnet support
+@@ -66,6 +68,12 @@
+ #include <linux/if_tunnel.h>
+ #include <linux/rtnetlink.h>
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++#include <linux/random.h>
++#include <linux/crypto.h>
++#include <asm/scatterlist.h>
++#endif
++
+ #include <asm/uaccess.h>
+ 
+ #define IPV6_MAX_ADDRESSES 16
+@@ -87,6 +95,18 @@
+ int inet6_dev_count;
+ int inet6_ifa_count;
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++static int __ipv6_regen_rndid(struct inet6_dev *idev);
++static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); 
++static void ipv6_regen_rndid(unsigned long data);
++
++static int desync_factor = MAX_DESYNC_FACTOR * HZ;
++static struct crypto_tfm *md5_tfm;
++static spinlock_t md5_tfm_lock = SPIN_LOCK_UNLOCKED;
++#endif
++
++static int ipv6_count_addresses(struct inet6_dev *idev);
++
+ /*
+  *	Configured unicast address hash table
+  */
+@@ -125,6 +145,13 @@
+ 	MAX_RTR_SOLICITATIONS,		/* router solicits	*/
+ 	RTR_SOLICITATION_INTERVAL,	/* rtr solicit interval	*/
+ 	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
++#ifdef CONFIG_IPV6_PRIVACY
++	.use_tempaddr 			= 0,
++	.temp_valid_lft			= TEMP_VALID_LIFETIME,
++	.temp_prefered_lft		= TEMP_PREFERRED_LIFETIME,
++	.regen_max_retry		= REGEN_MAX_RETRY,
++	.max_desync_factor		= MAX_DESYNC_FACTOR,
++#endif
+ };
+ 
+ static struct ipv6_devconf ipv6_devconf_dflt =
+@@ -139,6 +166,13 @@
+ 	MAX_RTR_SOLICITATIONS,		/* router solicits	*/
+ 	RTR_SOLICITATION_INTERVAL,	/* rtr solicit interval	*/
+ 	MAX_RTR_SOLICITATION_DELAY,	/* rtr solicit delay	*/
++#ifdef CONFIG_IPV6_PRIVACY
++	.use_tempaddr			= 0,
++	.temp_valid_lft			= TEMP_VALID_LIFETIME,
++	.temp_prefered_lft		= TEMP_PREFERRED_LIFETIME,
++	.regen_max_retry		= REGEN_MAX_RETRY,
++	.max_desync_factor		= MAX_DESYNC_FACTOR,
++#endif
+ };
+ 
+ /* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
+@@ -170,15 +204,8 @@
+ 		};
+ 		return type;
+ 	}
+-	/* check for reserved anycast addresses */
+-	
+-	if ((st & htonl(0xE0000000)) &&
+-	    ((addr->s6_addr32[2] == htonl(0xFDFFFFFF) &&
+-	    (addr->s6_addr32[3] | htonl(0x7F)) == (u32)~0) ||
+-	    (addr->s6_addr32[2] == 0 && addr->s6_addr32[3] == 0)))
+-		type = IPV6_ADDR_ANYCAST;
+-	else
+-		type = IPV6_ADDR_UNICAST;
++
++	type = IPV6_ADDR_UNICAST;
+ 
+ 	/* Consider all addresses with the first three bits different of
+ 	   000 and 111 as finished.
+@@ -299,10 +326,32 @@
+ 		/* We refer to the device */
+ 		dev_hold(dev);
+ 
++		/* One reference from device.  We must do this before
++		 * we invoke __ipv6_regen_rndid().
++		 */
++		in6_dev_hold(ndev);
++
++#ifdef CONFIG_IPV6_PRIVACY
++		get_random_bytes(ndev->rndid, sizeof(ndev->rndid));
++		get_random_bytes(ndev->entropy, sizeof(ndev->entropy));
++		init_timer(&ndev->regen_timer);
++		ndev->regen_timer.function = ipv6_regen_rndid;
++		ndev->regen_timer.data = (unsigned long) ndev;
++		if ((dev->flags&IFF_LOOPBACK) ||
++		    dev->type == ARPHRD_TUNNEL ||
++		    dev->type == ARPHRD_SIT) {
++			printk(KERN_INFO
++				"Disabled Privacy Extensions on device %p(%s)\n",
++				dev, dev->name);
++			ndev->cnf.use_tempaddr = -1;
++		} else {
++			in6_dev_hold(ndev);
++			ipv6_regen_rndid((unsigned long) ndev);
++		}
++#endif
++
+ 		write_lock_bh(&addrconf_lock);
+ 		dev->ip6_ptr = ndev;
+-		/* One reference from device */
+-		in6_dev_hold(ndev);
+ 		write_unlock_bh(&addrconf_lock);
+ 
+ 		ipv6_mc_init_dev(ndev);
+@@ -330,38 +379,6 @@
+ 	return idev;
+ }
+ 
+-void ipv6_addr_prefix(struct in6_addr *prefix,
+-	struct in6_addr *addr, int prefix_len)
+-{
+-	unsigned long mask;
+-	int ncopy, nbits;
+-
+-	memset(prefix, 0, sizeof(*prefix));
+-
+-	if (prefix_len <= 0)
+-		return;
+-	if (prefix_len > 128)
+-		prefix_len = 128;
+-
+-	ncopy = prefix_len / 32;
+-	switch (ncopy) {
+-	case 4:	prefix->s6_addr32[3] = addr->s6_addr32[3];
+-	case 3:	prefix->s6_addr32[2] = addr->s6_addr32[2];
+-	case 2:	prefix->s6_addr32[1] = addr->s6_addr32[1];
+-	case 1:	prefix->s6_addr32[0] = addr->s6_addr32[0];
+-	case 0:	break;
+-	}
+-	nbits = prefix_len % 32;
+-	if (nbits == 0)
+-		return;
+-
+-	mask = ~((1 << (32 - nbits)) - 1);
+-	mask = htonl(mask);
+-
+-	prefix->s6_addr32[ncopy] = addr->s6_addr32[ncopy] & mask;
+-}
+-
+-
+ static void dev_forward_change(struct inet6_dev *idev)
+ {
+ 	struct net_device *dev;
+@@ -501,6 +518,18 @@
+ 	/* Add to inet6_dev unicast addr list. */
+ 	ifa->if_next = idev->addr_list;
+ 	idev->addr_list = ifa;
++
++#ifdef CONFIG_IPV6_PRIVACY
++	ifa->regen_count = 0;
++	if (ifa->flags&IFA_F_TEMPORARY) {
++		ifa->tmp_next = idev->tempaddr_list;
++		idev->tempaddr_list = ifa;
++		in6_ifa_hold(ifa);
++	} else {
++		ifa->tmp_next = NULL;
++	}
++#endif
++
+ 	in6_ifa_hold(ifa);
+ 	write_unlock_bh(&idev->lock);
+ 	read_unlock(&addrconf_lock);
+@@ -523,6 +552,15 @@
+ 
+ 	ifp->dead = 1;
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++	spin_lock_bh(&ifp->lock);
++	if (ifp->ifpub) {
++		__in6_ifa_put(ifp->ifpub);
++		ifp->ifpub = NULL;
++	}
++	spin_unlock_bh(&ifp->lock);
++#endif
++
+ 	write_lock_bh(&addrconf_hash_lock);
+ 	for (ifap = &inet6_addr_lst[hash]; (ifa=*ifap) != NULL;
+ 	     ifap = &ifa->lst_next) {
+@@ -536,6 +574,24 @@
+ 	write_unlock_bh(&addrconf_hash_lock);
+ 
+ 	write_lock_bh(&idev->lock);
++#ifdef CONFIG_IPV6_PRIVACY
++	if (ifp->flags&IFA_F_TEMPORARY) {
++		for (ifap = &idev->tempaddr_list; (ifa=*ifap) != NULL;
++		     ifap = &ifa->tmp_next) {
++			if (ifa == ifp) {
++				*ifap = ifa->tmp_next;
++				if (ifp->ifpub) {
++					__in6_ifa_put(ifp->ifpub);
++					ifp->ifpub = NULL;
++				}
++				__in6_ifa_put(ifp);
++				ifa->tmp_next = NULL;
++				break;
++			}
++		}
++	}
++#endif
++
+ 	for (ifap = &idev->addr_list; (ifa=*ifap) != NULL;
+ 	     ifap = &ifa->if_next) {
+ 		if (ifa == ifp) {
+@@ -556,6 +612,96 @@
+ 	in6_ifa_put(ifp);
+ }
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift)
++{
++	struct inet6_dev *idev;
++	struct in6_addr addr, *tmpaddr;
++	unsigned long tmp_prefered_lft, tmp_valid_lft;
++	int tmp_plen;
++	int ret = 0;
++
++	if (ift) {
++		spin_lock_bh(&ift->lock);
++		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
++		spin_unlock_bh(&ift->lock);
++		tmpaddr = &addr;
++	} else {
++		tmpaddr = NULL;
++	}
++retry:
++	spin_lock_bh(&ifp->lock);
++	in6_ifa_hold(ifp);
++	idev = ifp->idev;
++	in6_dev_hold(idev);
++	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
++	write_lock(&idev->lock);
++	if (idev->cnf.use_tempaddr <= 0) {
++		write_unlock(&idev->lock);
++		spin_unlock_bh(&ifp->lock);
++		printk(KERN_INFO
++			"ipv6_create_tempaddr(): use_tempaddr is disabled.\n");
++		in6_dev_put(idev);
++		in6_ifa_put(ifp);
++		ret = -1;
++		goto out;
++	}
++	if (ifp->regen_count++ >= idev->cnf.regen_max_retry) {
++		idev->cnf.use_tempaddr = -1;	/*XXX*/
++		write_unlock(&idev->lock);
++		spin_unlock_bh(&ifp->lock);
++		printk(KERN_WARNING
++			"ipv6_create_tempaddr(): regeneration time exceeded. disabled temporary address support.\n");
++		in6_dev_put(idev);
++		in6_ifa_put(ifp);
++		ret = -1;
++		goto out;
++	}
++	if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) {
++		write_unlock(&idev->lock);
++		spin_unlock_bh(&ifp->lock);
++		printk(KERN_WARNING
++			"ipv6_create_tempaddr(): regeneration of randomized interface id failed.\n");
++		in6_dev_put(idev);
++		in6_ifa_put(ifp);
++		ret = -1;
++		goto out;
++	}
++	memcpy(&addr.s6_addr[8], idev->rndid, 8);
++	tmp_valid_lft = min_t(__u32,
++			      ifp->valid_lft,
++			      idev->cnf.temp_valid_lft);
++	tmp_prefered_lft = min_t(__u32, 
++				 ifp->prefered_lft, 
++				 idev->cnf.temp_prefered_lft - desync_factor / HZ);
++	tmp_plen = ifp->prefix_len;
++	write_unlock(&idev->lock);
++	spin_unlock_bh(&ifp->lock);
++	ift = ipv6_count_addresses(idev) < IPV6_MAX_ADDRESSES ?
++		ipv6_add_addr(idev, &addr, tmp_plen,
++			      ipv6_addr_type(&addr)&IPV6_ADDR_SCOPE_MASK, IFA_F_TEMPORARY) : 0;
++	if (!ift || IS_ERR(ift)) {
++		in6_dev_put(idev);
++		in6_ifa_put(ifp);
++		printk(KERN_INFO
++			"ipv6_create_tempaddr(): retry temporary address regeneration.\n");
++		tmpaddr = &addr;
++		goto retry;
++	}
++	spin_lock_bh(&ift->lock);
++	ift->ifpub = ifp;
++	ift->valid_lft = tmp_valid_lft;
++	ift->prefered_lft = tmp_prefered_lft;
++	ift->tstamp = ifp->tstamp;
++	spin_unlock_bh(&ift->lock);
++	addrconf_dad_start(ift, 0);
++	in6_ifa_put(ift);
++	in6_dev_put(idev);
++out:
++	return ret;
++}
++#endif
++
+ /*
+  *	Choose an apropriate source address
+  *	should do:
+@@ -564,6 +710,22 @@
+  *		an address of the attached interface 
+  *	iii)	don't use deprecated addresses
+  */
++static int inline ipv6_saddr_pref(const struct inet6_ifaddr *ifp, u8 invpref)
++{
++	int pref;
++	pref = ifp->flags&IFA_F_DEPRECATED ? 0 : 2;
++#ifdef CONFIG_IPV6_PRIVACY
++	pref |= (ifp->flags^invpref)&IFA_F_TEMPORARY ? 0 : 1;
++#endif
++	return pref;
++}
++
++#ifdef CONFIG_IPV6_PRIVACY
++#define IPV6_GET_SADDR_MAXSCORE(score)	((score) == 3)
++#else
++#define IPV6_GET_SADDR_MAXSCORE(score)	(score)
++#endif
++
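Each candidate thus gets a two-bit score: bit 1 is set unless the address is deprecated, and bit 0 is set when the temporary/public property matches the configured preference. With use_tempaddr <= 1 (public addresses preferred) this gives:

	/* public,    not deprecated -> 3  (maximum; the search stops early) */
	/* temporary, not deprecated -> 2 */
	/* public,    deprecated     -> 1 */
	/* temporary, deprecated     -> 0 */

With use_tempaddr > 1 the IFA_F_TEMPORARY bit is inverted and temporary addresses win instead.
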
+ int ipv6_dev_get_saddr(struct net_device *dev,
+ 		   struct in6_addr *daddr, struct in6_addr *saddr, int onlink)
+ {
+@@ -572,6 +734,7 @@
+ 	struct inet6_dev *idev;
+ 	int scope;
+ 	int err;
++	int hiscore = -1, score;
+ 
+ 
+ 	if (!onlink)
+@@ -594,17 +757,27 @@
+ 			read_lock_bh(&idev->lock);
+ 			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+ 				if (ifp->scope == scope) {
+-					if (!(ifp->flags & (IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
+-						in6_ifa_hold(ifp);
++					if (ifp->flags&IFA_F_TENTATIVE)
++						continue;
++#ifdef CONFIG_IPV6_PRIVACY
++					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
++#else
++					score = ipv6_saddr_pref(ifp, 0);
++#endif
++					if (score <= hiscore)
++						continue;
++
++					if (match)
++						in6_ifa_put(match);
++					match = ifp;
++					hiscore = score;
++					in6_ifa_hold(ifp);
++
++					if (IPV6_GET_SADDR_MAXSCORE(score)) {
+ 						read_unlock_bh(&idev->lock);
+ 						read_unlock(&addrconf_lock);
+ 						goto out;
+ 					}
+-
+-					if (!match && !(ifp->flags & IFA_F_TENTATIVE)) {
+-						match = ifp;
+-						in6_ifa_hold(ifp);
+-					}
+ 				}
+ 			}
+ 			read_unlock_bh(&idev->lock);
+@@ -627,16 +800,26 @@
+ 			read_lock_bh(&idev->lock);
+ 			for (ifp=idev->addr_list; ifp; ifp=ifp->if_next) {
+ 				if (ifp->scope == scope) {
+-					if (!(ifp->flags&(IFA_F_DEPRECATED|IFA_F_TENTATIVE))) {
+-						in6_ifa_hold(ifp);
++					if (ifp->flags&IFA_F_TENTATIVE)
++						continue;
++#ifdef CONFIG_IPV6_PRIVACY
++					score = ipv6_saddr_pref(ifp, idev->cnf.use_tempaddr > 1 ? IFA_F_TEMPORARY : 0);
++#else
++					score = ipv6_saddr_pref(ifp, 0);
++#endif
++					if (score <= hiscore)
++						continue;
++
++					if (match)
++						in6_ifa_put(match);
++					match = ifp;
++					hiscore = score;
++					in6_ifa_hold(ifp);
++
++					if (IPV6_GET_SADDR_MAXSCORE(score)) {
+ 						read_unlock_bh(&idev->lock);
+ 						goto out_unlock_base;
+ 					}
+-
+-					if (!match && !(ifp->flags&IFA_F_TENTATIVE)) {
+-						match = ifp;
+-						in6_ifa_hold(ifp);
+-					}
+ 				}
+ 			}
+ 			read_unlock_bh(&idev->lock);
+@@ -648,24 +831,16 @@
+ 	read_unlock(&dev_base_lock);
+ 
+ out:
+-	if (ifp == NULL) {
+-		ifp = match;
+-		match = NULL;
+-	}
+-
+ 	err = -EADDRNOTAVAIL;
+-	if (ifp) {
+-		ipv6_addr_copy(saddr, &ifp->addr);
++	if (match) {
++		ipv6_addr_copy(saddr, &match->addr);
+ 		err = 0;
+-		in6_ifa_put(ifp);
+-	}
+-	if (match)
+ 		in6_ifa_put(match);
++	}
+ 
+ 	return err;
+ }
+ 
+-
+ int ipv6_get_saddr(struct dst_entry *dst,
+ 		   struct in6_addr *daddr, struct in6_addr *saddr)
+ {
+@@ -706,7 +881,7 @@
+ 	return err;
+ }
+ 
+-int ipv6_count_addresses(struct inet6_dev *idev)
++static int ipv6_count_addresses(struct inet6_dev *idev)
+ {
+ 	int cnt = 0;
+ 	struct inet6_ifaddr *ifp;
+@@ -785,6 +960,21 @@
+ 		ifp->flags |= IFA_F_TENTATIVE;
+ 		spin_unlock_bh(&ifp->lock);
+ 		in6_ifa_put(ifp);
++#ifdef CONFIG_IPV6_PRIVACY
++	} else if (ifp->flags&IFA_F_TEMPORARY) {
++		struct inet6_ifaddr *ifpub;
++		spin_lock_bh(&ifp->lock);
++		ifpub = ifp->ifpub;
++		if (ifpub) {
++			in6_ifa_hold(ifpub);
++			spin_unlock_bh(&ifp->lock);
++			ipv6_create_tempaddr(ifpub, ifp);
++			in6_ifa_put(ifpub);
++		} else {
++			spin_unlock_bh(&ifp->lock);
++		}
++		ipv6_del_addr(ifp);
++#endif
+ 	} else
+ 		ipv6_del_addr(ifp);
+ }
+@@ -857,6 +1047,110 @@
+ 	return err;
+ }
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
++static int __ipv6_regen_rndid(struct inet6_dev *idev)
++{
++	struct net_device *dev;
++	struct scatterlist sg[2];
++
++	sg[0].page = virt_to_page(idev->entropy);
++	sg[0].offset = ((long) idev->entropy & ~PAGE_MASK);
++	sg[0].length = 8;
++	sg[1].page = virt_to_page(idev->work_eui64);
++	sg[1].offset = ((long) idev->work_eui64 & ~PAGE_MASK);
++	sg[1].length = 8;
++
++	dev = idev->dev;
++
++	if (ipv6_generate_eui64(idev->work_eui64, dev)) {
++		printk(KERN_INFO
++			"__ipv6_regen_rndid(idev=%p): cannot get EUI64 identifier; use random bytes.\n",
++			idev);
++		get_random_bytes(idev->work_eui64, sizeof(idev->work_eui64));
++	}
++regen:
++	spin_lock(&md5_tfm_lock);
++	if (unlikely(md5_tfm == NULL)) {
++		spin_unlock(&md5_tfm_lock);
++		return -1;
++	}
++	crypto_digest_init(md5_tfm);
++	crypto_digest_update(md5_tfm, sg, 2);
++	crypto_digest_final(md5_tfm, idev->work_digest);
++	spin_unlock(&md5_tfm_lock);
++
++	memcpy(idev->rndid, &idev->work_digest[0], 8);
++	idev->rndid[0] &= ~0x02;
++	memcpy(idev->entropy, &idev->work_digest[8], 8);
++
++	/*
++	 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
++	 * check if generated address is not inappropriate
++	 *
++	 *  - Reserved subnet anycast (RFC 2526)
++	 *	11111101 11....11 1xxxxxxx
++	 *  - ISATAP (draft-ietf-ngtrans-isatap-01.txt) 4.3
++	 *	00-00-5E-FE-xx-xx-xx-xx
++	 *  - value 0
++	 *  - XXX: already assigned to an address on the device
++	 */
++	if (idev->rndid[0] == 0xfd && 
++	    (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) &&
++	    (idev->rndid[7]&0x80))
++		goto regen;
++	if ((idev->rndid[0]|idev->rndid[1]) == 0) {
++		if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
++			goto regen;
++		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
++			goto regen;
++	}
++
++	return 0;
++}
++
++static void ipv6_regen_rndid(unsigned long data)
++{
++	struct inet6_dev *idev = (struct inet6_dev *) data;
++	unsigned long expires;
++
++	read_lock_bh(&addrconf_lock);
++	write_lock_bh(&idev->lock);
++
++	if (idev->dead)
++		goto out;
++
++	if (__ipv6_regen_rndid(idev) < 0)
++		goto out;
++	
++	expires = jiffies +
++		idev->cnf.temp_prefered_lft * HZ - 
++		idev->cnf.regen_max_retry * idev->cnf.dad_transmits * idev->nd_parms->retrans_time - desync_factor;
++	if (time_before(expires, jiffies)) {
++		printk(KERN_WARNING
++			"ipv6_regen_rndid(): too short regeneration interval; timer disabled for %s.\n",
++			idev->dev->name);
++		goto out;
++	}
++
++	if (!mod_timer(&idev->regen_timer, expires))
++		in6_dev_hold(idev);
++
++out:
++	write_unlock_bh(&idev->lock);
++	read_unlock_bh(&addrconf_lock);
++	in6_dev_put(idev);
++}
++
++static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) {
++	int ret = 0;
++
++	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
++		ret = __ipv6_regen_rndid(idev);
++	return ret;
++}
++#endif
++
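Examples of 64-bit identifiers the regen loop above rejects and retries (per the draft cited in the comment; values illustrative):

	fd ff ff ff ff ff ff 80		/* reserved subnet anycast (RFC 2526) */
	00 00 5e fe 0a 00 00 01		/* ISATAP-format identifier */
	00 00 00 00 00 00 00 00		/* all-zero identifier */
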
+ /*
+  *	Add prefix route.
+  */
+@@ -883,7 +1177,7 @@
+ 	if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
+ 		rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
+ 
+-	ip6_route_add(&rtmsg, NULL);
++	ip6_route_add(&rtmsg, NULL, NULL);
+ }
+ 
+ /* Create "default" multicast route to the interface */
+@@ -900,7 +1194,7 @@
+ 	rtmsg.rtmsg_ifindex = dev->ifindex;
+ 	rtmsg.rtmsg_flags = RTF_UP;
+ 	rtmsg.rtmsg_type = RTMSG_NEWROUTE;
+-	ip6_route_add(&rtmsg, NULL);
++	ip6_route_add(&rtmsg, NULL, NULL);
+ }
+ 
+ static void sit_route_add(struct net_device *dev)
+@@ -917,7 +1211,7 @@
+ 	rtmsg.rtmsg_flags	= RTF_UP|RTF_NONEXTHOP;
+ 	rtmsg.rtmsg_ifindex	= dev->ifindex;
+ 
+-	ip6_route_add(&rtmsg, NULL);
++	ip6_route_add(&rtmsg, NULL, NULL);
+ }
+ 
+ static void addrconf_add_lroute(struct net_device *dev)
+@@ -948,7 +1242,6 @@
+ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
+ {
+ 	struct prefix_info *pinfo;
+-	struct rt6_info *rt;
+ 	__u32 valid_lft;
+ 	__u32 prefered_lft;
+ 	int addr_type;
+@@ -1004,32 +1297,33 @@
+ 	else
+ 		rt_expires = jiffies + valid_lft * HZ;
+ 
+-	rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
+-
+-	if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
+-		if (rt->rt6i_flags&RTF_EXPIRES) {
+-			if (pinfo->onlink == 0 || valid_lft == 0) {
+-				ip6_del_rt(rt, NULL);
+-				rt = NULL;
+-			} else {
+-				rt->rt6i_expires = rt_expires;
++	if (pinfo->onlink) {
++		struct rt6_info *rt;
++		rt = rt6_lookup(&pinfo->prefix, NULL, dev->ifindex, 1);
++
++		if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
++			if (rt->rt6i_flags&RTF_EXPIRES) {
++				if (valid_lft == 0) {
++					ip6_del_rt(rt, NULL, NULL);
++					rt = NULL;
++				} else {
++					rt->rt6i_expires = rt_expires;
++				}
+ 			}
++		} else if (valid_lft) {
++			addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
++					      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
+ 		}
+-	} else if (pinfo->onlink && valid_lft) {
+-		addrconf_prefix_route(&pinfo->prefix, pinfo->prefix_len,
+-				      dev, rt_expires, RTF_ADDRCONF|RTF_EXPIRES|RTF_PREFIX_RT);
++		if (rt)
++			dst_release(&rt->u.dst);
+ 	}
+-	if (rt)
+-		dst_release(&rt->u.dst);
+ 
+ 	/* Try to figure out our local address for this prefix */
+ 
+ 	if (pinfo->autoconf && in6_dev->cnf.autoconf) {
+ 		struct inet6_ifaddr * ifp;
+ 		struct in6_addr addr;
+-		int plen;
+-
+-		plen = pinfo->prefix_len >> 3;
++		int create = 0, update_lft = 0;
+ 
+ 		if (pinfo->prefix_len == 64) {
+ 			memcpy(&addr, &pinfo->prefix, 8);
+@@ -1058,33 +1352,95 @@
+ 				ifp = ipv6_add_addr(in6_dev, &addr, pinfo->prefix_len,
+ 						    addr_type&IPV6_ADDR_SCOPE_MASK, 0);
+ 
+-			if (IS_ERR(ifp)) {
++			if (!ifp || IS_ERR(ifp)) {
+ 				in6_dev_put(in6_dev);
+ 				return;
+ 			}
+ 
++			update_lft = create = 1;
+ 			addrconf_dad_start(ifp, RTF_ADDRCONF|RTF_PREFIX_RT);
+ 		}
+ 
+-		if (ifp && valid_lft == 0) {
+-			ipv6_del_addr(ifp);
+-			ifp = NULL;
+-		}
+-
+ 		if (ifp) {
+ 			int flags;
++			unsigned long now;
++#ifdef CONFIG_IPV6_PRIVACY
++			struct inet6_ifaddr *ift;
++#endif
++			u32 stored_lft;
+ 
++			/* update lifetime (RFC2462 5.5.3 e) */
+ 			spin_lock(&ifp->lock);
+-			ifp->valid_lft = valid_lft;
+-			ifp->prefered_lft = prefered_lft;
+-			ifp->tstamp = jiffies;
+-			flags = ifp->flags;
+-			ifp->flags &= ~IFA_F_DEPRECATED;
+-			spin_unlock(&ifp->lock);
+-
+-			if (!(flags&IFA_F_TENTATIVE))
+-				ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ?
+-						0 : RTM_NEWADDR, ifp);
++			now = jiffies;
++			if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
++				stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
++			else
++				stored_lft = 0;
++			if (!update_lft && stored_lft) {
++				if (valid_lft > MIN_VALID_LIFETIME ||
++				    valid_lft > stored_lft)
++					update_lft = 1;
++				else if (stored_lft <= MIN_VALID_LIFETIME) {
++					/* valid_lft <= stored_lft is always true */
++					/* XXX: IPsec */
++					update_lft = 0;
++				} else {
++					valid_lft = MIN_VALID_LIFETIME;
++					if (valid_lft < prefered_lft)
++						prefered_lft = valid_lft;
++					update_lft = 1;
++				}
++			}
++
++			if (update_lft) {
++				ifp->valid_lft = valid_lft;
++				ifp->prefered_lft = prefered_lft;
++				ifp->tstamp = now;
++				flags = ifp->flags;
++				ifp->flags &= ~IFA_F_DEPRECATED;
++				spin_unlock(&ifp->lock);
++
++				if (!(flags&IFA_F_TENTATIVE))
++					ipv6_ifa_notify((flags&IFA_F_DEPRECATED) ?
++							0 : RTM_NEWADDR, ifp);
++			} else
++				spin_unlock(&ifp->lock);
++
++#ifdef CONFIG_IPV6_PRIVACY
++			read_lock_bh(&in6_dev->lock);
++			/* update all temporary addresses in the list */
++			for (ift=in6_dev->tempaddr_list; ift; ift=ift->tmp_next) {
++				/*
++				 * When adjusting the lifetimes of an existing
++				 * temporary address, only lower the lifetimes.
++				 * Implementations must not increase the
++				 * lifetimes of an existing temporary address
++				 * when processing a Prefix Information Option.
++				 */
++				spin_lock(&ift->lock);
++				flags = ift->flags;
++				if (ift->valid_lft > valid_lft &&
++				    ift->valid_lft - valid_lft > (jiffies - ift->tstamp) / HZ)
++					ift->valid_lft = valid_lft + (jiffies - ift->tstamp) / HZ;
++				if (ift->prefered_lft > prefered_lft &&
++				    ift->prefered_lft - prefered_lft > (jiffies - ift->tstamp) / HZ)
++					ift->prefered_lft = prefered_lft + (jiffies - ift->tstamp) / HZ;
++				spin_unlock(&ift->lock);
++				if (!(flags&IFA_F_TENTATIVE))
++					ipv6_ifa_notify(0, ift);
++			}
++
++			if (create && in6_dev->cnf.use_tempaddr > 0) {
++				/*
++				 * When a new public address is created as described in [ADDRCONF],
++				 * also create a new temporary address.
++				 */
++				read_unlock_bh(&in6_dev->lock); 
++				ipv6_create_tempaddr(ifp, NULL);
++			} else {
++				read_unlock_bh(&in6_dev->lock);
++			}
++#endif
+ 			in6_ifa_put(ifp);
+ 			addrconf_verify(0);
+ 		}
+@@ -1407,6 +1763,54 @@
+ 		sit_route_add(dev);
+ }
+ 
++static inline int
++ipv6_inherit_linklocal(struct inet6_dev *idev, struct net_device *link_dev)
++{
++	struct in6_addr lladdr;
++
++	if (!ipv6_get_lladdr(link_dev, &lladdr)) {
++		addrconf_add_linklocal(idev, &lladdr);
++		return 0;
++	}
++	return -1;
++}
++
++static void ip6_tnl_add_linklocal(struct inet6_dev *idev)
++{
++	struct net_device *link_dev;
++
++	/* first try to inherit the link-local address from the link device */
++	if (idev->dev->iflink &&
++	    (link_dev = __dev_get_by_index(idev->dev->iflink))) {
++		if (!ipv6_inherit_linklocal(idev, link_dev))
++			return;
++	}
++	/* then try to inherit it from any device */
++	for (link_dev = dev_base; link_dev; link_dev = link_dev->next) {
++		if (!ipv6_inherit_linklocal(idev, link_dev))
++			return;
++	}
++	printk(KERN_DEBUG "init ip6-ip6: add_linklocal failed\n");
++}
++
++/*
++ * Autoconfigure tunnel with a link-local address so routing protocols,
++ * DHCPv6, MLD etc. can be run over the virtual link
++ */
++
++static void addrconf_ip6_tnl_config(struct net_device *dev)
++{
++	struct inet6_dev *idev;
++
++	ASSERT_RTNL();
++
++	if ((idev = addrconf_add_dev(dev)) == NULL) {
++		printk(KERN_DEBUG "init ip6-ip6: add_dev failed\n");
++		return;
++	}
++	ip6_tnl_add_linklocal(idev);
++	addrconf_add_mroute(dev);
++}
+ 
+ int addrconf_notify(struct notifier_block *this, unsigned long event, 
+ 		    void * data)
+@@ -1420,7 +1824,9 @@
+ 		case ARPHRD_SIT:
+ 			addrconf_sit_config(dev);
+ 			break;
+-
++		case ARPHRD_TUNNEL6:
++			addrconf_ip6_tnl_config(dev);
++			break;
+ 		case ARPHRD_LOOPBACK:
+ 			init_loopback(dev);
+ 			break;
+@@ -1515,6 +1921,27 @@
+ 	/* Step 3: clear address list */
+ 
+ 	write_lock_bh(&idev->lock);
++#ifdef CONFIG_IPV6_PRIVACY
++	if (how == 1 && del_timer(&idev->regen_timer))
++		in6_dev_put(idev);
++
++	/* clear tempaddr list */
++	while ((ifa = idev->tempaddr_list) != NULL) {
++		idev->tempaddr_list = ifa->tmp_next;
++		ifa->tmp_next = NULL;
++		ifa->dead = 1;
++		write_unlock_bh(&idev->lock);
++		spin_lock_bh(&ifa->lock);
++
++		if (ifa->ifpub) {
++			in6_ifa_put(ifa->ifpub);
++			ifa->ifpub = NULL;
++		}
++		spin_unlock_bh(&ifa->lock);
++		in6_ifa_put(ifa);
++		write_lock_bh(&idev->lock);
++	}
++#endif
+ 	while ((ifa = idev->addr_list) != NULL) {
+ 		idev->addr_list = ifa->if_next;
+ 		ifa->if_next = NULL;
+@@ -1539,10 +1966,11 @@
+ 	/* Shot the device (if unregistered) */
+ 
+ 	if (how == 1) {
+-		neigh_parms_release(&nd_tbl, idev->nd_parms);
+ #ifdef CONFIG_SYSCTL
+ 		addrconf_sysctl_unregister(&idev->cnf);
++		neigh_sysctl_unregister(idev->nd_parms);
+ #endif
++		neigh_parms_release(&nd_tbl, idev->nd_parms);
+ 		in6_dev_put(idev);
+ 	}
+ 	return 0;
+@@ -1592,7 +2020,7 @@
+ 
+ 		rtmsg.rtmsg_ifindex = ifp->idev->dev->ifindex;
+ 
+-		ip6_route_add(&rtmsg, NULL);
++		ip6_route_add(&rtmsg, NULL, NULL);
+ 	}
+ 
+ out:
+@@ -1612,7 +2040,8 @@
+ 	addrconf_join_solict(dev, &ifp->addr);
+ 
+ 	if (ifp->prefix_len != 128 && (ifp->flags&IFA_F_PERMANENT))
+-		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0, flags);
++		addrconf_prefix_route(&ifp->addr, ifp->prefix_len, dev, 0,
++					flags);
+ 
+ 	net_srandom(ifp->addr.s6_addr32[3]);
+ 	rand_num = net_random() % (ifp->idev->cnf.rtr_solicit_delay ? : 1);
+@@ -1682,6 +2111,7 @@
+ 	 */
+ 
+ 	if (ifp->idev->cnf.forwarding == 0 &&
++	    ifp->idev->cnf.rtr_solicits > 0 &&
+ 	    (dev->flags&IFF_LOOPBACK) == 0 &&
+ 	    (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)) {
+ 		struct in6_addr all_routers;
+@@ -1787,6 +2217,9 @@
+ 		write_lock(&addrconf_hash_lock);
+ 		for (ifp=inet6_addr_lst[i]; ifp; ifp=ifp->lst_next) {
+ 			unsigned long age;
++#ifdef CONFIG_IPV6_PRIVACY
++			unsigned long regen_advance;
++#endif
+ 
+ 			if (ifp->flags & IFA_F_PERMANENT)
+ 				continue;
+@@ -1794,6 +2227,12 @@
+ 			spin_lock(&ifp->lock);
+ 			age = (now - ifp->tstamp) / HZ;
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++			regen_advance = ifp->idev->cnf.regen_max_retry * 
++					ifp->idev->cnf.dad_transmits * 
++					ifp->idev->nd_parms->retrans_time / HZ;
++#endif
++
+ 			if (age >= ifp->valid_lft) {
+ 				spin_unlock(&ifp->lock);
+ 				in6_ifa_hold(ifp);
+@@ -1822,6 +2261,28 @@
+ 					in6_ifa_put(ifp);
+ 					goto restart;
+ 				}
++#ifdef CONFIG_IPV6_PRIVACY
++			} else if ((ifp->flags&IFA_F_TEMPORARY) &&
++				   !(ifp->flags&IFA_F_TENTATIVE)) {
++				if (age >= ifp->prefered_lft - regen_advance) {
++					struct inet6_ifaddr *ifpub = ifp->ifpub;
++					if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
++						next = ifp->tstamp + ifp->prefered_lft * HZ;
++					if (!ifp->regen_count && ifpub) {
++						ifp->regen_count++;
++						in6_ifa_hold(ifp);
++						in6_ifa_hold(ifpub);
++						spin_unlock(&ifp->lock);
++						write_unlock(&addrconf_hash_lock);
++						ipv6_create_tempaddr(ifpub, ifp);
++						in6_ifa_put(ifpub);
++						in6_ifa_put(ifp);
++						goto restart;
++					}
++				} else if (time_before(ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ, next))
++					next = ifp->tstamp + ifp->prefered_lft * HZ - regen_advance * HZ;
++				spin_unlock(&ifp->lock);
++#endif
+ 			} else {
+ 				/* ifp->prefered_lft <= ifp->valid_lft */
+ 				if (time_before(ifp->tstamp + ifp->prefered_lft * HZ, next))
+@@ -2106,7 +2567,7 @@
+ 
+ 	switch (event) {
+ 	case RTM_NEWADDR:
+-		ip6_rt_addr_add(&ifp->addr, ifp->idev->dev);
++		ip6_rt_addr_add(&ifp->addr, ifp->idev->dev, 0);
+ 		break;
+ 	case RTM_DELADDR:
+ 		addrconf_leave_solict(ifp->idev->dev, &ifp->addr);
+@@ -2157,7 +2618,7 @@
+ static struct addrconf_sysctl_table
+ {
+ 	struct ctl_table_header *sysctl_header;
+-	ctl_table addrconf_vars[11];
++	ctl_table addrconf_vars[16];
+ 	ctl_table addrconf_dev[2];
+ 	ctl_table addrconf_conf_dir[2];
+ 	ctl_table addrconf_proto_dir[2];
+@@ -2204,6 +2665,28 @@
+          &ipv6_devconf.rtr_solicit_delay, sizeof(int), 0644, NULL,
+          &proc_dointvec_jiffies},
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++	{NET_IPV6_USE_TEMPADDR, "use_tempaddr",
++	 &ipv6_devconf.use_tempaddr, sizeof(int), 0644, NULL,
++	 &proc_dointvec},
++
++	{NET_IPV6_TEMP_VALID_LFT, "temp_valid_lft",
++	 &ipv6_devconf.temp_valid_lft, sizeof(int), 0644, NULL,
++	 &proc_dointvec},
++
++	{NET_IPV6_TEMP_PREFERED_LFT, "temp_prefered_lft",
++	 &ipv6_devconf.temp_prefered_lft, sizeof(int), 0644, NULL,
++	 &proc_dointvec},
++
++	{NET_IPV6_REGEN_MAX_RETRY, "regen_max_retry",
++	 &ipv6_devconf.regen_max_retry, sizeof(int), 0644, NULL,
++	 &proc_dointvec},
++
++	{NET_IPV6_MAX_DESYNC_FACTOR, "max_desync_factor",
++	 &ipv6_devconf.max_desync_factor, sizeof(int), 0644, NULL,
++	 &proc_dointvec},
++#endif
++
+ 	{0}},
+ 
+ 	{{NET_PROTO_CONF_ALL, "all", NULL, 0, 0555, addrconf_sysctl.addrconf_vars},{0}},
+@@ -2222,7 +2705,7 @@
+ 	if (t == NULL)
+ 		return;
+ 	memcpy(t, &addrconf_sysctl, sizeof(*t));
+-	for (i=0; i<sizeof(t->addrconf_vars)/sizeof(t->addrconf_vars[0])-1; i++) {
++	for (i=0; t->addrconf_vars[i].data; i++) {
+ 		t->addrconf_vars[i].data += (char*)p - (char*)&ipv6_devconf;
+ 		t->addrconf_vars[i].de = NULL;
+ 		t->addrconf_vars[i].extra1 = idev; /* embedded; no ref */
+@@ -2285,7 +2768,16 @@
+ {
+ #ifdef MODULE
+ 	struct net_device *dev;
++#endif
+ 
++#ifdef CONFIG_IPV6_PRIVACY
++	md5_tfm = crypto_alloc_tfm("md5", 0);
++	if (unlikely(md5_tfm == NULL))
++		printk(KERN_WARNING
++			"failed to load transform for md5\n");
++#endif
++
++#ifdef MODULE
+ 	/* This takes sense only during module load. */
+ 	rtnl_lock();
+ 	for (dev = dev_base; dev; dev = dev->next) {
+@@ -2370,6 +2862,13 @@
+ 	del_timer(&addr_chk_timer);
+ 
+ 	rtnl_unlock();
++
++#ifdef CONFIG_IPV6_PRIVACY
++	if (likely(md5_tfm != NULL)) {
++		crypto_free_tfm(md5_tfm);
++		md5_tfm = NULL;
++	}
++#endif
+ 
+ #ifdef CONFIG_PROC_FS
+ 	proc_net_remove("if_inet6");
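As a worked example of the regen_advance computation in addrconf_verify() above (assumed illustrative settings: regen_max_retry = 3, dad_transmits = 1, retrans_time = 1 s):

	regen_advance = 3 * 1 * 1 s = 3 s

i.e. a replacement temporary address is created about three seconds before the old one's preferred lifetime runs out, leaving time for duplicate address detection on the new address.
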
+diff -Nru a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
+--- a/net/ipv6/af_inet6.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/af_inet6.c	2005-02-13 21:25:09 +11:00
+@@ -58,6 +58,9 @@
+ #include <net/transp_v6.h>
+ #include <net/ip6_route.h>
+ #include <net/addrconf.h>
++#ifdef CONFIG_IPV6_TUNNEL
++#include <net/ip6_tunnel.h>
++#endif
+ 
+ #include <asm/uaccess.h>
+ #include <asm/system.h>
+@@ -181,7 +184,7 @@
+ 	/* Init the ipv4 part of the socket since we can have sockets
+ 	 * using v6 API for ipv4.
+ 	 */
+-	sk->protinfo.af_inet.ttl	= 64;
++	sk->protinfo.af_inet.uc_ttl	= -1;
+ 
+ 	sk->protinfo.af_inet.mc_loop	= 1;
+ 	sk->protinfo.af_inet.mc_ttl	= 1;
+@@ -651,6 +654,11 @@
+ 	 */
+ 	inet6_register_protosw(&rawv6_protosw);
+ 
++	/* Register the family here so that the init calls below will
++	 * be able to create sockets. (?? is this dangerous ??)
++	 */
++	(void) sock_register(&inet6_family_ops);
++
+ 	/*
+ 	 *	ipngwg API draft makes clear that the correct semantics
+ 	 *	for TCP and UDP is to consider one TCP and UDP instance
+@@ -667,6 +675,11 @@
+ 	err = ndisc_init(&inet6_family_ops);
+ 	if (err)
+ 		goto ndisc_fail;
++#ifdef CONFIG_IPV6_TUNNEL
++	err = ip6_tunnel_init();
++	if (err)
++		goto ip6_tunnel_fail;
++#endif
+ 	err = igmp6_init(&inet6_family_ops);
+ 	if (err)
+ 		goto igmp_fail;
+@@ -692,15 +705,17 @@
+ 	ip6_flowlabel_init();
+ 	addrconf_init();
+ 	sit_init();
++
++	/* Init v6 extension headers. */
++	ipv6_rthdr_init();
+ 	ipv6_frag_init();
++	ipv6_nodata_init();
++	ipv6_destopt_init();
+ 
+ 	/* Init v6 transport protocols. */
+ 	udpv6_init();
+ 	tcpv6_init();
+ 
+-	/* Now the userspace is allowed to create INET6 sockets. */
+-	(void) sock_register(&inet6_family_ops);
+-	
+ 	return 0;
+ 
+ #ifdef CONFIG_PROC_FS
+@@ -718,6 +733,10 @@
+ 	igmp6_cleanup();
+ #endif
+ igmp_fail:
++#ifdef CONFIG_IPV6_TUNNEL
++	ip6_tunnel_cleanup();
++ip6_tunnel_fail:
++#endif
+ 	ndisc_cleanup();
+ ndisc_fail:
+ 	icmpv6_cleanup();
+@@ -751,6 +770,9 @@
+ 	ip6_route_cleanup();
+ 	ipv6_packet_cleanup();
+ 	igmp6_cleanup();
++#ifdef CONFIG_IPV6_TUNNEL
++	ip6_tunnel_cleanup();
++#endif
+ 	ndisc_cleanup();
+ 	icmpv6_cleanup();
+ #ifdef CONFIG_SYSCTL
+diff -Nru a/net/ipv6/ah6.c b/net/ipv6/ah6.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv6/ah6.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,481 @@
++/*
++ * Copyright (C)2002 USAGI/WIDE Project
++ * 
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * 
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ * 
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ * Authors
++ *
++ *	Mitsuru KANDA @USAGI       : IPv6 Support 
++ * 	Kazunori MIYAZAWA @USAGI   :
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 	
++ * 	This file is derived from net/ipv4/ah.c.
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/ah.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <linux/string.h>
++#include <net/icmp.h>
++#include <net/ipv6.h>
++#include <net/xfrm.h>
++#include <asm/scatterlist.h>
++
++static int zero_out_mutable_opts(struct ipv6_opt_hdr *opthdr)
++{
++	u8 *opt = (u8 *)opthdr;
++	int len = ipv6_optlen(opthdr);
++	int off = 0;
++	int optlen = 0;
++
++	off += 2;
++	len -= 2;
++
++	while (len > 0) {
++
++		switch (opt[off]) {
++
++		case IPV6_TLV_PAD0:
++			optlen = 1;
++			break;
++		default:
++			if (len < 2) 
++				goto bad;
++			optlen = opt[off+1]+2;
++			if (len < optlen)
++				goto bad;
++			if (opt[off] & 0x20)
++				memset(&opt[off+2], 0, opt[off+1]);
++			break;
++		}
++
++		off += optlen;
++		len -= optlen;
++	}
++	if (len == 0)
++		return 1;
++
++bad:
++	return 0;
++}
++
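Bit 0x20 of an option type is the "may change en route" flag (RFC 2460 section 4.2); the loop above zeroes the data of such options so sender and receiver compute the ICV over identical bytes. For instance (second type is hypothetical):

	type 0x05 (Router Alert)  ->  0x05 & 0x20 == 0  ->  covered unchanged
	type 0x26 (hypothetical)  ->  0x26 & 0x20 != 0  ->  data zeroed for ICV
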
++/**
++ *	ipv6_rearrange_rthdr - rearrange IPv6 routing header
++ *	@iph: IPv6 header
++ *	@rthdr: routing header
++ *
++ *	Rearrange the destination address in @iph and the addresses in @rthdr
++ *	so that they appear in the order they will at the final destination.
++ *	See Appendix A2 of RFC 2402 for details.
++ */
++static void ipv6_rearrange_rthdr(struct ipv6hdr *iph, struct ipv6_rt_hdr *rthdr)
++{
++	int segments, segments_left;
++	struct in6_addr *addrs;
++	struct in6_addr final_addr;
++
++	segments_left = rthdr->segments_left;
++	if (segments_left == 0)
++		return;
++	rthdr->segments_left = 0; 
++
++	/* The value of rthdr->hdrlen has been verified either by the system
++	 * call if it is locally generated, or by ipv6_rthdr_rcv() for incoming
++	 * packets.  So we can assume that it is even and that segments is
++	 * greater than or equal to segments_left.
++	 *
++	 * For the same reason we can assume that this option is of type 0.
++	 */
++	segments = rthdr->hdrlen >> 1;
++
++	addrs = ((struct rt0_hdr *)rthdr)->addr;
++	ipv6_addr_copy(&final_addr, addrs + segments - 1);
++
++	addrs += segments - segments_left;
++	memmove(addrs + 1, addrs, (segments_left - 1) * sizeof(*addrs));
++
++	ipv6_addr_copy(addrs, &iph->daddr);
++	ipv6_addr_copy(&iph->daddr, &final_addr);
++}
++
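A worked example of the rearrangement (RFC 2402 Appendix A2): at the sender, with intermediate hops A and B and final destination D,

	before:	daddr = A	rthdr addrs = [B, D]	segments_left = 2
	after:	daddr = D	rthdr addrs = [A, B]	segments_left = 0

i.e. the headers are put into the form they will have at the final destination, which is exactly what the AH ICV must cover.
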
++static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len)
++{
++	union {
++		struct ipv6hdr *iph;
++		struct ipv6_opt_hdr *opth;
++		struct ipv6_rt_hdr *rth;
++		char *raw;
++	} exthdr = { .iph = iph };
++	char *end = exthdr.raw + len;
++	int nexthdr = iph->nexthdr;
++
++	exthdr.iph++;
++
++	while (exthdr.raw < end) {
++		switch (nexthdr) {
++		case NEXTHDR_HOP:
++		case NEXTHDR_DEST:
++			if (!zero_out_mutable_opts(exthdr.opth)) {
++				if (net_ratelimit())
++					printk(KERN_WARNING "overrun %sopts\n",
++						nexthdr == NEXTHDR_HOP ?
++							"hop" : "dest");
++				return -EINVAL;
++			}
++			break;
++
++		case NEXTHDR_ROUTING:
++			ipv6_rearrange_rthdr(iph, exthdr.rth);
++			break;
++
++		default :
++			return 0;
++		}
++
++		nexthdr = exthdr.opth->nexthdr;
++		exthdr.raw += ipv6_optlen(exthdr.opth);
++	}
++
++	return 0;
++}
++
++static int ah6_output(struct sk_buff *skb)
++{
++	int err;
++	int extlen;
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x  = dst->xfrm;
++	struct ipv6hdr *top_iph;
++	struct ip_auth_hdr *ah;
++	struct ah_data *ahp;
++	u8 nexthdr;
++	char tmp_base[8];
++	struct {
++		struct in6_addr daddr;
++		char hdrs[0];
++	} *tmp_ext;
++
++	top_iph = (struct ipv6hdr *)skb->data;
++	top_iph->payload_len = htons(skb->len - sizeof(*top_iph));
++
++	nexthdr = *skb->nh.raw;
++	*skb->nh.raw = IPPROTO_AH;
++
++	/* When there are no extension headers, we only need to save the first
++	 * 8 bytes of the base IP header.
++	 */
++	memcpy(tmp_base, top_iph, sizeof(tmp_base));
++
++	tmp_ext = NULL;
++	extlen = skb->h.raw - (unsigned char *)(top_iph + 1);
++	if (extlen) {
++		extlen += sizeof(*tmp_ext);
++		tmp_ext = kmalloc(extlen, GFP_ATOMIC);
++		if (!tmp_ext) {
++			err = -ENOMEM;
++			goto error;
++		}
++		memcpy(tmp_ext, &top_iph->daddr, extlen);
++		err = ipv6_clear_mutable_options(top_iph,
++						 extlen - sizeof(*tmp_ext) +
++						 sizeof(*top_iph));
++		if (err)
++			goto error_free_iph;
++	}
++
++	ah = (struct ip_auth_hdr *)skb->h.raw;
++	ah->nexthdr = nexthdr;
++
++	top_iph->priority    = 0;
++	top_iph->flow_lbl[0] = 0;
++	top_iph->flow_lbl[1] = 0;
++	top_iph->flow_lbl[2] = 0;
++	top_iph->hop_limit   = 0;
++
++	ahp = x->data;
++	ah->hdrlen  = (XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + 
++				   ahp->icv_trunc_len) >> 2) - 2;
++
++	ah->reserved = 0;
++	ah->spi = x->id.spi;
++	ah->seq_no = htonl(++x->replay.oseq);
++	ahp->icv(ahp, skb, ah->auth_data);
++
++	err = 0;
++
++	memcpy(top_iph, tmp_base, sizeof(tmp_base));
++	if (tmp_ext) {
++		memcpy(&top_iph->daddr, tmp_ext, extlen);
++error_free_iph:
++		kfree(tmp_ext);
++	}
++
++error:
++	return err;
++}
++
++static int ah6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	/*
++	 * Before AH processing:
++	 * [IPv6][Ext1][Ext2][AH][Dest][Payload]
++	 * |<-------------->| hdr_len
++	 *
++	 * To erase AH:
++	 * Keep a copy of the headers that were cleared. After AH
++	 * processing, advance skb->nh.raw past the AH header with
++	 * skb_pull, then copy the saved hdr_len bytes back in. Any
++	 * destination options header following AH ends up after [Ext2].
++	 *
++	 * |<>|[IPv6][Ext1][Ext2][Dest][Payload]
++	 * An AH-sized gap is left in front of the IPv6 header afterwards.
++	 */
++
++	struct ipv6_auth_hdr *ah;
++	struct ah_data *ahp;
++	unsigned char *tmp_hdr = NULL;
++	u16 hdr_len;
++	u16 ah_hlen;
++	int nexthdr;
++
++	if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr)))
++		goto out;
++
++	/* We are going to _remove_ AH header to keep sockets happy,
++	 * so... Later this can change. */
++	if (skb_cloned(skb) &&
++	    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++		goto out;
++
++	hdr_len = skb->data - skb->nh.raw;
++	ah = (struct ipv6_auth_hdr*)skb->data;
++	ahp = x->data;
++	nexthdr = ah->nexthdr;
++	ah_hlen = (ah->hdrlen + 2) << 2;
++
++        if (ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_full_len) &&
++            ah_hlen != XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len))
++                goto out;
++
++	if (!pskb_may_pull(skb, ah_hlen))
++		goto out;
++
++	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
++	if (!tmp_hdr)
++		goto out;
++	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
++	if (ipv6_clear_mutable_options(skb->nh.ipv6h, hdr_len))
++		goto out;
++	skb->nh.ipv6h->priority    = 0;
++	skb->nh.ipv6h->flow_lbl[0] = 0;
++	skb->nh.ipv6h->flow_lbl[1] = 0;
++	skb->nh.ipv6h->flow_lbl[2] = 0;
++	skb->nh.ipv6h->hop_limit   = 0;
++
++        {
++		u8 auth_data[MAX_AH_AUTH_LEN];
++
++		memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len);
++		memset(ah->auth_data, 0, ahp->icv_trunc_len);
++		skb_push(skb, skb->data - skb->nh.raw);
++		ahp->icv(ahp, skb, ah->auth_data);
++		if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) {
++			if (net_ratelimit())
++				printk(KERN_WARNING "ipsec ah authentication error\n");
++			x->stats.integrity_failed++;
++			goto free_out;
++		}
++	}
++
++	skb->nh.raw = skb_pull(skb, ah_hlen);
++	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
++	skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++	skb_pull(skb, hdr_len);
++	skb->h.raw = skb->data;
++
++
++	kfree(tmp_hdr);
++
++	return nexthdr;
++
++free_out:
++	kfree(tmp_hdr);
++out:
++	return -EINVAL;
++}
++
++static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, 
++                    int type, int code, int offset, __u32 info)
++{
++	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
++	struct ip_auth_hdr *ah = (struct ip_auth_hdr*)(skb->data+offset);
++	struct xfrm_state *x;
++
++	if (type != ICMPV6_DEST_UNREACH &&
++	    type != ICMPV6_PKT_TOOBIG)
++		return;
++
++	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6);
++	if (!x)
++		return;
++
++	printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/"
++			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
++	       ntohl(ah->spi), NIP6(iph->daddr));
++
++	xfrm_state_put(x);
++}
++
++static int ah6_init_state(struct xfrm_state *x, void *args)
++{
++	struct ah_data *ahp = NULL;
++	struct xfrm_algo_desc *aalg_desc;
++
++	if (!x->aalg)
++		goto error;
++
++	/* null auth can use a zero length key */
++	if (x->aalg->alg_key_len > 512)
++		goto error;
++
++	if (x->encap)
++		goto error;
++
++	ahp = kmalloc(sizeof(*ahp), GFP_KERNEL);
++	if (ahp == NULL)
++		return -ENOMEM;
++
++	memset(ahp, 0, sizeof(*ahp));
++
++	ahp->key = x->aalg->alg_key;
++	ahp->key_len = (x->aalg->alg_key_len+7)/8;
++	ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
++	if (!ahp->tfm)
++		goto error;
++	ahp->icv = ah_hmac_digest;
++	
++	/*
++	 * Lookup the algorithm description maintained by xfrm_algo,
++	 * verify crypto transform properties, and store information
++	 * we need for AH processing.  This lookup cannot fail here
++	 * after a successful crypto_alloc_tfm().
++	 */
++	aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
++	BUG_ON(!aalg_desc);
++
++	if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
++	    crypto_tfm_alg_digestsize(ahp->tfm)) {
++		printk(KERN_INFO "AH: %s digestsize %u != %hu\n",
++		       x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm),
++		       aalg_desc->uinfo.auth.icv_fullbits/8);
++		goto error;
++	}
++	
++	ahp->icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
++	ahp->icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
++	
++	BUG_ON(ahp->icv_trunc_len > MAX_AH_AUTH_LEN);
++	
++	ahp->work_icv = kmalloc(ahp->icv_full_len, GFP_KERNEL);
++	if (!ahp->work_icv)
++		goto error;
++	
++	x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len);
++	if (x->props.mode)
++		x->props.header_len += sizeof(struct ipv6hdr);
++	x->data = ahp;
++
++	return 0;
++
++error:
++	if (ahp) {
++		if (ahp->work_icv)
++			kfree(ahp->work_icv);
++		if (ahp->tfm)
++			crypto_free_tfm(ahp->tfm);
++		kfree(ahp);
++	}
++	return -EINVAL;
++}
++
++static void ah6_destroy(struct xfrm_state *x)
++{
++	struct ah_data *ahp = x->data;
++
++	if (!ahp)
++		return;
++
++	if (ahp->work_icv) {
++		kfree(ahp->work_icv);
++		ahp->work_icv = NULL;
++	}
++	if (ahp->tfm) {
++		crypto_free_tfm(ahp->tfm);
++		ahp->tfm = NULL;
++	}
++	kfree(ahp);
++}
++
++static struct xfrm_type ah6_type =
++{
++	.description	= "AH6",
++	.owner		= THIS_MODULE,
++	.proto	     	= IPPROTO_AH,
++	.init_state	= ah6_init_state,
++	.destructor	= ah6_destroy,
++	.input		= ah6_input,
++	.output		= ah6_output
++};
++
++static struct inet6_protocol ah6_protocol = {
++	.handler	=	xfrm6_rcv,
++	.err_handler	=	ah6_err,
++	.flags		=	INET6_PROTO_NOPOLICY,
++};
++
++static int __init ah6_init(void)
++{
++	if (xfrm_register_type(&ah6_type, AF_INET6) < 0) {
++		printk(KERN_INFO "ipv6 ah init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++
++	if (inet6_add_protocol(&ah6_protocol, IPPROTO_AH) < 0) {
++		printk(KERN_INFO "ipv6 ah init: can't add protocol\n");
++		xfrm_unregister_type(&ah6_type, AF_INET6);
++		return -EAGAIN;
++	}
++
++	return 0;
++}
++
++static void __exit ah6_fini(void)
++{
++	if (inet6_del_protocol(&ah6_protocol, IPPROTO_AH) < 0)
++		printk(KERN_INFO "ipv6 ah close: can't remove protocol\n");
++
++	if (xfrm_unregister_type(&ah6_type, AF_INET6) < 0)
++		printk(KERN_INFO "ipv6 ah close: can't remove xfrm type\n");
++
++}
++
++module_init(ah6_init);
++module_exit(ah6_fini);
++
++MODULE_LICENSE("GPL");
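
An aside on the field zeroing in ah6_output and ah6_input above: RFC 2402 requires the AH ICV to be computed over a packet with every mutable field (traffic class, flow label, hop limit) set to zero, so sender and receiver hash identical bytes even after routers rewrite those fields in flight. Here is a minimal standalone sketch of that canonicalization step in plain C; the struct is an illustrative stand-in for the kernel's struct ipv6hdr, with the version/class byte layout simplified.

#include <stdint.h>
#include <string.h>

/* Illustrative stand-in for struct ipv6hdr (bitfield layout simplified). */
struct ip6_fixed_hdr {
	uint8_t  ver_class;     /* high nibble: version; low nibble: class */
	uint8_t  flow_lbl[3];   /* rest of class + flow label: mutable */
	uint16_t payload_len;
	uint8_t  nexthdr;
	uint8_t  hop_limit;     /* mutable: decremented by every router */
	uint8_t  saddr[16];
	uint8_t  daddr[16];
};

/* Zero the mutable fields before the header is fed to the ICV,
 * mirroring what ah6_output and ah6_input do above. */
static void ah6_canonicalize(struct ip6_fixed_hdr *h)
{
	h->ver_class &= 0xf0;                     /* keep the version only */
	memset(h->flow_lbl, 0, sizeof(h->flow_lbl));
	h->hop_limit = 0;
}

int main(void)
{
	struct ip6_fixed_hdr h = { .ver_class = 0x60, .hop_limit = 64 };

	ah6_canonicalize(&h);
	return (h.ver_class == 0x60 && h.hop_limit == 0) ? 0 : 1;
}

The real code additionally saves the original bytes (tmp_base, tmp_ext, tmp_hdr) and copies them back once the ICV is computed, which is why both paths end by restoring the header.
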
+diff -Nru a/net/ipv6/anycast.c b/net/ipv6/anycast.c
+--- a/net/ipv6/anycast.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv6/anycast.c	2005-02-13 21:25:10 +11:00
+@@ -95,7 +95,6 @@
+ 	return onlink;
+ }
+ 
+-
+ /*
+  *	socket join an anycast group
+  */
+@@ -109,8 +108,12 @@
+ 	int	ishost = !ipv6_devconf.forwarding;
+ 	int	err = 0;
+ 
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
+ 	if (ipv6_addr_type(addr) & IPV6_ADDR_MULTICAST)
+ 		return -EINVAL;
++	if (ipv6_chk_addr(addr, NULL))
++		return -EINVAL;
+ 
+ 	pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL);
+ 	if (pac == NULL)
+@@ -160,21 +163,12 @@
+ 	 * For hosts, allow link-local or matching prefix anycasts.
+ 	 * This obviates the need for propagating anycast routes while
+ 	 * still allowing some non-router anycast participation.
+-	 *
+-	 * allow anyone to join anycasts that don't require a special route
+-	 * and can't be spoofs of unicast addresses (reserved anycast only)
+ 	 */
+ 	if (!ip6_onlink(addr, dev)) {
+ 		if (ishost)
+ 			err = -EADDRNOTAVAIL;
+-		else if (!capable(CAP_NET_ADMIN))
+-			err = -EPERM;
+ 		if (err)
+ 			goto out_dev_put;
+-	} else if (!(ipv6_addr_type(addr) & IPV6_ADDR_ANYCAST) &&
+-		   !capable(CAP_NET_ADMIN)) {
+-		err = -EPERM;
+-		goto out_dev_put;
+ 	}
+ 
+ 	err = ipv6_dev_ac_inc(dev, addr);
+@@ -265,6 +259,13 @@
+ 		dev_put(dev);
+ }
+ 
++#if 0
++/* This function is unused, which is funny. Apparently the author
++ * intended it to filter out datagrams inside udp/raw but forgot.
++ *
++ * That is fine: anycasts are not special compared with delivery to unicasts.
++ */
++
+ int inet6_ac_check(struct sock *sk, struct in6_addr *addr, int ifindex)
+ {
+ 	struct ipv6_ac_socklist *pac;
+@@ -285,6 +286,8 @@
+ 	return found;
+ }
+ 
++#endif
++
+ static void aca_put(struct ifacaddr6 *ac)
+ {
+ 	if (atomic_dec_and_test(&ac->aca_refcnt)) {
+@@ -346,7 +349,7 @@
+ 	idev->ac_list = aca;
+ 	write_unlock_bh(&idev->lock);
+ 
+-	ip6_rt_addr_add(&aca->aca_addr, dev);
++	ip6_rt_addr_add(&aca->aca_addr, dev, 1);
+ 
+ 	addrconf_join_solict(dev, &aca->aca_addr);
+ 
+diff -Nru a/net/ipv6/datagram.c b/net/ipv6/datagram.c
+--- a/net/ipv6/datagram.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/datagram.c	2005-02-13 21:25:09 +11:00
+@@ -78,7 +78,7 @@
+ 
+ 	iph = (struct ipv6hdr*)skb_put(skb, sizeof(struct ipv6hdr));
+ 	skb->nh.ipv6h = iph;
+-	memcpy(&iph->daddr, fl->fl6_dst, 16);
++	ipv6_addr_copy(&iph->daddr, &fl->fl6_dst);
+ 
+ 	serr = SKB_EXT_ERR(skb);
+ 	serr->ee.ee_errno = err;
+@@ -89,7 +89,7 @@
+ 	serr->ee.ee_info = info;
+ 	serr->ee.ee_data = 0;
+ 	serr->addr_offset = (u8*)&iph->daddr - skb->nh.raw;
+-	serr->port = fl->uli_u.ports.dport;
++	serr->port = fl->fl_ip_dport;
+ 
+ 	skb->h.raw = skb->tail;
+ 	__skb_pull(skb, skb->tail - skb->data);
+@@ -289,7 +289,8 @@
+ 					goto exit_f;
+ 				}
+ 
+-				fl->fl6_src = &src_info->ipi6_addr;
++				ipv6_addr_copy(&fl->fl6_src,
++					       &src_info->ipi6_addr);
+ 			}
+ 
+ 			break;
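
The datagram.c change just above is part of a wider pattern in this backport: struct flowi now stores addresses by value (fl6_src and fl6_dst as embedded in6_addr), filled via ipv6_addr_copy(), instead of holding pointers into cmsg or packet buffers, so a flow key stays valid after the buffer it was built from is gone. A contrived sketch of the hazard the old pointer form invited; the types and names here are illustrative, not kernel API:

#include <string.h>

struct addr16 { unsigned char b[16]; };

struct flow_by_value   { struct addr16 dst; };        /* new style */
struct flow_by_pointer { const struct addr16 *dst; }; /* old style */

static void build_keys(struct flow_by_value *v, struct flow_by_pointer *p)
{
	struct addr16 tmp = { { 0x20, 0x01 } };  /* stack lifetime only */

	memcpy(&v->dst, &tmp, sizeof(tmp));  /* copied: safe to keep */
	p->dst = &tmp;                       /* dangles once we return */
}

int main(void)
{
	struct flow_by_value v;
	struct flow_by_pointer p;

	build_keys(&v, &p);
	/* v.dst is still valid here; p.dst now points at dead stack. */
	return 0;
}

With the by-value form, code such as icmpv6_send can memset the flowi, fill it in, and hand it around without worrying about the lifetime of the triggering skb.
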
+diff -Nru a/net/ipv6/esp6.c b/net/ipv6/esp6.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv6/esp6.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,432 @@
++/*
++ * Copyright (C)2002 USAGI/WIDE Project
++ * 
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * 
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ * 
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ * Authors
++ *
++ *	Mitsuru KANDA @USAGI       : IPv6 Support 
++ * 	Kazunori MIYAZAWA @USAGI   :
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 	
++ * 	This file is derived from net/ipv4/esp.c
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/esp.h>
++#include <asm/scatterlist.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <linux/random.h>
++#include <net/icmp.h>
++#include <net/ipv6.h>
++#include <linux/icmpv6.h>
++
++static int esp6_output(struct sk_buff *skb)
++{
++	int err;
++	int hdr_len;
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x  = dst->xfrm;
++	struct ipv6hdr *top_iph;
++	struct ipv6_esp_hdr *esph;
++	struct crypto_tfm *tfm;
++	struct esp_data *esp;
++	struct sk_buff *trailer;
++	int blksize;
++	int clen;
++	int alen;
++	int nfrags;
++
++	esp = x->data;
++	hdr_len = skb->h.raw - skb->data +
++		  sizeof(*esph) + esp->conf.ivlen;
++
++	/* Strip IP+ESP header. */
++	__skb_pull(skb, hdr_len);
++
++	/* Now skb is pure payload to encrypt */
++	err = -ENOMEM;
++
++	/* Round to block size */
++	clen = skb->len;
++
++	alen = esp->auth.icv_trunc_len;
++	tfm = esp->conf.tfm;
++	blksize = (crypto_tfm_alg_blocksize(tfm) + 3) & ~3;
++	clen = (clen + 2 + blksize-1)&~(blksize-1);
++	if (esp->conf.padlen)
++		clen = (clen + esp->conf.padlen-1)&~(esp->conf.padlen-1);
++
++	if ((nfrags = skb_cow_data(skb, clen-skb->len+alen, &trailer)) < 0) {
++		goto error;
++	}
++
++	/* Fill padding... */
++	do {
++		int i;
++		for (i=0; i<clen-skb->len - 2; i++)
++			*(u8*)(trailer->tail + i) = i+1;
++	} while (0);
++	*(u8*)(trailer->tail + clen-skb->len - 2) = (clen - skb->len)-2;
++	pskb_put(skb, trailer, clen - skb->len);
++
++	top_iph = (struct ipv6hdr *)__skb_push(skb, hdr_len);
++	esph = (struct ipv6_esp_hdr *)skb->h.raw;
++	top_iph->payload_len = htons(skb->len + alen - sizeof(*top_iph));
++	*(u8*)(trailer->tail - 1) = *skb->nh.raw;
++	*skb->nh.raw = IPPROTO_ESP;
++
++	esph->spi = x->id.spi;
++	esph->seq_no = htonl(++x->replay.oseq);
++
++	if (esp->conf.ivlen)
++		crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
++
++	do {
++		struct scatterlist *sg = &esp->sgbuf[0];
++
++		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
++			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
++			if (!sg)
++				goto error;
++		}
++		skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen);
++		crypto_cipher_encrypt(tfm, sg, sg, clen);
++		if (unlikely(sg != &esp->sgbuf[0]))
++			kfree(sg);
++	} while (0);
++
++	if (esp->conf.ivlen) {
++		memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
++		crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm));
++	}
++
++	if (esp->auth.icv_full_len) {
++		esp->auth.icv(esp, skb, (u8*)esph-skb->data,
++			sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail);
++		pskb_put(skb, trailer, alen);
++	}
++
++	err = 0;
++
++error:
++	return err;
++}
++
++static int esp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	struct ipv6hdr *iph;
++	struct ipv6_esp_hdr *esph;
++	struct esp_data *esp = x->data;
++	struct sk_buff *trailer;
++	int blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
++	int alen = esp->auth.icv_trunc_len;
++	int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen;
++
++	int hdr_len = skb->h.raw - skb->nh.raw;
++	int nfrags;
++	unsigned char *tmp_hdr = NULL;
++	int ret = 0;
++
++	if (!pskb_may_pull(skb, sizeof(struct ipv6_esp_hdr))) {
++		ret = -EINVAL;
++		goto out_nofree;
++	}
++
++	if (elen <= 0 || (elen & (blksize-1))) {
++		ret = -EINVAL;
++		goto out_nofree;
++	}
++
++	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
++	if (!tmp_hdr) {
++		ret = -ENOMEM;
++		goto out_nofree;
++	}
++	memcpy(tmp_hdr, skb->nh.raw, hdr_len);
++
++	/* If integrity check is required, do this. */
++	if (esp->auth.icv_full_len) {
++		u8 sum[esp->auth.icv_full_len];
++		u8 sum1[alen];
++
++		esp->auth.icv(esp, skb, 0, skb->len-alen, sum);
++
++		if (skb_copy_bits(skb, skb->len-alen, sum1, alen))
++			BUG();
++
++		if (unlikely(memcmp(sum, sum1, alen))) {
++			x->stats.integrity_failed++;
++			ret = -EINVAL;
++			goto out;
++		}
++	}
++
++	if ((nfrags = skb_cow_data(skb, 0, &trailer)) < 0) {
++		ret = -EINVAL;
++		goto out;
++	}
++
++	skb->ip_summed = CHECKSUM_NONE;
++
++	esph = (struct ipv6_esp_hdr*)skb->data;
++	iph = skb->nh.ipv6h;
++
++	/* Get the IV. This may be wrong; check against other implementations. */
++	if (esp->conf.ivlen)
++		crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm));
++
++	{
++		u8 nexthdr[2];
++		struct scatterlist *sg = &esp->sgbuf[0];
++		u8 padlen;
++
++		if (unlikely(nfrags > ESP_NUM_FAST_SG)) {
++			sg = kmalloc(sizeof(struct scatterlist)*nfrags, GFP_ATOMIC);
++			if (!sg) {
++				ret = -ENOMEM;
++				goto out;
++			}
++		}
++		skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen);
++		crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen);
++		if (unlikely(sg != &esp->sgbuf[0]))
++			kfree(sg);
++
++		if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2))
++			BUG();
++
++		padlen = nexthdr[0];
++		if (padlen+2 >= elen) {
++			if (net_ratelimit()) {
++				printk(KERN_WARNING "ipsec esp packet is garbage padlen=%d, elen=%d\n", padlen+2, elen);
++			}
++			ret = -EINVAL;
++			goto out;
++		}
++		/* ... check padding bits here. Silly. :-) */ 
++
++		pskb_trim(skb, skb->len - alen - padlen - 2);
++		skb->h.raw = skb_pull(skb, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen);
++		skb->nh.raw += sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
++		memcpy(skb->nh.raw, tmp_hdr, hdr_len);
++		skb->nh.ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++		ret = nexthdr[1];
++	}
++
++out:
++	kfree(tmp_hdr);
++out_nofree:
++	return ret;
++}
++
++static u32 esp6_get_max_size(struct xfrm_state *x, int mtu)
++{
++	struct esp_data *esp = x->data;
++	u32 blksize = crypto_tfm_alg_blocksize(esp->conf.tfm);
++
++	if (x->props.mode) {
++		mtu = (mtu + 2 + blksize-1)&~(blksize-1);
++	} else {
++		/* The worst case. */
++		mtu += 2 + blksize;
++	}
++	if (esp->conf.padlen)
++		mtu = (mtu + esp->conf.padlen-1)&~(esp->conf.padlen-1);
++
++	return mtu + x->props.header_len + esp->auth.icv_full_len;
++}
++
++static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
++                     int type, int code, int offset, __u32 info)
++{
++	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
++	struct ipv6_esp_hdr *esph = (struct ipv6_esp_hdr*)(skb->data+offset);
++	struct xfrm_state *x;
++
++	if (type != ICMPV6_DEST_UNREACH && 
++	    type != ICMPV6_PKT_TOOBIG)
++		return;
++
++	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6);
++	if (!x)
++		return;
++	printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/"
++			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
++			ntohl(esph->spi), NIP6(iph->daddr));
++	xfrm_state_put(x);
++}
++
++static void esp6_destroy(struct xfrm_state *x)
++{
++	struct esp_data *esp = x->data;
++
++	if (!esp)
++		return;
++
++	if (esp->conf.tfm) {
++		crypto_free_tfm(esp->conf.tfm);
++		esp->conf.tfm = NULL;
++	}
++	if (esp->conf.ivec) {
++		kfree(esp->conf.ivec);
++		esp->conf.ivec = NULL;
++	}
++	if (esp->auth.tfm) {
++		crypto_free_tfm(esp->auth.tfm);
++		esp->auth.tfm = NULL;
++	}
++	if (esp->auth.work_icv) {
++		kfree(esp->auth.work_icv);
++		esp->auth.work_icv = NULL;
++	}
++	kfree(esp);
++}
++
++static int esp6_init_state(struct xfrm_state *x, void *args)
++{
++	struct esp_data *esp = NULL;
++
++	/* null auth and encryption can have zero length keys */
++	if (x->aalg) {
++		if (x->aalg->alg_key_len > 512)
++			goto error;
++	}
++	if (x->ealg == NULL)
++		goto error;
++
++	if (x->encap)
++		goto error;
++
++	esp = kmalloc(sizeof(*esp), GFP_KERNEL);
++	if (esp == NULL)
++		return -ENOMEM;
++
++	memset(esp, 0, sizeof(*esp));
++
++	if (x->aalg) {
++		struct xfrm_algo_desc *aalg_desc;
++
++		esp->auth.key = x->aalg->alg_key;
++		esp->auth.key_len = (x->aalg->alg_key_len+7)/8;
++		esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0);
++		if (esp->auth.tfm == NULL)
++			goto error;
++		esp->auth.icv = esp_hmac_digest;
++ 
++		aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name);
++		BUG_ON(!aalg_desc);
++ 
++		if (aalg_desc->uinfo.auth.icv_fullbits/8 !=
++			crypto_tfm_alg_digestsize(esp->auth.tfm)) {
++				printk(KERN_INFO "ESP: %s digestsize %u != %hu\n",
++					x->aalg->alg_name,
++					crypto_tfm_alg_digestsize(esp->auth.tfm),
++					aalg_desc->uinfo.auth.icv_fullbits/8);
++				goto error;
++		}
++ 
++		esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8;
++		esp->auth.icv_trunc_len = aalg_desc->uinfo.auth.icv_truncbits/8;
++ 
++		esp->auth.work_icv = kmalloc(esp->auth.icv_full_len, GFP_KERNEL);
++		if (!esp->auth.work_icv)
++			goto error;
++	}
++	esp->conf.key = x->ealg->alg_key;
++	esp->conf.key_len = (x->ealg->alg_key_len+7)/8;
++	if (x->props.ealgo == SADB_EALG_NULL)
++		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB);
++	else
++		esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC);
++	if (esp->conf.tfm == NULL)
++		goto error;
++	esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm);
++	esp->conf.padlen = 0;
++	if (esp->conf.ivlen) {
++		esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL);
++		if (unlikely(esp->conf.ivec == NULL))
++			goto error;
++		get_random_bytes(esp->conf.ivec, esp->conf.ivlen);
++	}
++	crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len);
++	x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen;
++	if (x->props.mode)
++		x->props.header_len += sizeof(struct ipv6hdr);
++	x->data = esp;
++	return 0;
++
++error:
++	if (esp) {
++		if (esp->auth.tfm)
++			crypto_free_tfm(esp->auth.tfm);
++		if (esp->auth.work_icv)
++			kfree(esp->auth.work_icv);
++		if (esp->conf.tfm)
++			crypto_free_tfm(esp->conf.tfm);
++		kfree(esp);
++	}
++	return -EINVAL;
++}
++
++static struct xfrm_type esp6_type =
++{
++	.description	= "ESP6",
++	.owner	     	= THIS_MODULE,
++	.proto	     	= IPPROTO_ESP,
++	.init_state	= esp6_init_state,
++	.destructor	= esp6_destroy,
++	.get_max_size	= esp6_get_max_size,
++	.input		= esp6_input,
++	.output		= esp6_output
++};
++
++static struct inet6_protocol esp6_protocol = {
++	.handler 	=	xfrm6_rcv,
++	.err_handler	=	esp6_err,
++	.flags		=	INET6_PROTO_NOPOLICY,
++};
++
++static int __init esp6_init(void)
++{
++	if (xfrm_register_type(&esp6_type, AF_INET6) < 0) {
++		printk(KERN_INFO "ipv6 esp init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet6_add_protocol(&esp6_protocol, IPPROTO_ESP) < 0) {
++		printk(KERN_INFO "ipv6 esp init: can't add protocol\n");
++		xfrm_unregister_type(&esp6_type, AF_INET6);
++		return -EAGAIN;
++	}
++
++	return 0;
++}
++
++static void __exit esp6_fini(void)
++{
++	if (inet6_del_protocol(&esp6_protocol, IPPROTO_ESP) < 0)
++		printk(KERN_INFO "ipv6 esp close: can't remove protocol\n");
++	if (xfrm_unregister_type(&esp6_type, AF_INET6) < 0)
++		printk(KERN_INFO "ipv6 esp close: can't remove xfrm type\n");
++}
++
++module_init(esp6_init);
++module_exit(esp6_fini);
++
++MODULE_LICENSE("GPL");
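
The trickiest arithmetic in esp6_output above is the trailer handling: the plaintext is padded so that payload plus the two trailer bytes (pad length, next header) is a multiple of the cipher block size, and RFC 2406's default padding is the self-describing sequence 1, 2, 3, ... that esp6_input can later check. A self-contained sketch of the same computation in plain C, not kernel code; blksize is assumed to be a power of two, which the & ~(blksize-1) rounding requires (the kernel first rounds the cipher's block size up to a multiple of 4):

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* Round len + 2 trailer bytes up to the block size, as esp6_output
 * does with (clen + 2 + blksize-1) & ~(blksize-1). */
static size_t esp_clen(size_t len, size_t blksize)
{
	return (len + 2 + blksize - 1) & ~(blksize - 1);
}

/* Fill the ESP trailer: pad bytes 1..padlen, then the pad length,
 * then the next-header value stashed from the IPv6 header. */
static void esp_fill_trailer(uint8_t *tail, size_t len, size_t clen,
			     uint8_t nexthdr)
{
	size_t padlen = clen - len - 2, i;

	for (i = 0; i < padlen; i++)
		tail[i] = (uint8_t)(i + 1);
	tail[padlen] = (uint8_t)padlen;
	tail[padlen + 1] = nexthdr;
}

int main(void)
{
	uint8_t trailer[32];

	/* 100-byte payload, AES-like 16-byte blocks: 102 rounds to 112. */
	assert(esp_clen(100, 16) == 112);
	esp_fill_trailer(trailer, 100, 112, 6 /* IPPROTO_TCP */);
	assert(trailer[9] == 10 && trailer[10] == 10 && trailer[11] == 6);
	return 0;
}

With a 100-byte payload and 16-byte blocks the padded length is 112: ten pad bytes 1..10, the pad-length byte 10, and the stashed next-header value, exactly what the asserts check.
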
+diff -Nru a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
+--- a/net/ipv6/exthdrs.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/exthdrs.c	2005-02-13 21:25:09 +11:00
+@@ -18,6 +18,9 @@
+ /* Changes:
+  *	yoshfuji		: ensure not to overrun while parsing 
+  *				  tlv options.
++ *	Mitsuru KANDA @USAGI and: Remove ipv6_parse_exthdrs().
++ *	YOSHIFUJI Hideaki @USAGI  Register inbound extention header
++ *				  handlers as inet6_protocol{}.
+  */
+ 
+ #include <linux/errno.h>
+@@ -44,20 +47,6 @@
+ #include <asm/uaccess.h>
+ 
+ /*
+- *	Parsing inbound headers.
+- *
+- *	Parsing function "func" returns offset wrt skb->nh of the place,
+- *	where next nexthdr value is stored or NULL, if parsing
+- *	failed. It should also update skb->h tp point at the next header.
+- */
+-
+-struct hdrtype_proc
+-{
+-	int	type;
+-	int	(*func) (struct sk_buff **, int offset);
+-};
+-
+-/*
+  *	Parsing tlv encoded headers.
+  *
+  *	Parsing function "func" returns 1, if parsing succeed
+@@ -164,9 +153,9 @@
+ 	{-1,			NULL}
+ };
+ 
+-static int ipv6_dest_opt(struct sk_buff **skb_ptr, int nhoff)
++static int ipv6_destopt_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+ {
+-	struct sk_buff *skb=*skb_ptr;
++	struct sk_buff *skb = *skbp;
+ 	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ 
+ 	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8) ||
+@@ -179,29 +168,56 @@
+ 
+ 	if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) {
+ 		skb->h.raw += ((skb->h.raw[1]+1)<<3);
+-		return opt->dst1;
++		*nhoffp = opt->dst1;
++		return 1;
+ 	}
+ 
+ 	return -1;
+ }
+ 
++static struct inet6_protocol destopt_protocol =
++{
++	.handler	=	ipv6_destopt_rcv,
++	.flags		=	INET6_PROTO_NOPOLICY,
++};
++
++void __init ipv6_destopt_init(void)
++{
++	if (inet6_add_protocol(&destopt_protocol, IPPROTO_DSTOPTS) < 0)
++		printk(KERN_ERR "ipv6_destopt_init: Could not register protocol\n");
++}
++
+ /********************************
+   NONE header. No data in packet.
+  ********************************/
+ 
+-static int ipv6_nodata(struct sk_buff **skb_ptr, int nhoff)
++static int ipv6_nodata_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+ {
+-	kfree_skb(*skb_ptr);
+-	return -1;
++	struct sk_buff *skb = *skbp;
++
++	kfree_skb(skb);
++	return 0;
++}
++
++static struct inet6_protocol nodata_protocol =
++{
++	.handler	=	ipv6_nodata_rcv,
++	.flags		=	INET6_PROTO_NOPOLICY,
++};
++
++void __init ipv6_nodata_init(void)
++{
++	if (inet6_add_protocol(&nodata_protocol, IPPROTO_NONE) < 0)
++		printk(KERN_ERR "ipv6_nodata_init: Could not register protocol\n");
+ }
+ 
+ /********************************
+   Routing header.
+  ********************************/
+ 
+-static int ipv6_routing_header(struct sk_buff **skb_ptr, int nhoff)
++static int ipv6_rthdr_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+ {
+-	struct sk_buff *skb = *skb_ptr;
++	struct sk_buff *skb = *skbp;
+ 	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+ 	struct in6_addr *addr;
+ 	struct in6_addr daddr;
+@@ -232,7 +248,8 @@
+ 		skb->h.raw += (hdr->hdrlen + 1) << 3;
+ 		opt->dst0 = opt->dst1;
+ 		opt->dst1 = 0;
+-		return (&hdr->nexthdr) - skb->nh.raw;
++		*nhoffp = (&hdr->nexthdr) - skb->nh.raw;
++		return 1;
+ 	}
+ 
+ 	if (hdr->type != IPV6_SRCRT_TYPE_0) {
+@@ -247,7 +264,7 @@
+ 
+ 	/*
+ 	 *	This is the routing header forwarding algorithm from
+-	 *	RFC 1883, page 17.
++	 *	RFC 2460, page 16.
+ 	 */
+ 
+ 	n = hdr->hdrlen >> 1;
+@@ -265,7 +282,7 @@
+ 		kfree_skb(skb);
+ 		if (skb2 == NULL)
+ 			return -1;
+-		*skb_ptr = skb = skb2;
++		*skbp = skb = skb2;
+ 		opt = (struct inet6_skb_parm *)skb2->cb;
+ 		hdr = (struct ipv6_rt_hdr *) skb2->h.raw;
+ 	}
+@@ -294,7 +311,7 @@
+ 	ip6_route_input(skb);
+ 	if (skb->dst->error) {
+ 		skb_push(skb, skb->data - skb->nh.raw);
+-		skb->dst->input(skb);
++		dst_input(skb);
+ 		return -1;
+ 	}
+ 
+@@ -310,10 +327,22 @@
+ 	}
+ 
+ 	skb_push(skb, skb->data - skb->nh.raw);
+-	skb->dst->input(skb);
++	dst_input(skb);
+ 	return -1;
+ }
+ 
++static struct inet6_protocol rthdr_protocol =
++{
++	.handler	=	ipv6_rthdr_rcv,
++	.flags		=	INET6_PROTO_NOPOLICY,
++};
++
++void __init ipv6_rthdr_init(void)
++{
++	if (inet6_add_protocol(&rthdr_protocol, IPPROTO_ROUTING) < 0)
++		printk(KERN_ERR "ipv6_rthdr_init: Could not register protocol\n");
++}
++
+ /*
+    This function inverts received rthdr.
+    NOTE: specs allow to make it automatically only if
+@@ -379,97 +408,6 @@
+ 	return opt;
+ }
+ 
+-/********************************
+-  AUTH header.
+- ********************************/
+-
+-/*
+-   rfc1826 said, that if a host does not implement AUTH header
+-   it MAY ignore it. We use this hole 8)
+-
+-   Actually, now we can implement OSPFv6 without kernel IPsec.
+-   Authentication for poors may be done in user space with the same success.
+-
+-   Yes, it means, that we allow application to send/receive
+-   raw authentication header. Apparently, we suppose, that it knows
+-   what it does and calculates authentication data correctly.
+-   Certainly, it is possible only for udp and raw sockets, but not for tcp.
+-
+-   AUTH header has 4byte granular length, which kills all the idea
+-   behind AUTOMATIC 64bit alignment of IPv6. Now we will lose
+-   cpu ticks, checking that sender did not something stupid
+-   and opt->hdrlen is even. Shit!		--ANK (980730)
+- */
+-
+-static int ipv6_auth_hdr(struct sk_buff **skb_ptr, int nhoff)
+-{
+-	struct sk_buff *skb=*skb_ptr;
+-	struct inet6_skb_parm *opt = (struct inet6_skb_parm *)skb->cb;
+-	int len;
+-
+-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+8))
+-		goto fail;
+-
+-	/*
+-	 * RFC2402 2.2 Payload Length
+-	 * The 8-bit field specifies the length of AH in 32-bit words 
+-	 * (4-byte units), minus "2".
+-	 * -- Noriaki Takamiya @USAGI Project
+-	 */
+-	len = (skb->h.raw[1]+2)<<2;
+-
+-	if (len&7)
+-		goto fail;
+-
+-	if (!pskb_may_pull(skb, (skb->h.raw-skb->data)+len))
+-		goto fail;
+-
+-	opt->auth = skb->h.raw - skb->nh.raw;
+-	skb->h.raw += len;
+-	return opt->auth;
+-
+-fail:
+-	kfree_skb(skb);
+-	return -1;
+-}
+-
+-/* This list MUST NOT contain entry for NEXTHDR_HOP.
+-   It is parsed immediately after packet received
+-   and if it occurs somewhere in another place we must
+-   generate error.
+- */
+-
+-struct hdrtype_proc hdrproc_lst[] = {
+-	{NEXTHDR_FRAGMENT,	ipv6_reassembly},
+-	{NEXTHDR_ROUTING,	ipv6_routing_header},
+-	{NEXTHDR_DEST,		ipv6_dest_opt},
+-	{NEXTHDR_NONE,		ipv6_nodata},
+-	{NEXTHDR_AUTH,		ipv6_auth_hdr},
+-   /*
+-	{NEXTHDR_ESP,		ipv6_esp_hdr},
+-    */
+-	{-1,			NULL}
+-};
+-
+-int ipv6_parse_exthdrs(struct sk_buff **skb_in, int nhoff)
+-{
+-	struct hdrtype_proc *hdrt;
+-	u8 nexthdr = (*skb_in)->nh.raw[nhoff];
+-
+-restart:
+-	for (hdrt=hdrproc_lst; hdrt->type >= 0; hdrt++) {
+-		if (hdrt->type == nexthdr) {
+-			if ((nhoff = hdrt->func(skb_in, nhoff)) >= 0) {
+-				nexthdr = (*skb_in)->nh.raw[nhoff];
+-				goto restart;
+-			}
+-			return -1;
+-		}
+-	}
+-	return nhoff;
+-}
+-
+-
+ /**********************************
+   Hop-by-hop options.
+  **********************************/
+@@ -501,7 +439,7 @@
+ 	}
+ 
+ 	pkt_len = ntohl(*(u32*)(skb->nh.raw+optoff+2));
+-	if (pkt_len < 0x10000) {
++	if (pkt_len <= IPV6_MAXPLEN) {
+ 		icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, optoff+2);
+ 		return 0;
+ 	}
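
These hunks convert each extension-header parser to an inet6_protocol handler of type int (*handler)(struct sk_buff **, unsigned int *nhoffp), and the return value is a small protocol of its own, visible in ip6_input_finish later in this patch: a positive return, with *nhoffp updated to the offset of the next nexthdr byte, asks the caller to resubmit and dispatch the following header; zero means the packet was delivered or consumed; a negative value means the handler disposed of the skb itself. A toy model of that dispatch loop, with illustrative types rather than real skbs:

#include <stdio.h>

struct pkt { const unsigned char *data; unsigned int len; };

/* >0: resubmit with *nhoffp advanced; 0: consumed; <0: handler dropped it. */
typedef int (*hdr_handler)(struct pkt *p, unsigned int *nhoffp);

/* Pretend every header is 2 bytes: [nexthdr, length]. */
static int demo_skip_hdr(struct pkt *p, unsigned int *nhoffp)
{
	if (*nhoffp + 2 > p->len)
		return -1;
	*nhoffp += 2;
	return 1;
}

int main(void)
{
	unsigned char raw[] = { 60, 0, 43, 0, 59, 0 };
	struct pkt p = { raw, sizeof(raw) };
	unsigned int nhoff = 0;
	int ret;

	do {
		ret = demo_skip_hdr(&p, &nhoff);
	} while (ret > 0 && nhoff < p.len);
	printf("stopped with ret=%d at offset %u\n", ret, nhoff);
	return 0;
}
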
+diff -Nru a/net/ipv6/icmp.c b/net/ipv6/icmp.c
+--- a/net/ipv6/icmp.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/icmp.c	2005-02-13 21:25:09 +11:00
+@@ -26,6 +26,7 @@
+  *	yoshfuji		:	ensure to sent parameter problem for
+  *					fragments.
+  *	YOSHIFUJI Hideaki @USAGI:	added sysctl for icmp rate limit.
++ *	Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
+  */
+ 
+ #include <linux/module.h>
+@@ -74,17 +75,11 @@
+ #define icmpv6_socket	__icmpv6_socket[smp_processor_id()]
+ #define icmpv6_socket_cpu(X) __icmpv6_socket[(X)]
+ 
+-int icmpv6_rcv(struct sk_buff *skb);
++static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp);
+ 
+-static struct inet6_protocol icmpv6_protocol = 
+-{
+-	icmpv6_rcv,		/* handler		*/
+-	NULL,			/* error control	*/
+-	NULL,			/* next			*/
+-	IPPROTO_ICMPV6,		/* protocol ID		*/
+-	0,			/* copy			*/
+-	NULL,			/* data			*/
+-	"ICMPv6"	       	/* name			*/
++static struct inet6_protocol icmpv6_protocol = {
++	.handler	=	icmpv6_rcv,
++	.flags		=	INET6_PROTO_FINAL,
+ };
+ 
+ struct icmpv6_msg {
+@@ -116,40 +111,6 @@
+ 	spin_unlock_bh(&icmpv6_socket->sk->lock.slock);
+ }
+ 
+-/*
+- *	getfrag callback
+- */
+-
+-static int icmpv6_getfrag(const void *data, struct in6_addr *saddr, 
+-			   char *buff, unsigned int offset, unsigned int len)
+-{
+-	struct icmpv6_msg *msg = (struct icmpv6_msg *) data;
+-	struct icmp6hdr *icmph;
+-	__u32 csum;
+-
+-	if (offset) {
+-		csum = skb_copy_and_csum_bits(msg->skb, msg->offset +
+-					      (offset - sizeof(struct icmp6hdr)),
+-					      buff, len, msg->csum);
+-		msg->csum = csum;
+-		return 0;
+-	}
+-
+-	csum = csum_partial_copy_nocheck((void *) &msg->icmph, buff,
+-					 sizeof(struct icmp6hdr), msg->csum);
+-
+-	csum = skb_copy_and_csum_bits(msg->skb, msg->offset,
+-				      buff + sizeof(struct icmp6hdr),
+-				      len - sizeof(struct icmp6hdr), csum);
+-
+-	icmph = (struct icmp6hdr *) buff;
+-
+-	icmph->icmp6_cksum = csum_ipv6_magic(saddr, msg->daddr, msg->len,
+-					     IPPROTO_ICMPV6, csum);
+-	return 0; 
+-}
+-
+-
+ /* 
+  * Slightly more convenient version of icmpv6_send.
+  */
+@@ -252,21 +213,74 @@
+ 	return (optval&0xC0) == 0x80;
+ }
+ 
++int icmpv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct icmp6hdr *thdr, int len)
++{
++	struct sk_buff *skb;
++	struct icmp6hdr *icmp6h;
++	int err = 0;
++
++	if ((skb = skb_peek(&sk->write_queue)) == NULL)
++		goto out;
++
++	icmp6h = (struct icmp6hdr*) skb->h.raw;
++	memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
++	icmp6h->icmp6_cksum = 0;
++
++	if (skb_queue_len(&sk->write_queue) == 1) {
++		skb->csum = csum_partial((char *)icmp6h,
++					sizeof(struct icmp6hdr), skb->csum);
++		icmp6h->icmp6_cksum = csum_ipv6_magic(&fl->fl6_src,
++						      &fl->fl6_dst,
++						      len, fl->proto,
++						      skb->csum);
++	} else {
++		u32 tmp_csum = 0;
++
++		skb_queue_walk(&sk->write_queue, skb) {
++			tmp_csum = csum_add(tmp_csum, skb->csum);
++		}
++
++		tmp_csum = csum_partial((char *)icmp6h,
++					sizeof(struct icmp6hdr), tmp_csum);
++		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
++					   &fl->fl6_dst,
++					   len, fl->proto, tmp_csum);
++		icmp6h->icmp6_cksum = tmp_csum;
++	}
++	if (icmp6h->icmp6_cksum == 0)
++		icmp6h->icmp6_cksum = -1;
++	ip6_push_pending_frames(sk);
++out:
++	return err;
++}
++
++static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
++{
++	struct sk_buff *org_skb = (struct sk_buff *)from;
++	__u32 csum = 0;
++	csum = skb_copy_and_csum_bits(org_skb, offset, to, len, csum);
++	skb->csum = csum_block_add(skb->csum, csum, odd);
++	return 0;
++}
++
+ /*
+  *	Send an ICMP message in response to a packet in error
+  */
+-
+ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, 
+ 		 struct net_device *dev)
+ {
+ 	struct ipv6hdr *hdr = skb->nh.ipv6h;
+ 	struct sock *sk = icmpv6_socket->sk;
++	struct ipv6_pinfo *np = inet6_sk(sk);
+ 	struct in6_addr *saddr = NULL;
+-	int iif = 0;
+-	struct icmpv6_msg msg;
++	struct dst_entry *dst;
++	struct icmp6hdr tmp_hdr;
+ 	struct flowi fl;
++	int iif = 0;
+ 	int addr_type = 0;
+-	int len;
++	int len, plen;
++	int hlimit = -1;
++	int err = 0;
+ 
+ 	if ((u8*)hdr < skb->head || (u8*)(hdr+1) > skb->tail)
+ 		return;
+@@ -324,13 +338,14 @@
+ 		return;
+ 	}
+ 
++	memset(&fl, 0, sizeof(fl));
+ 	fl.proto = IPPROTO_ICMPV6;
+-	fl.nl_u.ip6_u.daddr = &hdr->saddr;
+-	fl.nl_u.ip6_u.saddr = saddr;
++	ipv6_addr_copy(&fl.fl6_dst, &hdr->saddr);
++	if (saddr)
++		ipv6_addr_copy(&fl.fl6_src, saddr);
+ 	fl.oif = iif;
+-	fl.fl6_flowlabel = 0;
+-	fl.uli_u.icmpt.type = type;
+-	fl.uli_u.icmpt.code = code;
++	fl.fl_icmp_type = type;
++	fl.fl_icmp_code = code;
+ 
+ 	if (icmpv6_xmit_lock())
+ 		return;
+@@ -338,37 +353,52 @@
+ 	if (!icmpv6_xrlim_allow(sk, type, &fl))
+ 		goto out;
+ 
+-	/*
+-	 *	ok. kick it. checksum will be provided by the 
+-	 *	getfrag_t callback.
+-	 */
++	tmp_hdr.icmp6_type = type;
++	tmp_hdr.icmp6_code = code;
++	tmp_hdr.icmp6_cksum = 0;
++	tmp_hdr.icmp6_pointer = htonl(info);
++
++	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
++		fl.oif = np->mcast_oif;
+ 
+-	msg.icmph.icmp6_type = type;
+-	msg.icmph.icmp6_code = code;
+-	msg.icmph.icmp6_cksum = 0;
+-	msg.icmph.icmp6_pointer = htonl(info);
+-
+-	msg.skb = skb;
+-	msg.offset = skb->nh.raw - skb->data;
+-	msg.csum = 0;
+-	msg.daddr = &hdr->saddr;
++	err = ip6_dst_lookup(sk, &dst, &fl);
++	if (err)
++		goto out;
+ 
+-	len = skb->len - msg.offset + sizeof(struct icmp6hdr);
+-	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr));
++	if (hlimit < 0) {
++		if (ipv6_addr_is_multicast(&fl.fl6_dst))
++			hlimit = np->mcast_hops;
++		else
++			hlimit = np->hop_limit;
++		if (hlimit < 0)
++			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
++	}
+ 
++	plen = skb->nh.raw - skb->data;
++	__skb_pull(skb, plen);
++	len = skb->len;
++	len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
+ 	if (len < 0) {
+ 		if (net_ratelimit())
+ 			printk(KERN_DEBUG "icmp: len problem\n");
+-		goto out;
++		__skb_push(skb, plen);
++		goto out_dst_release;
+ 	}
+ 
+-	msg.len = len;
++	err = ip6_append_data(sk, icmpv6_getfrag, skb, len + sizeof(struct icmp6hdr), sizeof(struct icmp6hdr),
++				hlimit, NULL, &fl, (struct rt6_info*)dst, MSG_DONTWAIT);
++	if (err) {
++		ip6_flush_pending_frames(sk);
++		goto out_dst_release;
++	}
++	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, len + sizeof(struct icmp6hdr));
++	__skb_push(skb, plen);
+ 
+-	ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, len, NULL, -1,
+-		       MSG_DONTWAIT);
+ 	if (type >= ICMPV6_DEST_UNREACH && type <= ICMPV6_PARAMPROB)
+ 		(&(icmpv6_statistics[smp_processor_id()*2].Icmp6OutDestUnreachs))[type-1]++;
+ 	ICMP6_INC_STATS_BH(Icmp6OutMsgs);
++out_dst_release:
++	dst_release(dst);
+ out:
+ 	icmpv6_xmit_unlock();
+ }
+@@ -376,45 +406,66 @@
+ static void icmpv6_echo_reply(struct sk_buff *skb)
+ {
+ 	struct sock *sk = icmpv6_socket->sk;
++	struct ipv6_pinfo *np = inet6_sk(sk);
++	struct in6_addr *saddr = NULL;
+ 	struct icmp6hdr *icmph = (struct icmp6hdr *) skb->h.raw;
+-	struct in6_addr *saddr;
+-	struct icmpv6_msg msg;
++	struct icmp6hdr tmp_hdr;
+ 	struct flowi fl;
++	struct dst_entry *dst;
++	int err = 0;
++	int hlimit = -1;
+ 
+ 	saddr = &skb->nh.ipv6h->daddr;
+ 
+-	if (ipv6_addr_type(saddr) & IPV6_ADDR_MULTICAST ||
+-	    ipv6_chk_acast_addr(0, saddr)) 
++	if (!ipv6_unicast_destination(skb))
+ 		saddr = NULL;
+ 
+-	msg.icmph.icmp6_type = ICMPV6_ECHO_REPLY;
+-	msg.icmph.icmp6_code = 0;
+-	msg.icmph.icmp6_cksum = 0;
+-	msg.icmph.icmp6_identifier = icmph->icmp6_identifier;
+-	msg.icmph.icmp6_sequence = icmph->icmp6_sequence;
+-
+-	msg.skb = skb;
+-	msg.offset = 0;
+-	msg.csum = 0;
+-	msg.len = skb->len + sizeof(struct icmp6hdr);
+-	msg.daddr =  &skb->nh.ipv6h->saddr;
++	memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
++	tmp_hdr.icmp6_type = ICMPV6_ECHO_REPLY;
+ 
++	memset(&fl, 0, sizeof(fl));
+ 	fl.proto = IPPROTO_ICMPV6;
+-	fl.nl_u.ip6_u.daddr = msg.daddr;
+-	fl.nl_u.ip6_u.saddr = saddr;
++	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
++	if (saddr)
++		ipv6_addr_copy(&fl.fl6_src, saddr);
+ 	fl.oif = skb->dev->ifindex;
+-	fl.fl6_flowlabel = 0;
+-	fl.uli_u.icmpt.type = ICMPV6_ECHO_REPLY;
+-	fl.uli_u.icmpt.code = 0;
++	fl.fl_icmp_type = ICMPV6_ECHO_REPLY;
+ 
+ 	if (icmpv6_xmit_lock())
+ 		return;
+ 
+-	ip6_build_xmit(sk, icmpv6_getfrag, &msg, &fl, msg.len, NULL, -1,
+-		       MSG_DONTWAIT);
+-	ICMP6_INC_STATS_BH(Icmp6OutEchoReplies);
+-	ICMP6_INC_STATS_BH(Icmp6OutMsgs);
++	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
++		fl.oif = np->mcast_oif;
++
++	err = ip6_dst_lookup(sk, &dst, &fl);
++	if (err)
++		goto out;
+ 
++	if (hlimit < 0) {
++		if (ipv6_addr_is_multicast(&fl.fl6_dst))
++			hlimit = np->mcast_hops;
++		else
++			hlimit = np->hop_limit;
++		if (hlimit < 0)
++			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
++	}
++
++	err = ip6_append_data(sk, icmpv6_getfrag, skb, skb->len + sizeof(struct icmp6hdr),
++				sizeof(struct icmp6hdr), hlimit, NULL, &fl,
++				(struct rt6_info*)dst, MSG_DONTWAIT);
++  
++	if (err) {
++		ip6_flush_pending_frames(sk);
++		goto out_dst_release;
++	}
++	err = icmpv6_push_pending_frames(sk, &fl, &tmp_hdr, skb->len + sizeof(struct icmp6hdr));
++
++	ICMP6_INC_STATS_BH(Icmp6OutEchoReplies);
++	ICMP6_INC_STATS_BH(Icmp6OutMsgs);
++
++out_dst_release:
++	dst_release(dst);
++out:
+ 	icmpv6_xmit_unlock();
+ }
+ 
+@@ -456,15 +507,9 @@
+ 
+ 	hash = nexthdr & (MAX_INET_PROTOS - 1);
+ 
+-	for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
+-	     ipprot != NULL; 
+-	     ipprot=(struct inet6_protocol *)ipprot->next) {
+-		if (ipprot->protocol != nexthdr)
+-			continue;
+-
+-		if (ipprot->err_handler)
+-			ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+-	}
++	ipprot = inet6_protos[hash];
++	if (ipprot && ipprot->err_handler)
++		ipprot->err_handler(skb, NULL, type, code, inner_offset, info);
+ 
+ 	read_lock(&raw_v6_lock);
+ 	if ((sk = raw_v6_htable[hash]) != NULL) {
+@@ -480,8 +525,9 @@
+  *	Handle icmp messages
+  */
+ 
+-int icmpv6_rcv(struct sk_buff *skb)
++static int icmpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+ {
++	struct sk_buff *skb = *pskb;
+ 	struct net_device *dev = skb->dev;
+ 	struct in6_addr *saddr, *daddr;
+ 	struct ipv6hdr *orig_hdr;
+@@ -508,22 +554,7 @@
+ 				    skb_checksum(skb, 0, skb->len, 0))) {
+ 			if (net_ratelimit())
+ 				printk(KERN_DEBUG "ICMPv6 checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n",
+-				       ntohs(saddr->s6_addr16[0]),
+-				       ntohs(saddr->s6_addr16[1]),
+-				       ntohs(saddr->s6_addr16[2]),
+-				       ntohs(saddr->s6_addr16[3]),
+-				       ntohs(saddr->s6_addr16[4]),
+-				       ntohs(saddr->s6_addr16[5]),
+-				       ntohs(saddr->s6_addr16[6]),
+-				       ntohs(saddr->s6_addr16[7]),
+-				       ntohs(daddr->s6_addr16[0]),
+-				       ntohs(daddr->s6_addr16[1]),
+-				       ntohs(daddr->s6_addr16[2]),
+-				       ntohs(daddr->s6_addr16[3]),
+-				       ntohs(daddr->s6_addr16[4]),
+-				       ntohs(daddr->s6_addr16[5]),
+-				       ntohs(daddr->s6_addr16[6]),
+-				       ntohs(daddr->s6_addr16[7]));
++				       NIP6(*saddr), NIP6(*daddr));
+ 			goto discard_it;
+ 		}
+ 	}
+@@ -659,7 +690,12 @@
+ 		sk->prot->unhash(sk);
+ 	}
+ 
+-	inet6_add_protocol(&icmpv6_protocol);
++	if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0) {
++		printk(KERN_ERR "Failed to register ICMP6 protocol\n");
++		sock_release(icmpv6_socket);
++		icmpv6_socket = NULL;
++		return -EAGAIN;
++	}
+ 
+ 	return 0;
+ fail:
+@@ -678,7 +714,7 @@
+ 		sock_release(icmpv6_socket_cpu(i));
+ 		icmpv6_socket_cpu(i) = NULL;
+ 	}
+-	inet6_del_protocol(&icmpv6_protocol);
++	inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
+ }
+ 
+ static struct icmp6_err {
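
icmpv6_push_pending_frames above folds the per-fragment partial sums from the socket's write queue into one value and finishes with csum_ipv6_magic, which mixes in the pseudo-header RFC 2460 section 8.1 mandates for ICMPv6: source address, destination address, upper-layer packet length, and the next-header value 58. A from-scratch portable-C sketch of that checksum; the kernel's csum_* helpers are arch-optimized equivalents of this arithmetic:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* One's-complement sum of big-endian 16-bit words, folded to 16 bits. */
static uint32_t csum_add16(uint32_t sum, const uint8_t *p, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)p[i] << 8) | p[i + 1];
	if (len & 1)
		sum += (uint32_t)p[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return sum;
}

/* ICMPv6 checksum = ~sum(pseudo-header + message), with the pseudo-header
 * built from src, dst, the 32-bit payload length and next header 58. */
static uint16_t icmp6_cksum(const uint8_t src[16], const uint8_t dst[16],
			    const uint8_t *msg, uint32_t len)
{
	uint8_t ph[8] = {
		(uint8_t)(len >> 24), (uint8_t)(len >> 16),
		(uint8_t)(len >> 8),  (uint8_t)len,
		0, 0, 0, 58 /* IPPROTO_ICMPV6 */
	};
	uint32_t sum = 0;

	sum = csum_add16(sum, src, 16);
	sum = csum_add16(sum, dst, 16);
	sum = csum_add16(sum, ph, 8);
	sum = csum_add16(sum, msg, len);
	return (uint16_t)~sum;
}

int main(void)
{
	uint8_t src[16] = { [15] = 1 }, dst[16] = { [15] = 1 };  /* ::1 */
	uint8_t msg[8]  = { 128, 0, 0, 0, 0, 1, 0, 1 };          /* echo req */
	uint16_t c = icmp6_cksum(src, dst, msg, sizeof(msg));

	msg[2] = (uint8_t)(c >> 8);
	msg[3] = (uint8_t)c;
	/* A message carrying its own checksum must verify to zero. */
	assert(icmp6_cksum(src, dst, msg, sizeof(msg)) == 0);
	return 0;
}
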
+diff -Nru a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
+--- a/net/ipv6/ip6_fib.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/ip6_fib.c	2005-02-13 21:25:09 +11:00
+@@ -40,7 +40,6 @@
+ #include <net/ip6_route.h>
+ 
+ #define RT6_DEBUG 2
+-#undef CONFIG_IPV6_SUBTREES
+ 
+ #if RT6_DEBUG >= 3
+ #define RT6_TRACE(x...) printk(KERN_DEBUG x)
+@@ -453,7 +452,6 @@
+ 			 */
+ 
+ 			if ((iter->rt6i_dev == rt->rt6i_dev) &&
+-			    (iter->rt6i_flowr == rt->rt6i_flowr) &&
+ 			    (ipv6_addr_cmp(&iter->rt6i_gateway,
+ 					   &rt->rt6i_gateway) == 0)) {
+ 				if (!(iter->rt6i_flags&RTF_EXPIRES))
+@@ -500,13 +498,19 @@
+ 		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
+ }
+ 
++void fib6_force_start_gc(void)
++{
++	if (ip6_fib_timer.expires == 0)
++		mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval);
++}
++
+ /*
+  *	Add routing information to the routing tree.
+  *	<destination addr>/<source addr>
+  *	with source addr info in sub-trees
+  */
+ 
+-int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh)
++int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	struct fib6_node *fn;
+ 	int err = -ENOMEM;
+@@ -597,8 +601,8 @@
+ 	   is orphan. If it is, shoot it.
+ 	 */
+ st_failure:
+-	if (fn && !(fn->fn_flags&RTN_RTINFO|RTN_ROOT))
+-		fib_repair_tree(fn);
++	if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
++		fib6_repair_tree(fn);
+ 	dst_free(&rt->u.dst);
+ 	return err;
+ #endif
+@@ -888,7 +892,7 @@
+ }
+ 
+ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
+-    struct nlmsghdr *nlh)
++    struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	struct fib6_walker_t *w;
+ 	struct rt6_info *rt = *rtp;
+@@ -947,7 +951,7 @@
+ 	rt6_release(rt);
+ }
+ 
+-int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh)
++int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	struct fib6_node *fn = rt->rt6i_node;
+ 	struct rt6_info **rtp;
+@@ -972,7 +976,7 @@
+ 
+ 	for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
+ 		if (*rtp == rt) {
+-			fib6_del_route(fn, rtp, nlh);
++			fib6_del_route(fn, rtp, nlh, _rtattr);
+ 			return 0;
+ 		}
+ 	}
+@@ -1101,7 +1105,7 @@
+ 		res = c->func(rt, c->arg);
+ 		if (res < 0) {
+ 			w->leaf = rt;
+-			res = fib6_del(rt, NULL);
++			res = fib6_del(rt, NULL, NULL);
+ 			if (res) {
+ #if RT6_DEBUG >= 2
+ 				printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
+@@ -1218,6 +1222,7 @@
+ 
+ 
+ 	write_lock_bh(&rt6_lock);
++	ndisc_dst_gc(&gc_args.more);
+ 	fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL);
+ 	write_unlock_bh(&rt6_lock);
+ 
+@@ -1232,17 +1237,17 @@
+ 
+ void __init fib6_init(void)
+ {
+-	if (!fib6_node_kmem)
+-		fib6_node_kmem = kmem_cache_create("fib6_nodes",
+-						   sizeof(struct fib6_node),
+-						   0, SLAB_HWCACHE_ALIGN,
+-						   NULL, NULL);
++	fib6_node_kmem = kmem_cache_create("fib6_nodes",
++					   sizeof(struct fib6_node),
++					   0, SLAB_HWCACHE_ALIGN,
++					   NULL, NULL);
+ }
+ 
+ #ifdef MODULE
+ void fib6_gc_cleanup(void)
+ {
+ 	del_timer(&ip6_fib_timer);
++	kmem_cache_destroy(fib6_node_kmem);
+ }
+ #endif
+ 
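
One hunk in the fib6_add error path above is worth spelling out: the old condition !(fn->fn_flags&RTN_RTINFO|RTN_ROOT) parses as !((fn->fn_flags & RTN_RTINFO) | RTN_ROOT) because & binds tighter than |, so with any nonzero RTN_ROOT the test is constant false and the orphan-node cleanup (along with the misspelled fib_repair_tree call next to it) could never run. A two-assert demonstration; the flag values are illustrative, not the kernel's:

#include <assert.h>

#define RTN_RTINFO 0x01	/* illustrative values */
#define RTN_ROOT   0x02

int main(void)
{
	int flags = 0;	/* node carries neither flag: should be repaired */

	/* Old test: (flags & RTN_RTINFO) | RTN_ROOT is always nonzero,
	 * which is exactly what gcc's -Wparentheses warns about. */
	assert((!(flags & RTN_RTINFO | RTN_ROOT)) == 0);
	/* Fixed test from the patch: mask against both flags at once. */
	assert((!(flags & (RTN_RTINFO | RTN_ROOT))) == 1);
	return 0;
}
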
+diff -Nru a/net/ipv6/ip6_fw.c b/net/ipv6/ip6_fw.c
+--- a/net/ipv6/ip6_fw.c	2005-02-13 21:25:10 +11:00
++++ /dev/null	Wed Dec 31 16:00:00 196900
+@@ -1,390 +0,0 @@
+-/*
+- *	IPv6 Firewall
+- *	Linux INET6 implementation
+- *
+- *	Authors:
+- *	Pedro Roque		<roque at di.fc.ul.pt>	
+- *
+- *	$Id: ip6_fw.c,v 1.16 2001/10/31 08:17:58 davem Exp $
+- *
+- *	This program is free software; you can redistribute it and/or
+- *      modify it under the terms of the GNU General Public License
+- *      as published by the Free Software Foundation; either version
+- *      2 of the License, or (at your option) any later version.
+- */
+-
+-#include <linux/config.h>
+-#include <linux/errno.h>
+-#include <linux/types.h>
+-#include <linux/string.h>
+-#include <linux/socket.h>
+-#include <linux/sockios.h>
+-#include <linux/net.h>
+-#include <linux/route.h>
+-#include <linux/netdevice.h>
+-#include <linux/in6.h>
+-#include <linux/udp.h>
+-#include <linux/init.h>
+-
+-#include <net/ipv6.h>
+-#include <net/ip6_route.h>
+-#include <net/ip6_fw.h>
+-#include <net/netlink.h>
+-
+-static unsigned long ip6_fw_rule_cnt;
+-static struct ip6_fw_rule ip6_fw_rule_list = {
+-	{0},
+-	NULL, NULL,
+-	{0},
+-	IP6_FW_REJECT
+-};
+-
+-static int ip6_fw_accept(struct dst_entry *dst, struct fl_acc_args *args);
+-
+-struct flow_rule_ops ip6_fw_ops = {
+-	ip6_fw_accept
+-};
+-
+-
+-static struct rt6_info ip6_fw_null_entry = {
+-	{{NULL, 0, 0, NULL,
+-	  0, 0, 0, 0, 0, 0, 0, 0, -ENETUNREACH, NULL, NULL,
+-	  ip6_pkt_discard, ip6_pkt_discard, NULL}},
+-	NULL, {{{0}}}, 256, RTF_REJECT|RTF_NONEXTHOP, ~0UL,
+-	0, &ip6_fw_rule_list, {{{{0}}}, 128}, {{{{0}}}, 128}
+-};
+-
+-static struct fib6_node ip6_fw_fib = {
+-	NULL, NULL, NULL, NULL,
+-	&ip6_fw_null_entry,
+-	0, RTN_ROOT|RTN_TL_ROOT, 0
+-};
+-
+-rwlock_t ip6_fw_lock = RW_LOCK_UNLOCKED;
+-
+-
+-static void ip6_rule_add(struct ip6_fw_rule *rl)
+-{
+-	struct ip6_fw_rule *next;
+-
+-	write_lock_bh(&ip6_fw_lock);
+-	ip6_fw_rule_cnt++;
+-	next = &ip6_fw_rule_list;
+-	rl->next = next;
+-	rl->prev = next->prev;
+-	rl->prev->next = rl;
+-	next->prev = rl;
+-	write_unlock_bh(&ip6_fw_lock);
+-}
+-
+-static void ip6_rule_del(struct ip6_fw_rule *rl)
+-{
+-	struct ip6_fw_rule *next, *prev;
+-
+-	write_lock_bh(&ip6_fw_lock);
+-	ip6_fw_rule_cnt--;
+-	next = rl->next;
+-	prev = rl->prev;
+-	next->prev = prev;
+-	prev->next = next;
+-	write_unlock_bh(&ip6_fw_lock);
+-}
+-
+-static __inline__ struct ip6_fw_rule * ip6_fwrule_alloc(void)
+-{
+-	struct ip6_fw_rule *rl;
+-
+-	rl = kmalloc(sizeof(struct ip6_fw_rule), GFP_ATOMIC);
+-	if (rl)
+-	{
+-		memset(rl, 0, sizeof(struct ip6_fw_rule));
+-		rl->flowr.ops = &ip6_fw_ops;
+-	}
+-	return rl;
+-}
+-
+-static __inline__ void ip6_fwrule_free(struct ip6_fw_rule * rl)
+-{
+-	kfree(rl);
+-}
+-
+-static __inline__ int port_match(int rl_port, int fl_port)
+-{
+-	int res = 0;
+-	if (rl_port == 0 || (rl_port == fl_port))
+-		res = 1;
+-	return res;
+-}
+-
+-static int ip6_fw_accept_trans(struct ip6_fw_rule *rl,
+-			       struct fl_acc_args *args)
+-{
+-	int res = FLOWR_NODECISION;
+-	int proto = 0;
+-	int sport = 0;
+-	int dport = 0;
+-
+-	switch (args->type) {
+-	case FL_ARG_FORWARD:
+-	{
+-		struct sk_buff *skb = args->fl_u.skb;
+-		struct ipv6hdr *hdr = skb->nh.ipv6h;
+-		int len;
+-
+-		len = skb->len - sizeof(struct ipv6hdr);
+-
+-		proto = hdr->nexthdr;
+-
+-		switch (proto) {
+-		case IPPROTO_TCP:
+-		{
+-			struct tcphdr *th;
+-
+-			if (len < sizeof(struct tcphdr)) {
+-				res = FLOWR_ERROR;
+-				goto out;
+-			}
+-			th = (struct tcphdr *)(hdr + 1);
+-			sport = th->source;
+-			dport = th->dest;
+-			break;
+-		}
+-		case IPPROTO_UDP:
+-		{
+-			struct udphdr *uh;
+-
+-			if (len < sizeof(struct udphdr)) {
+-				res = FLOWR_ERROR;
+-				goto out;
+-			}
+-			uh = (struct udphdr *)(hdr + 1);
+-			sport = uh->source;
+-			dport = uh->dest;
+-			break;
+-		}
+-		default:
+-			goto out;
+-		};
+-		break;
+-	}
+-
+-	case FL_ARG_ORIGIN:
+-	{
+-		proto = args->fl_u.fl_o.flow->proto;
+-
+-		if (proto == IPPROTO_ICMPV6) {
+-			goto out;
+-		} else {
+-			sport = args->fl_u.fl_o.flow->uli_u.ports.sport;
+-			dport = args->fl_u.fl_o.flow->uli_u.ports.dport;
+-		}
+-		break;
+-	}
+-
+-	if (proto == rl->info.proto &&
+-	    port_match(args->fl_u.fl_o.flow->uli_u.ports.sport, sport) &&
+-	    port_match(args->fl_u.fl_o.flow->uli_u.ports.dport, dport)) {
+-		if (rl->policy & IP6_FW_REJECT)
+-			res = FLOWR_SELECT;
+-		else
+-			res = FLOWR_CLEAR;
+-	}
+-
+-	default:
+-#if IP6_FW_DEBUG >= 1
+-		printk(KERN_DEBUG "ip6_fw_accept: unknown arg type\n");
+-#endif
+-		goto out;
+-	};
+-
+-out:
+-	return res;
+-}
+-
+-static int ip6_fw_accept(struct dst_entry *dst, struct fl_acc_args *args)
+-{
+-	struct rt6_info *rt;
+-	struct ip6_fw_rule *rl;
+-	int proto;
+-	int res = FLOWR_NODECISION;
+-
+-	rt = (struct rt6_info *) dst;
+-	rl = (struct ip6_fw_rule *) rt->rt6i_flowr;
+-
+-	proto = rl->info.proto;
+-
+-	switch (proto) {
+-	case 0:
+-		if (rl->policy & IP6_FW_REJECT)
+-			res = FLOWR_SELECT;
+-		else
+-			res = FLOWR_CLEAR;
+-		break;
+-	case IPPROTO_TCP:
+-	case IPPROTO_UDP:
+-		res = ip6_fw_accept_trans(rl, args);
+-		break;
+-	case IPPROTO_ICMPV6:
+-	};
+-
+-	return res;
+-}
+-
+-static struct dst_entry * ip6_fw_dup(struct dst_entry *frule,
+-				     struct dst_entry *rt,
+-				     struct fl_acc_args *args)
+-{
+-	struct ip6_fw_rule *rl;
+-	struct rt6_info *nrt;
+-	struct rt6_info *frt;
+-
+-	frt = (struct rt6_info *) frule;
+-
+-	rl = (struct ip6_fw_rule *) frt->rt6i_flowr;
+-
+-	nrt = ip6_rt_copy((struct rt6_info *) rt);
+-
+-	if (nrt) {
+-		nrt->u.dst.input = frule->input;
+-		nrt->u.dst.output = frule->output;
+-
+-		nrt->rt6i_flowr = flow_clone(frt->rt6i_flowr);
+-
+-		nrt->rt6i_flags |= RTF_CACHE;
+-		nrt->rt6i_tstamp = jiffies;
+-	}
+-
+-	return (struct dst_entry *) nrt;
+-}
+-
+-int ip6_fw_reject(struct sk_buff *skb)
+-{
+-#if IP6_FW_DEBUG >= 1
+-	printk(KERN_DEBUG "packet rejected: \n");
+-#endif
+-
+-	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADM_PROHIBITED, 0,
+-		    skb->dev);
+-	/*
+-	 *	send it via netlink, as (rule, skb)
+-	 */
+-
+-	kfree_skb(skb);
+-	return 0;
+-}
+-
+-int ip6_fw_discard(struct sk_buff *skb)
+-{
+-	printk(KERN_DEBUG "ip6_fw: BUG fw_reject called\n");
+-	kfree_skb(skb);
+-	return 0;
+-}
+-
+-int ip6_fw_msg_add(struct ip6_fw_msg *msg)
+-{
+-	struct in6_rtmsg rtmsg;
+-	struct ip6_fw_rule *rl;
+-	struct rt6_info *rt;
+-	int err;
+-
+-	ipv6_addr_copy(&rtmsg.rtmsg_dst, &msg->dst);
+-	ipv6_addr_copy(&rtmsg.rtmsg_src, &msg->src);
+-	rtmsg.rtmsg_dst_len = msg->dst_len;
+-	rtmsg.rtmsg_src_len = msg->src_len;
+-	rtmsg.rtmsg_metric = IP6_RT_PRIO_FW;
+-
+-	rl = ip6_fwrule_alloc();
+-
+-	if (rl == NULL)
+-		return -ENOMEM;
+-
+-	rl->policy = msg->policy;
+-	rl->info.proto = msg->proto;
+-	rl->info.uli_u.data = msg->u.data;
+-
+-	rtmsg.rtmsg_flags = RTF_NONEXTHOP|RTF_POLICY;
+-	err = ip6_route_add(&rtmsg);
+-
+-	if (err) {
+-		ip6_fwrule_free(rl);
+-		return err;
+-	}
+-
+-	/* The rest will not work for now. --ABK (989725) */
+-
+-#ifndef notdef
+-	ip6_fwrule_free(rl);
+-	return -EPERM;
+-#else
+-	rt->u.dst.error = -EPERM;
+-
+-	if (msg->policy == IP6_FW_ACCEPT) {
+-		/*
+-		 *	Accept rules are never selected
+-		 *	(i.e. packets use normal forwarding)
+-		 */
+-		rt->u.dst.input = ip6_fw_discard;
+-		rt->u.dst.output = ip6_fw_discard;
+-	} else {
+-		rt->u.dst.input = ip6_fw_reject;
+-		rt->u.dst.output = ip6_fw_reject;
+-	}
+-
+-	ip6_rule_add(rl);
+-
+-	rt->rt6i_flowr = flow_clone((struct flow_rule *)rl);
+-
+-	return 0;
+-#endif
+-}
+-
+-static int ip6_fw_msgrcv(int unit, struct sk_buff *skb)
+-{
+-	int count = 0;
+-
+-	while (skb->len) {
+-		struct ip6_fw_msg *msg;
+-
+-		if (skb->len < sizeof(struct ip6_fw_msg)) {
+-			count = -EINVAL;
+-			break;
+-		}
+-
+-		msg = (struct ip6_fw_msg *) skb->data;
+-		skb_pull(skb, sizeof(struct ip6_fw_msg));
+-		count += sizeof(struct ip6_fw_msg);
+-
+-		switch (msg->action) {
+-		case IP6_FW_MSG_ADD:
+-			ip6_fw_msg_add(msg);
+-			break;
+-		case IP6_FW_MSG_DEL:
+-			break;
+-		default:
+-			return -EINVAL;
+-		};
+-	}
+-
+-	return count;
+-}
+-
+-static void ip6_fw_destroy(struct flow_rule *rl)
+-{
+-	ip6_fwrule_free((struct ip6_fw_rule *)rl);
+-}
+-
+-#ifdef MODULE
+-#define ip6_fw_init module_init
+-#endif
+-
+-void __init ip6_fw_init(void)
+-{
+-	netlink_attach(NETLINK_IP6_FW, ip6_fw_msgrcv);
+-}
+-
+-#ifdef MODULE
+-void cleanup_module(void)
+-{
+-	netlink_detach(NETLINK_IP6_FW);
+-}
+-#endif
+diff -Nru a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
+--- a/net/ipv6/ip6_input.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/ip6_input.c	2005-02-13 21:25:09 +11:00
+@@ -15,6 +15,11 @@
+  *      as published by the Free Software Foundation; either version
+  *      2 of the License, or (at your option) any later version.
+  */
++/* Changes
++ *
++ * 	Mitsuru KANDA @USAGI and
++ * 	YOSHIFUJI Hideaki @USAGI: Remove ipv6_parse_exthdrs().
++ */
+ 
+ #include <linux/errno.h>
+ #include <linux/types.h>
+@@ -39,6 +44,7 @@
+ #include <net/ndisc.h>
+ #include <net/ip6_route.h>
+ #include <net/addrconf.h>
++#include <net/xfrm.h>
+ 
+ 
+ 
+@@ -47,7 +53,7 @@
+ 	if (skb->dst == NULL)
+ 		ip6_route_input(skb);
+ 
+-	return skb->dst->input(skb);
++	return dst_input(skb);
+ }
+ 
+ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt)
+@@ -121,13 +127,12 @@
+ 
+ static inline int ip6_input_finish(struct sk_buff *skb)
+ {
+-	struct ipv6hdr *hdr = skb->nh.ipv6h;
+ 	struct inet6_protocol *ipprot;
+ 	struct sock *raw_sk;
+-	int nhoff;
++	unsigned int nhoff;
+ 	int nexthdr;
+-	int found = 0;
+ 	u8 hash;
++	int cksum_sub = 0;
+ 
+ 	skb->h.raw = skb->nh.raw + sizeof(struct ipv6hdr);
+ 
+@@ -135,7 +140,7 @@
+ 	 *	Parse extension headers
+ 	 */
+ 
+-	nexthdr = hdr->nexthdr;
++	nexthdr = skb->nh.ipv6h->nexthdr;
+ 	nhoff = offsetof(struct ipv6hdr, nexthdr);
+ 
+ 	/* Skip  hop-by-hop options, they are already parsed. */
+@@ -145,58 +150,46 @@
+ 		skb->h.raw += (skb->h.raw[1]+1)<<3;
+ 	}
+ 
+-	/* This check is sort of optimization.
+-	   It would be stupid to detect for optional headers,
+-	   which are missing with probability of 200%
+-	 */
+-	if (nexthdr != IPPROTO_TCP && nexthdr != IPPROTO_UDP) {
+-		nhoff = ipv6_parse_exthdrs(&skb, nhoff);
+-		if (nhoff < 0)
+-			return 0;
+-		nexthdr = skb->nh.raw[nhoff];
+-		hdr = skb->nh.ipv6h;
+-	}
+-
++resubmit:
+ 	if (!pskb_pull(skb, skb->h.raw - skb->data))
+ 		goto discard;
++	nexthdr = skb->nh.raw[nhoff];
+ 
+-	if (skb->ip_summed == CHECKSUM_HW)
+-		skb->csum = csum_sub(skb->csum,
+-				     csum_partial(skb->nh.raw, skb->h.raw-skb->nh.raw, 0));
+-
+-	raw_sk = raw_v6_htable[nexthdr&(MAX_INET_PROTOS-1)];
++	raw_sk = raw_v6_htable[nexthdr & (MAX_INET_PROTOS - 1)];
+ 	if (raw_sk)
+-		raw_sk = ipv6_raw_deliver(skb, nexthdr);
++		ipv6_raw_deliver(skb, nexthdr);
+ 
+ 	hash = nexthdr & (MAX_INET_PROTOS - 1);
+-	for (ipprot = (struct inet6_protocol *) inet6_protos[hash]; 
+-	     ipprot != NULL; 
+-	     ipprot = (struct inet6_protocol *) ipprot->next) {
+-		struct sk_buff *buff = skb;
+-
+-		if (ipprot->protocol != nexthdr)
+-			continue;
+-
+-		if (ipprot->copy || raw_sk)
+-			buff = skb_clone(skb, GFP_ATOMIC);
+-
+-		if (buff)
+-			ipprot->handler(buff);
+-		found = 1;
+-	}
+-
+-	if (raw_sk) {
+-		rawv6_rcv(raw_sk, skb);
+-		sock_put(raw_sk);
+-		found = 1;
+-	}
+-
+-	/*
+-	 *	not found: send ICMP parameter problem back
+-	 */
+-	if (!found) {
+-		IP6_INC_STATS_BH(Ip6InUnknownProtos);
+-		icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
++	if ((ipprot = inet6_protos[hash]) != NULL) {
++		int ret;
++		
++		if (ipprot->flags & INET6_PROTO_FINAL) {
++			if (!cksum_sub && skb->ip_summed == CHECKSUM_HW) {
++				skb->csum = csum_sub(skb->csum,
++						     csum_partial(skb->nh.raw, skb->h.raw-skb->nh.raw, 0));
++				cksum_sub++;
++			}
++		}
++		if (!(ipprot->flags & INET6_PROTO_NOPOLICY) &&
++		    !xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++			kfree_skb(skb);
++			return 0;
++		}
++		
++		ret = ipprot->handler(&skb, &nhoff);
++		if (ret > 0)
++			goto resubmit;
++		else if (ret == 0)
++			IP6_INC_STATS_BH(Ip6InDelivers);
++	} else {
++		if (!raw_sk) {
++			if (xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++				IP6_INC_STATS_BH(Ip6InUnknownProtos);
++				icmpv6_param_prob(skb, ICMPV6_UNK_NEXTHDR, nhoff);
++			}
++		} else {
++			kfree_skb(skb);
++		}
+ 	}
+ 
+ 	return 0;
+@@ -246,7 +239,7 @@
+ 				skb2 = skb;
+ 			}
+ 
+-			dst->output(skb2);
++			dst_output(skb2);
+ 		}
+ 	}
+ #endif
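
The cksum_sub counter in ip6_input_finish above ensures the CHECKSUM_HW adjustment happens exactly once across resubmits: the hardware summed the whole packet, so the network headers' contribution must be subtracted before the transport layer verifies its own checksum. That subtraction is legal because the Internet checksum is a one's-complement sum; removing a range's partial sum leaves the sum of the rest. A small demonstration of the identity in plain C; the kernel's csum_partial and csum_sub are optimized forms of the same arithmetic:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

/* One's-complement sum of big-endian 16-bit words, folded to 16 bits. */
static uint16_t csum16(const uint8_t *p, size_t len, uint32_t sum)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2)
		sum += ((uint32_t)p[i] << 8) | p[i + 1];
	if (len & 1)
		sum += (uint32_t)p[len - 1] << 8;
	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

/* csum_sub(a, b) in one's-complement arithmetic: add the complement. */
static uint16_t csum_sub16(uint16_t a, uint16_t b)
{
	uint32_t sum = (uint32_t)a + (uint16_t)~b;

	while (sum >> 16)
		sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)sum;
}

int main(void)
{
	uint8_t buf[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };
	uint16_t whole  = csum16(buf, 8, 0);
	uint16_t prefix = csum16(buf, 4, 0);
	uint16_t suffix = csum16(buf + 4, 4, 0);

	/* Removing the prefix's contribution leaves the suffix's sum. */
	assert(csum_sub16(whole, prefix) == suffix);
	return 0;
}
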
+diff -Nru a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
+--- a/net/ipv6/ip6_output.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/ip6_output.c	2005-02-13 21:25:09 +11:00
+@@ -23,6 +23,9 @@
+  *
+  *      H. von Brand    :       Added missing #include <linux/string.h>
+  *	Imran Patel	: 	frag id should be in NBO
++ *      Kazunori MIYAZAWA @USAGI
++ *			:       add ip6_append_data and related functions
++ *				for datagram xmit
+  */
+ 
+ #include <linux/config.h>
+@@ -49,6 +52,9 @@
+ #include <net/addrconf.h>
+ #include <net/rawv6.h>
+ #include <net/icmp.h>
++#include <net/xfrm.h>
++
++static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*));
+ 
+ static __inline__ void ipv6_select_ident(struct sk_buff *skb, struct frag_hdr *fhdr)
+ {
+@@ -99,7 +105,7 @@
+ }
+ 
+ 
+-int ip6_output(struct sk_buff *skb)
++static int ip6_output2(struct sk_buff *skb)
+ {
+ 	struct dst_entry *dst = skb->dst;
+ 	struct net_device *dev = dst->dev;
+@@ -134,21 +140,27 @@
+ 	return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb,NULL, skb->dev,ip6_output_finish);
+ }
+ 
++int ip6_output(struct sk_buff *skb)
++{
++	if ((skb->len > dst_pmtu(skb->dst) || skb_shinfo(skb)->frag_list))
++		return ip6_fragment(skb, ip6_output2);
++	else
++		return ip6_output2(skb);
++}
+ 
+ #ifdef CONFIG_NETFILTER
+ int ip6_route_me_harder(struct sk_buff *skb)
+ {
+ 	struct ipv6hdr *iph = skb->nh.ipv6h;
+ 	struct dst_entry *dst;
+-	struct flowi fl;
+-
+-	fl.proto = iph->nexthdr;
+-	fl.fl6_dst = &iph->daddr;
+-	fl.fl6_src = &iph->saddr;
+-	fl.oif = skb->sk ? skb->sk->bound_dev_if : 0;
+-	fl.fl6_flowlabel = 0;
+-	fl.uli_u.ports.dport = 0;
+-	fl.uli_u.ports.sport = 0;
++	struct flowi fl = {
++		.oif = skb->sk ? skb->sk->bound_dev_if : 0,
++		.nl_u =
++		{ .ip6_u =
++		  { .daddr = iph->daddr,
++		    .saddr = iph->saddr, } },
++		.proto = iph->nexthdr,
++	};
+ 
+ 	dst = ip6_route_output(skb->sk, &fl);
+ 
+@@ -177,7 +189,7 @@
+ 		}
+ 	}
+ #endif /* CONFIG_NETFILTER */
+-	return skb->dst->output(skb);
++	return dst_output(skb);
+ }
+ 
+ /*
+@@ -188,12 +200,13 @@
+ 	     struct ipv6_txoptions *opt)
+ {
+ 	struct ipv6_pinfo * np = sk ? &sk->net_pinfo.af_inet6 : NULL;
+-	struct in6_addr *first_hop = fl->nl_u.ip6_u.daddr;
++	struct in6_addr *first_hop = &fl->fl6_dst;
+ 	struct dst_entry *dst = skb->dst;
+ 	struct ipv6hdr *hdr;
+ 	u8  proto = fl->proto;
+ 	int seg_len = skb->len;
+ 	int hlimit;
++	u32 mtu;
+ 
+ 	if (opt) {
+ 		int head_room;
+@@ -231,16 +244,17 @@
+ 	if (np)
+ 		hlimit = np->hop_limit;
+ 	if (hlimit < 0)
+-		hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
++		hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+ 
+ 	hdr->payload_len = htons(seg_len);
+ 	hdr->nexthdr = proto;
+ 	hdr->hop_limit = hlimit;
+ 
+-	ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
++	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
+ 	ipv6_addr_copy(&hdr->daddr, first_hop);
+ 
+-	if (skb->len <= dst->pmtu) {
++	mtu = dst_pmtu(dst);
++	if (skb->len <= mtu) {
+ 		IP6_INC_STATS(Ip6OutRequests);
+ 		return NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, dst->dev, ip6_maybe_reroute);
+ 	}
+@@ -248,7 +262,7 @@
+ 	if (net_ratelimit())
+ 		printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
+ 	skb->dev = dst->dev;
+-	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
++	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
+ 	kfree_skb(skb);
+ 	return -EMSGSIZE;
+ }
+@@ -302,8 +316,8 @@
+ 	hdr->hop_limit = hlimit;
+ 	hdr->nexthdr = fl->proto;
+ 
+-	ipv6_addr_copy(&hdr->saddr, fl->nl_u.ip6_u.saddr);
+-	ipv6_addr_copy(&hdr->daddr, fl->nl_u.ip6_u.daddr);
++	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
++	ipv6_addr_copy(&hdr->daddr, &fl->fl6_dst);
+ 	return hdr;
+ }
+ 
+@@ -507,19 +521,19 @@
+ 		   struct ipv6_txoptions *opt, int hlimit, int flags)
+ {
+ 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+-	struct in6_addr *final_dst = NULL;
++	struct in6_addr final_dst_buf, *final_dst = NULL;
+ 	struct dst_entry *dst;
+ 	int err = 0;
+ 	unsigned int pktlength, jumbolen, mtu;
+-	struct in6_addr saddr;
+ 
+ 	if (opt && opt->srcrt) {
+ 		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+-		final_dst = fl->fl6_dst;
+-		fl->fl6_dst = rt0->addr;
++		ipv6_addr_copy(&final_dst_buf, &fl->fl6_dst);
++		final_dst = &final_dst_buf;
++		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+ 	}
+ 
+-	if (!fl->oif && ipv6_addr_is_multicast(fl->nl_u.ip6_u.daddr))
++	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
+ 		fl->oif = np->mcast_oif;
+ 
+ 	dst = __sk_dst_check(sk, np->dst_cookie);
+@@ -545,9 +559,9 @@
+ 			 */
+ 
+ 		if (((rt->rt6i_dst.plen != 128 ||
+-		      ipv6_addr_cmp(fl->fl6_dst, &rt->rt6i_dst.addr))
++		      ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
+ 		     && (np->daddr_cache == NULL ||
+-			 ipv6_addr_cmp(fl->fl6_dst, np->daddr_cache)))
++			 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
+ 		    || (fl->oif && fl->oif != dst->dev->ifindex)) {
+ 			dst = NULL;
+ 		} else
+@@ -563,8 +577,8 @@
+ 		return -ENETUNREACH;
+ 	}
+ 
+-	if (fl->fl6_src == NULL) {
+-		err = ipv6_get_saddr(dst, fl->fl6_dst, &saddr);
++	if (ipv6_addr_any(&fl->fl6_src)) {
++		err = ipv6_get_saddr(dst, &fl->fl6_dst, &fl->fl6_src);
+ 
+ 		if (err) {
+ #if IP6_DEBUG >= 2
+@@ -573,17 +587,23 @@
+ #endif
+ 			goto out;
+ 		}
+-		fl->fl6_src = &saddr;
+ 	}
+ 	pktlength = length;
+ 
++	if (dst) {
++		if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0) {
++			dst_release(dst);
++			return -ENETUNREACH;
++		}
++	}
++
+ 	if (hlimit < 0) {
+-		if (ipv6_addr_is_multicast(fl->fl6_dst))
++		if (ipv6_addr_is_multicast(&fl->fl6_dst))
+ 			hlimit = np->mcast_hops;
+ 		else
+ 			hlimit = np->hop_limit;
+ 		if (hlimit < 0)
+-			hlimit = ((struct rt6_info*)dst)->rt6i_hoplimit;
++			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
+ 	}
+ 
+ 	jumbolen = 0;
+@@ -593,7 +613,7 @@
+ 		if (opt)
+ 			pktlength += opt->opt_flen + opt->opt_nflen;
+ 
+-		if (pktlength > 0xFFFF + sizeof(struct ipv6hdr)) {
++		if (pktlength > sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
+ 			/* Jumbo datagram.
+ 			   It is assumed, that in the case of hdrincl
+ 			   jumbo option is supplied by user.
+@@ -603,7 +623,7 @@
+ 		}
+ 	}
+ 
+-	mtu = dst->pmtu;
++	mtu = dst_pmtu(dst);
+ 	if (np->frag_size < mtu) {
+ 		if (np->frag_size)
+ 			mtu = np->frag_size;
+@@ -631,9 +651,8 @@
+ 		err = 0;
+ 		if (flags&MSG_PROBE)
+ 			goto out;
+-
+-		skb = sock_alloc_send_skb(sk, pktlength + 15 +
+-					  dev->hard_header_len,
++		/* alloc skb with mtu as we do in the IPv4 stack for IPsec */
++		skb = sock_alloc_send_skb(sk, mtu + LL_RESERVED_SPACE(dev),
+ 					  flags & MSG_DONTWAIT, &err);
+ 
+ 		if (skb == NULL) {
+@@ -664,6 +683,8 @@
+ 		err = getfrag(data, &hdr->saddr,
+ 			      ((char *) hdr) + (pktlength - length),
+ 			      0, length);
++		if (!opt || !opt->dst1opt)
++			skb->h.raw = ((char *) hdr) + (pktlength - length);
+ 
+ 		if (!err) {
+ 			IP6_INC_STATS(Ip6OutRequests);
+@@ -688,7 +709,9 @@
+ 	 *	cleanup
+ 	 */
+ out:
+-	ip6_dst_store(sk, dst, fl->nl_u.ip6_u.daddr == &np->daddr ? &np->daddr : NULL);
++	ip6_dst_store(sk, dst,
++		      !ipv6_addr_cmp(&fl->fl6_dst, &np->daddr) ?
++		      &np->daddr : NULL);
+ 	if (err > 0)
+ 		err = np->recverr ? net_xmit_errno(err) : 0;
+ 	return err;
+@@ -723,7 +746,7 @@
+ 
+ static inline int ip6_forward_finish(struct sk_buff *skb)
+ {
+-	return skb->dst->output(skb);
++	return dst_output(skb);
+ }
+ 
+ int ip6_forward(struct sk_buff *skb)
+@@ -735,6 +758,9 @@
+ 	if (ipv6_devconf.forwarding == 0)
+ 		goto error;
+ 
++	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb))
++		goto drop;
++
+ 	skb->ip_summed = CHECKSUM_NONE;
+ 
+ 	/*
+@@ -769,6 +795,9 @@
+ 		return -ETIMEDOUT;
+ 	}
+ 
++	if (!xfrm6_route_forward(skb))
++		goto drop;
++
+ 	/* IPv6 specs say nothing about it, but it is clear that we cannot
+ 	   send redirects to source routed frames.
+ 	 */
+@@ -799,10 +828,10 @@
+ 		goto error;
+ 	}
+ 
+-	if (skb->len > dst->pmtu) {
++	if (skb->len > dst_pmtu(dst)) {
+ 		/* Again, force OUTPUT device used as source address */
+ 		skb->dev = dst->dev;
+-		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst->pmtu, skb->dev);
++		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_pmtu(dst), skb->dev);
+ 		IP6_INC_STATS_BH(Ip6InTooBigErrors);
+ 		kfree_skb(skb);
+ 		return -EMSGSIZE;
+@@ -825,4 +854,657 @@
+ drop:
+ 	kfree_skb(skb);
+ 	return -EINVAL;
++}
++
++static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
++{
++	to->pkt_type = from->pkt_type;
++	to->priority = from->priority;
++	to->protocol = from->protocol;
++	to->security = from->security;
++	to->dst = dst_clone(from->dst);
++	to->dev = from->dev;
++
++#ifdef CONFIG_NET_SCHED
++	to->tc_index = from->tc_index;
++#endif
++#ifdef CONFIG_NETFILTER
++	to->nfmark = from->nfmark;
++	/* Connection association is same as pre-frag packet */
++	to->nfct = from->nfct;
++	nf_conntrack_get(to->nfct);
++#ifdef CONFIG_NETFILTER_DEBUG
++	to->nf_debug = from->nf_debug;
++#endif
++#endif
++}
++
++int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
++{
++	u16 offset = sizeof(struct ipv6hdr);
++	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1);
++	unsigned int packet_len = skb->tail - skb->nh.raw;
++	int found_rhdr = 0;
++	*nexthdr = &skb->nh.ipv6h->nexthdr;
++
++	while (offset + 1 <= packet_len) {
++
++		switch (**nexthdr) {
++
++		case NEXTHDR_HOP:
++		case NEXTHDR_ROUTING:
++		case NEXTHDR_DEST:
++			if (**nexthdr == NEXTHDR_ROUTING) found_rhdr = 1;
++			if (**nexthdr == NEXTHDR_DEST && found_rhdr) return offset;
++			offset += ipv6_optlen(exthdr);
++			*nexthdr = &exthdr->nexthdr;
++			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
++			break;
++		default :
++			return offset;
++		}
++	}
++
++	return offset;
++}
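
Aside for reviewers: ip6_find_1stfragopt() encodes the RFC 2460 rule that
hop-by-hop, routing, and any destination options header placed before a
routing header belong to the unfragmentable part. The scan can be exercised
in user space; the sketch below is simplified (it treats every destination
options header as unfragmentable, and the NEXTHDR_* values are copied from
the kernel headers):

#include <stdio.h>
#include <stdint.h>

#define NEXTHDR_HOP      0
#define NEXTHDR_TCP      6
#define NEXTHDR_ROUTING 43

int main(void)
{
	/* 40-byte IPv6 header, then hop-by-hop (8 bytes) and routing
	 * (8 bytes) extension headers, then TCP. */
	uint8_t pkt[64] = {0};
	unsigned off = 40;
	uint8_t nh;

	pkt[6]  = NEXTHDR_HOP;		/* ipv6h->nexthdr */
	pkt[40] = NEXTHDR_ROUTING;	/* hbh->nexthdr */
	pkt[41] = 0;			/* hdrlen: (0 + 1) * 8 bytes */
	pkt[48] = NEXTHDR_TCP;		/* rthdr->nexthdr */

	nh = pkt[6];
	while (nh == NEXTHDR_HOP || nh == NEXTHDR_ROUTING) {
		uint8_t next = pkt[off];
		off += (pkt[off + 1] + 1) * 8;	/* ipv6_optlen() */
		nh = next;
	}
	printf("fragment header goes at offset %u\n", off);	/* prints 56 */
	return 0;
}

For the packet above the fragment header lands at offset 56, right after
the two unfragmentable extension headers.
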
++
++static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff*))
++{
++	struct net_device *dev;
++	struct rt6_info *rt = (struct rt6_info*)skb->dst;
++	struct sk_buff *frag;
++	struct ipv6hdr *tmp_hdr;
++	struct frag_hdr *fh;
++	unsigned int mtu, hlen, left, len;
++	u32 frag_id = 0;
++	int ptr, offset = 0, err=0;
++	u8 *prevhdr, nexthdr = 0;
++
++	dev = rt->u.dst.dev;
++	hlen = ip6_find_1stfragopt(skb, &prevhdr);
++	nexthdr = *prevhdr;
++
++	mtu = dst_pmtu(&rt->u.dst) - hlen - sizeof(struct frag_hdr);
++
++	if (skb_shinfo(skb)->frag_list) {
++		int first_len = skb_pagelen(skb);
++
++		if (first_len - hlen > mtu ||
++		    ((first_len - hlen) & 7) ||
++		    skb_cloned(skb))
++			goto slow_path;
++
++		for (frag = skb_shinfo(skb)->frag_list; frag; frag = frag->next) {
++			/* Correct geometry. */
++			if (frag->len > mtu ||
++			    ((frag->len & 7) && frag->next) ||
++			    skb_headroom(frag) < hlen)
++			    goto slow_path;
++
++			/* Correct socket ownership. */
++			if (frag->sk == NULL)
++				goto slow_path;
++
++			/* Partially cloned skb? */
++			if (skb_shared(frag))
++				goto slow_path;
++		}
++
++		err = 0;
++		offset = 0;
++		frag = skb_shinfo(skb)->frag_list;
++		skb_shinfo(skb)->frag_list = 0;
++		/* BUILD HEADER */
++
++		tmp_hdr = kmalloc(hlen, GFP_ATOMIC);
++		if (!tmp_hdr) {
++			IP6_INC_STATS(Ip6FragFails);
++			return -ENOMEM;
++		}
++
++		*prevhdr = NEXTHDR_FRAGMENT;
++		memcpy(tmp_hdr, skb->nh.raw, hlen);
++		__skb_pull(skb, hlen);
++		fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
++		skb->nh.raw = __skb_push(skb, hlen);
++		memcpy(skb->nh.raw, tmp_hdr, hlen);
++
++		ipv6_select_ident(skb, fh);
++		fh->nexthdr = nexthdr;
++		fh->reserved = 0;
++		fh->frag_off = htons(IP6_MF);
++		frag_id = fh->identification;
++
++		first_len = skb_pagelen(skb);
++		skb->data_len = first_len - skb_headlen(skb);
++		skb->len = first_len;
++		skb->nh.ipv6h->payload_len = htons(first_len - sizeof(struct ipv6hdr));
++ 
++
++		for (;;) {
++			/* Prepare header of the next frame,
++			 * before previous one went down. */
++			if (frag) {
++				frag->h.raw = frag->data;
++				fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
++				frag->nh.raw = __skb_push(frag, hlen);
++				memcpy(frag->nh.raw, tmp_hdr, hlen);
++				offset += skb->len - hlen - sizeof(struct frag_hdr);
++				fh->nexthdr = nexthdr;
++				fh->reserved = 0;
++				fh->frag_off = htons(offset);
++				if (frag->next != NULL)
++					fh->frag_off |= htons(IP6_MF);
++				fh->identification = frag_id;
++				frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
++				ip6_copy_metadata(frag, skb);
++			}
++			err = output(skb);
++
++			if (err || !frag)
++				break;
++
++			skb = frag;
++			frag = skb->next;
++			skb->next = NULL;
++		}
++
++		if (tmp_hdr)
++			kfree(tmp_hdr);
++
++		if (err == 0) {
++			IP6_INC_STATS(Ip6FragOKs);
++			return 0;
++		}
++
++		while (frag) {
++			skb = frag->next;
++			kfree_skb(frag);
++			frag = skb;
++		}
++
++		IP6_INC_STATS(Ip6FragFails);
++		return err;
++	}
++
++slow_path:
++	left = skb->len - hlen;		/* Space per frame */
++	ptr = hlen;			/* Where to start from */
++
++	/*
++	 *	Fragment the datagram.
++	 */
++
++	*prevhdr = NEXTHDR_FRAGMENT;
++
++	/*
++	 *	Keep copying data until we run out.
++	 */
++	while(left > 0)	{
++		len = left;
++		/* IF: it doesn't fit, use 'mtu' - the data space left */
++		if (len > mtu)
++			len = mtu;
++		/* IF: we are not sending up to and including the packet end
++		   then align the next start on an eight-byte boundary */
++		if (len < left)	{
++			len &= ~7;
++		}
++		/*
++		 *	Allocate buffer.
++		 */
++
++		if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_RESERVED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
++			NETDEBUG(printk(KERN_INFO "IPv6: frag: no memory for new fragment!\n"));
++			err = -ENOMEM;
++			goto fail;
++		}
++
++		/*
++		 *	Set up data on packet
++		 */
++
++		ip6_copy_metadata(frag, skb);
++		skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
++		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
++		frag->nh.raw = frag->data;
++		fh = (struct frag_hdr*)(frag->data + hlen);
++		frag->h.raw = frag->data + hlen + sizeof(struct frag_hdr);
++
++		/*
++		 *	Charge the memory for the fragment to any owner
++		 *	it might possess
++		 */
++		if (skb->sk)
++			skb_set_owner_w(frag, skb->sk);
++
++		/*
++		 *	Copy the packet header into the new buffer.
++		 */
++		memcpy(frag->nh.raw, skb->data, hlen);
++
++		/*
++		 *	Build fragment header.
++		 */
++		fh->nexthdr = nexthdr;
++		fh->reserved = 0;
++		if (frag_id) {
++			ipv6_select_ident(skb, fh);
++			frag_id = fh->identification;
++		} else
++			fh->identification = frag_id;
++
++		/*
++		 *	Copy a block of the IP datagram.
++		 */
++		if (skb_copy_bits(skb, ptr, frag->h.raw, len))
++			BUG();
++		left -= len;
++
++		fh->frag_off = htons(offset);
++		if (left > 0)
++			fh->frag_off |= htons(IP6_MF);
++		frag->nh.ipv6h->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
++
++		ptr += len;
++		offset += len;
++
++		/*
++		 *	Put this fragment into the sending queue.
++		 */
++
++		IP6_INC_STATS(Ip6FragCreates);
++
++		err = output(frag);
++		if (err)
++			goto fail;
++	}
++	kfree_skb(skb);
++	IP6_INC_STATS(Ip6FragOKs);
++	return err;
++
++fail:
++	kfree_skb(skb); 
++	IP6_INC_STATS(Ip6FragFails);
++	return err;
++}
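
Aside: in the slow path, `len &= ~7` keeps every fragment except the last
one a multiple of eight bytes, since IPv6 fragment offsets must be 8-byte
aligned. The per-fragment arithmetic, runnable stand-alone (the mtu and
payload numbers are made up for illustration):

#include <stdio.h>

int main(void)
{
	unsigned mtu = 1280, hlen = 40, left = 3000;
	/* data space per fragment, as computed at the top of ip6_fragment();
	 * 8 == sizeof(struct frag_hdr) */
	unsigned space = mtu - hlen - 8;
	unsigned offset = 0;

	while (left > 0) {
		unsigned len = left > space ? space : left;
		if (len < left)
			len &= ~7;	/* all but the last fragment end on an 8-byte boundary */
		printf("fragment: offset=%u len=%u MF=%u\n",
		       offset, len, left > len);
		left -= len;
		offset += len;
	}
	return 0;
}
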
++
++int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
++{
++	int err = 0;
++
++	if (sk) {
++		struct ipv6_pinfo *np = inet6_sk(sk);
++	
++		*dst = __sk_dst_check(sk, np->dst_cookie);
++		if (*dst) {
++			struct rt6_info *rt = (struct rt6_info*)*dst;
++	
++				/* Yes, checking route validity in the
++				   not-connected case is not very simple.
++				   Take into account that we do not support
++				   routing by source, TOS, and MSG_DONTROUTE.
++							--ANK (980726)
++
++				   1. If the route was a host route, check
++				      that the cached destination is current.
++				      If it is a network route, we still may
++				      check its validity using the saved
++				      pointer to the last used address,
++				      daddr_cache. We do not want to save the
++				      whole address now (because the main
++				      consumer of this service is TCP, which
++				      does not have this problem), so the
++				      last trick works only on connected
++				      sockets.
++				   2. oif also should be the same.
++				 */
++	
++			if (((rt->rt6i_dst.plen != 128 ||
++			      ipv6_addr_cmp(&fl->fl6_dst, &rt->rt6i_dst.addr))
++			     && (np->daddr_cache == NULL ||
++				 ipv6_addr_cmp(&fl->fl6_dst, np->daddr_cache)))
++			    || (fl->oif && fl->oif != (*dst)->dev->ifindex)) {
++				*dst = NULL;
++			} else
++				dst_hold(*dst);
++		}
++	}
++
++	if (*dst == NULL)
++		*dst = ip6_route_output(sk, fl);
++
++	if ((err = (*dst)->error))
++		goto out_err_release;
++
++	if (ipv6_addr_any(&fl->fl6_src)) {
++		err = ipv6_get_saddr(*dst, &fl->fl6_dst, &fl->fl6_src);
++
++		if (err) {
++#if IP6_DEBUG >= 2
++			printk(KERN_DEBUG "ip6_dst_lookup: "
++			       "no available source address\n");
++#endif
++			goto out_err_release;
++		}
++	}
++	if ((err = xfrm_lookup(dst, fl, sk, 0)) < 0) {
++		err = -ENETUNREACH;
++		goto out_err_release;
++	}
++
++	return 0;
++
++out_err_release:
++	dst_release(*dst);
++	*dst = NULL;
++	return err;
++}
++
++int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb),
++		    void *from, int length, int transhdrlen,
++		    int hlimit, struct ipv6_txoptions *opt, struct flowi *fl, struct rt6_info *rt,
++		    unsigned int flags)
++{
++	struct inet_opt *inet = inet_sk(sk);
++	struct ipv6_pinfo *np = inet6_sk(sk);
++	struct sk_buff *skb;
++	unsigned int maxfraglen, fragheaderlen;
++	int exthdrlen;
++	int hh_len;
++	int mtu;
++	int copy = 0;
++	int err;
++	int offset = 0;
++	int csummode = CHECKSUM_NONE;
++
++	if (flags&MSG_PROBE)
++		return 0;
++	if (skb_queue_empty(&sk->write_queue)) {
++		/*
++		 * setup for corking
++		 */
++		if (opt) {
++			if (np->cork.opt == NULL) {
++				np->cork.opt = kmalloc(opt->tot_len, sk->allocation);
++				if (unlikely(np->cork.opt == NULL))
++					return -ENOBUFS;
++			}
++			memcpy(np->cork.opt, opt, opt->tot_len);
++			inet->cork.flags |= IPCORK_OPT;
++			/* need source address above miyazawa*/
++		}
++		dst_hold(&rt->u.dst);
++		np->cork.rt = rt;
++		np->cork.fl = *fl;
++		np->cork.hop_limit = hlimit;
++		inet->cork.fragsize = mtu = dst_pmtu(&rt->u.dst);
++		inet->cork.length = 0;
++		inet->sndmsg_page = NULL;
++		inet->sndmsg_off = 0;
++		exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0);
++		length += exthdrlen;
++		transhdrlen += exthdrlen;
++	} else {
++		rt = np->cork.rt;
++		if (inet->cork.flags & IPCORK_OPT)
++			opt = np->cork.opt;
++		transhdrlen = 0;
++		exthdrlen = 0;
++		mtu = inet->cork.fragsize;
++	}
++
++	hh_len = (rt->u.dst.dev->hard_header_len&~15) + 16;
++
++	fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0);
++	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
++
++	if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
++		if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
++			ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
++			return -EMSGSIZE;
++		}
++	}
++
++	inet->cork.length += length;
++
++	if ((skb = skb_peek_tail(&sk->write_queue)) == NULL)
++		goto alloc_new_skb;
++
++	while (length > 0) {
++		if ((copy = maxfraglen - skb->len) <= 0) {
++			char *data;
++			unsigned int datalen;
++			unsigned int fraglen;
++			unsigned int alloclen;
++			BUG_TRAP(copy == 0);
++alloc_new_skb:
++			datalen = maxfraglen - fragheaderlen;
++			if (datalen > length)
++				datalen = length;
++			fraglen = datalen + fragheaderlen;
++			if ((flags & MSG_MORE) &&
++			    !(rt->u.dst.dev->features&NETIF_F_SG))
++				alloclen = maxfraglen;
++			else
++				alloclen = fraglen;
++			alloclen += sizeof(struct frag_hdr);
++			if (transhdrlen) {
++				skb = sock_alloc_send_skb(sk,
++						alloclen + hh_len + 15,
++						(flags & MSG_DONTWAIT), &err);
++			} else {
++				skb = NULL;
++				if (atomic_read(&sk->wmem_alloc) <= 2*sk->sndbuf)
++					skb = sock_wmalloc(sk,
++							   alloclen + hh_len + 15, 1,
++							   sk->allocation);
++				if (unlikely(skb == NULL))
++					err = -ENOBUFS;
++			}
++			if (skb == NULL)
++				goto error;
++			/*
++			 *	Fill in the control structures
++			 */
++			skb->ip_summed = csummode;
++			skb->csum = 0;
++			/* reserve 8 byte for fragmentation */
++			skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
++
++			/*
++			 *	Find where to start putting bytes
++			 */
++			data = skb_put(skb, fraglen);
++			skb->nh.raw = data + exthdrlen;
++			data += fragheaderlen;
++			skb->h.raw = data + exthdrlen;
++			copy = datalen - transhdrlen;
++			if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, 0, skb) < 0) {
++				err = -EFAULT;
++				kfree_skb(skb);
++				goto error;
++			}
++
++			offset += copy;
++			length -= datalen;
++			transhdrlen = 0;
++			exthdrlen = 0;
++			csummode = CHECKSUM_NONE;
++
++			/*
++			 * Put the packet on the pending queue
++			 */
++			__skb_queue_tail(&sk->write_queue, skb);
++			continue;
++		}
++
++		if (copy > length)
++			copy = length;
++
++		if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
++			unsigned int off;
++
++			off = skb->len;
++			if (getfrag(from, skb_put(skb, copy),
++						offset, copy, off, skb) < 0) {
++				__skb_trim(skb, off);
++				err = -EFAULT;
++				goto error;
++			}
++		} else {
++			int i = skb_shinfo(skb)->nr_frags;
++			skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
++			struct page *page = inet->sndmsg_page;
++			int off = inet->sndmsg_off;
++			unsigned int left;
++
++			if (page && (left = PAGE_SIZE - off) > 0) {
++				if (copy >= left)
++					copy = left;
++				if (page != frag->page) {
++					if (i == MAX_SKB_FRAGS) {
++						err = -EMSGSIZE;
++						goto error;
++					}
++					get_page(page);
++					skb_fill_page_desc(skb, i, page, inet->sndmsg_off, 0);
++					frag = &skb_shinfo(skb)->frags[i];
++				}
++			} else if(i < MAX_SKB_FRAGS) {
++				if (copy > PAGE_SIZE)
++					copy = PAGE_SIZE;
++				page = alloc_pages(sk->allocation, 0);
++				if (page == NULL) {
++					err = -ENOMEM;
++					goto error;
++				}
++				inet->sndmsg_page = page;
++				inet->sndmsg_off = 0;
++
++				skb_fill_page_desc(skb, i, page, 0, 0);
++				frag = &skb_shinfo(skb)->frags[i];
++				skb->truesize += PAGE_SIZE;
++				atomic_add(PAGE_SIZE, &sk->wmem_alloc);
++			} else {
++				err = -EMSGSIZE;
++				goto error;
++			}
++			if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
++				err = -EFAULT;
++				goto error;
++			}
++			inet->sndmsg_off += copy;
++			frag->size += copy;
++			skb->len += copy;
++			skb->data_len += copy;
++		}
++		offset += copy;
++		length -= copy;
++	}
++	return 0;
++error:
++	inet->cork.length -= length;
++	IP6_INC_STATS(Ip6OutDiscards);
++	return err;
++}
++
++int ip6_push_pending_frames(struct sock *sk)
++{
++	struct sk_buff *skb, *tmp_skb;
++	struct sk_buff **tail_skb;
++	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
++	struct inet_opt *inet = inet_sk(sk);
++	struct ipv6_pinfo *np = inet6_sk(sk);
++	struct ipv6hdr *hdr;
++	struct ipv6_txoptions *opt = np->cork.opt;
++	struct rt6_info *rt = np->cork.rt;
++	struct flowi *fl = &np->cork.fl;
++	unsigned char proto = fl->proto;
++	int err = 0;
++
++	if ((skb = __skb_dequeue(&sk->write_queue)) == NULL)
++		goto out;
++	tail_skb = &(skb_shinfo(skb)->frag_list);
++
++	/* move skb->data to ip header from ext header */
++	if (skb->data < skb->nh.raw)
++		__skb_pull(skb, skb->nh.raw - skb->data);
++	while ((tmp_skb = __skb_dequeue(&sk->write_queue)) != NULL) {
++		__skb_pull(tmp_skb, skb->h.raw - skb->nh.raw);
++		*tail_skb = tmp_skb;
++		tail_skb = &(tmp_skb->next);
++		skb->len += tmp_skb->len;
++		skb->data_len += tmp_skb->len;
++#if 0 /* Logically correct, but useless work, ip_fragment() will have to undo */
++		skb->truesize += tmp_skb->truesize;
++		__sock_put(tmp_skb->sk);
++		tmp_skb->destructor = NULL;
++		tmp_skb->sk = NULL;
++#endif
++	}
++
++	ipv6_addr_copy(final_dst, &fl->fl6_dst);
++	__skb_pull(skb, skb->h.raw - skb->nh.raw);
++	if (opt && opt->opt_flen)
++		ipv6_push_frag_opts(skb, opt, &proto);
++	if (opt && opt->opt_nflen)
++		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
++
++	skb->nh.ipv6h = hdr = (struct ipv6hdr*) skb_push(skb, sizeof(struct ipv6hdr));
++	
++	*(u32*)hdr = fl->fl6_flowlabel | htonl(0x60000000);
++
++	if (skb->len <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN)
++		hdr->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++	else
++		hdr->payload_len = 0;
++	hdr->hop_limit = np->cork.hop_limit;
++	hdr->nexthdr = proto;
++	ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
++	ipv6_addr_copy(&hdr->daddr, final_dst);
++
++	skb->dst = dst_clone(&rt->u.dst);
++	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, dst_output);
++	if (err) {
++		if (err > 0)
++			err = inet->recverr ? net_xmit_errno(err) : 0;
++		if (err)
++			goto error;
++	}
++
++out:
++	inet->cork.flags &= ~IPCORK_OPT;
++	if (np->cork.opt) {
++		kfree(np->cork.opt);
++		np->cork.opt = NULL;
++	}
++	if (np->cork.rt) {
++		dst_release(&np->cork.rt->u.dst);
++		np->cork.rt = NULL;
++	}
++	memset(&np->cork.fl, 0, sizeof(np->cork.fl));
++	return err;
++error:
++	goto out;
++}
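
Aside: the `fl->fl6_flowlabel | htonl(0x60000000)` idiom writes the entire
first 32-bit word of the IPv6 header in one store: version 6 in the top
nibble, traffic class and the 20-bit flow label below it (fl6_flowlabel is
kept in network byte order precisely so this OR works). A quick
stand-alone check:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t flowlabel = htonl(0x000abcde & 0x000FFFFF);	/* 20-bit label */
	uint32_t word = flowlabel | htonl(0x60000000);		/* version 6 */

	printf("first word of the header: 0x%08x\n", ntohl(word));	/* 0x600abcde */
	return 0;
}
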
++
++void ip6_flush_pending_frames(struct sock *sk)
++{
++	struct inet_opt *inet = inet_sk(sk);
++	struct ipv6_pinfo *np = inet6_sk(sk);
++	struct sk_buff *skb;
++
++	while ((skb = __skb_dequeue_tail(&sk->write_queue)) != NULL)
++		kfree_skb(skb);
++
++	inet->cork.flags &= ~IPCORK_OPT;
++
++	if (np->cork.opt) {
++		kfree(np->cork.opt);
++		np->cork.opt = NULL;
++	}
++	if (np->cork.rt) {
++		dst_release(&np->cork.rt->u.dst);
++		np->cork.rt = NULL;
++	}
++	memset(&np->cork.fl, 0, sizeof(np->cork.fl));
+ }
+diff -Nru a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv6/ip6_tunnel.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,1146 @@
++/*
++ *	IPv6 over IPv6 tunnel device
++ *	Linux INET6 implementation
++ *
++ *	Authors:
++ *	Ville Nuorvala		<vnuorval at tcs.hut.fi>	
++ *
++ *	$Id$
++ *
++ *      Based on:
++ *      linux/net/ipv6/sit.c
++ *
++ *      RFC 2473
++ *
++ *	This program is free software; you can redistribute it and/or
++ *      modify it under the terms of the GNU General Public License
++ *      as published by the Free Software Foundation; either version
++ *      2 of the License, or (at your option) any later version.
++ *
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/errno.h>
++#include <linux/types.h>
++#include <linux/sockios.h>
++#include <linux/if.h>
++#include <linux/in.h>
++#include <linux/ip.h>
++#include <linux/if_tunnel.h>
++#include <linux/net.h>
++#include <linux/in6.h>
++#include <linux/netdevice.h>
++#include <linux/if_arp.h>
++#include <linux/icmpv6.h>
++#include <linux/init.h>
++#include <linux/route.h>
++#include <linux/rtnetlink.h>
++#include <linux/netfilter_ipv6.h>
++
++#include <asm/uaccess.h>
++#include <asm/atomic.h>
++
++#include <net/ip.h>
++#include <net/ipv6.h>
++#include <net/protocol.h>
++#include <net/ip6_route.h>
++#include <net/addrconf.h>
++#include <net/ip6_tunnel.h>
++#include <net/xfrm.h>
++
++MODULE_AUTHOR("Ville Nuorvala");
++MODULE_DESCRIPTION("IPv6-in-IPv6 tunnel");
++MODULE_LICENSE("GPL");
++
++#define IPV6_TLV_TEL_DST_SIZE 8
++
++#ifdef IP6_TNL_DEBUG
++#define IP6_TNL_TRACE(x...) printk(KERN_DEBUG "%s:" x "\n", __FUNCTION__)
++#else
++#define IP6_TNL_TRACE(x...) do {;} while(0)
++#endif
++
++#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK)
++
++#define HASH_SIZE  32
++
++#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
++	             (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
++                    (HASH_SIZE - 1))
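
Aside: tunnels with fully specified end points are XOR-folded into one of
HASH_SIZE buckets, while wildcard tunnels live in the single-slot tnls_wc
list. The bucket computation in isolation (struct addr6 is a stand-in for
struct in6_addr, and the addresses are illustrative):

#include <stdio.h>
#include <stdint.h>

struct addr6 { uint32_t s6_addr32[4]; };	/* stand-in for struct in6_addr */

#define HASH_SIZE 32
#define HASH(addr) (((addr)->s6_addr32[0] ^ (addr)->s6_addr32[1] ^ \
		     (addr)->s6_addr32[2] ^ (addr)->s6_addr32[3]) & \
		    (HASH_SIZE - 1))

int main(void)
{
	struct addr6 remote = { { 0x20010db8, 0, 0, 1 } };
	struct addr6 local  = { { 0x20010db8, 0, 0, 2 } };

	/* ip6ip6_bucket() and ip6ip6_tnl_lookup() combine both end points */
	printf("bucket = %u\n", HASH(&remote) ^ HASH(&local));
	return 0;
}
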
++
++static int ip6ip6_fb_tnl_dev_init(struct net_device *dev);
++static int ip6ip6_tnl_dev_init(struct net_device *dev);
++static void ip6ip6_tnl_dev_setup(struct net_device *dev);
++
++/* the IPv6 tunnel fallback device */
++static struct net_device *ip6ip6_fb_tnl_dev;
++
++
++/* lists for storing tunnels in use */
++static struct ip6_tnl *tnls_r_l[HASH_SIZE];
++static struct ip6_tnl *tnls_wc[1];
++static struct ip6_tnl **tnls[2] = { tnls_wc, tnls_r_l };
++
++/* lock for the tunnel lists */
++static rwlock_t ip6ip6_lock = RW_LOCK_UNLOCKED;
++
++static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t)
++{
++	struct dst_entry *dst = t->dst_cache;
++
++	if (dst && dst->obsolete && 
++	    dst->ops->check(dst, t->dst_cookie) == NULL) {
++		t->dst_cache = NULL;
++		return NULL;
++	}
++
++	return dst;
++}
++
++static inline void ip6_tnl_dst_reset(struct ip6_tnl *t)
++{
++	dst_release(t->dst_cache);
++	t->dst_cache = NULL;
++}
++
++static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst)
++{
++	struct rt6_info *rt = (struct rt6_info *) dst;
++	t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0;
++	dst_release(t->dst_cache);
++	t->dst_cache = dst;
++}
++
++/**
++ * ip6ip6_tnl_lookup - fetch tunnel matching the end-point addresses
++ *   @remote: the address of the tunnel exit-point 
++ *   @local: the address of the tunnel entry-point 
++ *
++ * Return:  
++ *   tunnel matching given end-points if found,
++ *   else fallback tunnel if its device is up, 
++ *   else %NULL
++ **/
++
++static struct ip6_tnl *
++ip6ip6_tnl_lookup(struct in6_addr *remote, struct in6_addr *local)
++{
++	unsigned h0 = HASH(remote);
++	unsigned h1 = HASH(local);
++	struct ip6_tnl *t;
++
++	for (t = tnls_r_l[h0 ^ h1]; t; t = t->next) {
++		if (!ipv6_addr_cmp(local, &t->parms.laddr) &&
++		    !ipv6_addr_cmp(remote, &t->parms.raddr) &&
++		    (t->dev->flags & IFF_UP))
++			return t;
++	}
++	if ((t = tnls_wc[0]) != NULL && (t->dev->flags & IFF_UP))
++		return t;
++
++	return NULL;
++}
++
++/**
++ * ip6ip6_bucket - get head of list matching given tunnel parameters
++ *   @p: parameters containing tunnel end-points 
++ *
++ * Description:
++ *   ip6ip6_bucket() returns the head of the list matching the 
++ *   &struct in6_addr entries laddr and raddr in @p.
++ *
++ * Return: head of IPv6 tunnel list 
++ **/
++
++static struct ip6_tnl **
++ip6ip6_bucket(struct ip6_tnl_parm *p)
++{
++	struct in6_addr *remote = &p->raddr;
++	struct in6_addr *local = &p->laddr;
++	unsigned h = 0;
++	int prio = 0;
++
++	if (!ipv6_addr_any(remote) || !ipv6_addr_any(local)) {
++		prio = 1;
++		h = HASH(remote) ^ HASH(local);
++	}
++	return &tnls[prio][h];
++}
++
++/**
++ * ip6ip6_tnl_link - add tunnel to hash table
++ *   @t: tunnel to be added
++ **/
++
++static void
++ip6ip6_tnl_link(struct ip6_tnl *t)
++{
++	struct ip6_tnl **tp = ip6ip6_bucket(&t->parms);
++
++	t->next = *tp;
++	write_lock_bh(&ip6ip6_lock);
++	*tp = t;
++	write_unlock_bh(&ip6ip6_lock);
++}
++
++/**
++ * ip6ip6_tnl_unlink - remove tunnel from hash table
++ *   @t: tunnel to be removed
++ **/
++
++static void
++ip6ip6_tnl_unlink(struct ip6_tnl *t)
++{
++	struct ip6_tnl **tp;
++
++	for (tp = ip6ip6_bucket(&t->parms); *tp; tp = &(*tp)->next) {
++		if (t == *tp) {
++			write_lock_bh(&ip6ip6_lock);
++			*tp = t->next;
++			write_unlock_bh(&ip6ip6_lock);
++			break;
++		}
++	}
++}
++
++/**
++ * ip6_tnl_create() - create a new tunnel
++ *   @p: tunnel parameters
++ *   @pt: pointer to new tunnel
++ *
++ * Description:
++ *   Create tunnel matching given parameters.
++ * 
++ * Return: 
++ *   0 on success
++ **/
++
++static int
++ip6_tnl_create(struct ip6_tnl_parm *p, struct ip6_tnl **pt)
++{
++	struct net_device *dev;
++	struct ip6_tnl *t;
++	char name[IFNAMSIZ];
++	int err;
++
++	if (p->name[0]) {
++		memcpy(name, p->name, IFNAMSIZ - 1);
++		name[IFNAMSIZ - 1] = 0;
++	} else {
++		int i;
++		for (i = 1; i < IP6_TNL_MAX; i++) {
++			sprintf(name, "ip6tnl%d", i);
++			if (__dev_get_by_name(name) == NULL)
++				break;
++		}
++		if (i == IP6_TNL_MAX) 
++			return -ENOBUFS;
++	}
++	dev = alloc_netdev(sizeof (*t), name, ip6ip6_tnl_dev_setup);
++	if (dev == NULL)
++		return -ENOMEM;
++
++	t = dev->priv;
++	dev->init = ip6ip6_tnl_dev_init;
++	t->parms = *p;
++
++	if ((err = register_netdevice(dev)) < 0) {
++		kfree(dev);
++		return err;
++	}
++	dev_hold(dev);
++
++	ip6ip6_tnl_link(t);
++	*pt = t;
++	return 0;
++}
++
++/**
++ * ip6ip6_tnl_locate - find or create tunnel matching given parameters
++ *   @p: tunnel parameters 
++ *   @create: != 0 if allowed to create new tunnel if no match found
++ *
++ * Description:
++ *   ip6ip6_tnl_locate() first tries to locate an existing tunnel
++ *   based on @p. If this is unsuccessful, but @create is set, a new
++ *   tunnel device is created and registered for use.
++ *
++ * Return:
++ *   0 if tunnel located or created,
++ *   -EINVAL if parameters incorrect,
++ *   -EEXIST if @create is set but a matching tunnel already exists,
++ *   -ENODEV if no matching tunnel available and @create is unset
++ **/
++
++static int
++ip6ip6_tnl_locate(struct ip6_tnl_parm *p, struct ip6_tnl **pt, int create)
++{
++	struct in6_addr *remote = &p->raddr;
++	struct in6_addr *local = &p->laddr;
++	struct ip6_tnl *t;
++
++	if (p->proto != IPPROTO_IPV6)
++		return -EINVAL;
++
++	for (t = *ip6ip6_bucket(p); t; t = t->next) {
++		if (!ipv6_addr_cmp(local, &t->parms.laddr) &&
++		    !ipv6_addr_cmp(remote, &t->parms.raddr)) {
++			*pt = t;
++			return (create ? -EEXIST : 0);
++		}
++	}
++	if (!create)
++		return -ENODEV;
++	
++	return ip6_tnl_create(p, pt);
++}
++
++/**
++ * ip6ip6_tnl_dev_uninit - tunnel device uninitializer
++ *   @dev: the device to be destroyed
++ *   
++ * Description:
++ *   ip6ip6_tnl_dev_uninit() removes tunnel from its list
++ **/
++
++static void
++ip6ip6_tnl_dev_uninit(struct net_device *dev)
++{
++	struct ip6_tnl *t = dev->priv;
++
++	if (dev == ip6ip6_fb_tnl_dev) {
++		write_lock_bh(&ip6ip6_lock);
++		tnls_wc[0] = NULL;
++		write_unlock_bh(&ip6ip6_lock);
++	} else {
++		ip6ip6_tnl_unlink(t);
++	}
++	ip6_tnl_dst_reset(t);
++	dev_put(dev);
++}
++
++/**
++ * parse_tlv_tnl_enc_lim - handle encapsulation limit option
++ *   @skb: received socket buffer
++ *   @raw: start of the IPv6 header to scan
++ *
++ * Return: 
++ *   0 if none was found, 
++ *   else index to encapsulation limit
++ **/
++
++static __u16
++parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw)
++{
++	struct ipv6hdr *ipv6h = (struct ipv6hdr *) raw;
++	__u8 nexthdr = ipv6h->nexthdr;
++	__u16 off = sizeof (*ipv6h);
++
++	while (ipv6_ext_hdr(nexthdr) && nexthdr != NEXTHDR_NONE) {
++		__u16 optlen = 0;
++		struct ipv6_opt_hdr *hdr;
++		if (raw + off + sizeof (*hdr) > skb->data &&
++		    !pskb_may_pull(skb, raw - skb->data + off + sizeof (*hdr)))
++			break;
++
++		hdr = (struct ipv6_opt_hdr *) (raw + off);
++		if (nexthdr == NEXTHDR_FRAGMENT) {
++			struct frag_hdr *frag_hdr = (struct frag_hdr *) hdr;
++			if (frag_hdr->frag_off)
++				break;
++			optlen = 8;
++		} else if (nexthdr == NEXTHDR_AUTH) {
++			optlen = (hdr->hdrlen + 2) << 2;
++		} else {
++			optlen = ipv6_optlen(hdr);
++		}
++		if (nexthdr == NEXTHDR_DEST) {
++			__u16 i = off + 2;
++			while (1) {
++				struct ipv6_tlv_tnl_enc_lim *tel;
++
++				/* No more room for encapsulation limit */
++				if (i + sizeof (*tel) > off + optlen)
++					break;
++
++				tel = (struct ipv6_tlv_tnl_enc_lim *) &raw[i];
++				/* return index of option if found and valid */
++				if (tel->type == IPV6_TLV_TNL_ENCAP_LIMIT &&
++				    tel->length == 1)
++					return i;
++				/* else jump to next option */
++				if (tel->type)
++					i += tel->length + 2;
++				else
++					i++;
++			}
++		}
++		nexthdr = hdr->nexthdr;
++		off += optlen;
++	}
++	return 0;
++}
++
++/**
++ * ip6ip6_err - tunnel error handler
++ *
++ * Description:
++ *   ip6ip6_err() should handle errors in the tunnel according
++ *   to the specifications in RFC 2473.
++ **/
++
++static void 
++ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
++	   int type, int code, int offset, __u32 info)
++{
++	struct ipv6hdr *ipv6h = (struct ipv6hdr *) skb->data;
++	struct ip6_tnl *t;
++	int rel_msg = 0;
++	int rel_type = ICMPV6_DEST_UNREACH;
++	int rel_code = ICMPV6_ADDR_UNREACH;
++	__u32 rel_info = 0;
++	__u16 len;
++
++	/* If the packet doesn't contain the original IPv6 header we are
++	   in trouble since we might need the source address for further
++	   processing of the error. */
++
++	read_lock(&ip6ip6_lock);
++	if ((t = ip6ip6_tnl_lookup(&ipv6h->daddr, &ipv6h->saddr)) == NULL)
++		goto out;
++
++	switch (type) {
++		__u32 teli;
++		struct ipv6_tlv_tnl_enc_lim *tel;
++		__u32 mtu;
++	case ICMPV6_DEST_UNREACH:
++		if (net_ratelimit())
++			printk(KERN_WARNING
++			       "%s: Path to destination invalid "
++			       "or inactive!\n", t->parms.name);
++		rel_msg = 1;
++		break;
++	case ICMPV6_TIME_EXCEED:
++		if (code == ICMPV6_EXC_HOPLIMIT) {
++			if (net_ratelimit())
++				printk(KERN_WARNING
++				       "%s: Too small hop limit or "
++				       "routing loop in tunnel!\n", 
++				       t->parms.name);
++			rel_msg = 1;
++		}
++		break;
++	case ICMPV6_PARAMPROB:
++		/* ignore if parameter problem not caused by a tunnel
++		   encapsulation limit sub-option */
++		if (code != ICMPV6_HDR_FIELD) {
++			break;
++		}
++		teli = parse_tlv_tnl_enc_lim(skb, skb->data);
++
++		if (teli && teli == ntohl(info) - 2) {
++			tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli];
++			if (tel->encap_limit == 0) {
++				if (net_ratelimit())
++					printk(KERN_WARNING
++					       "%s: Too small encapsulation "
++					       "limit or routing loop in "
++					       "tunnel!\n", t->parms.name);
++				rel_msg = 1;
++			}
++		}
++		break;
++	case ICMPV6_PKT_TOOBIG:
++		mtu = ntohl(info) - offset;
++		if (mtu < IPV6_MIN_MTU)
++			mtu = IPV6_MIN_MTU;
++		t->dev->mtu = mtu;
++
++		if ((len = sizeof (*ipv6h) + ntohs(ipv6h->payload_len)) > mtu) {
++			rel_type = ICMPV6_PKT_TOOBIG;
++			rel_code = 0;
++			rel_info = mtu;
++			rel_msg = 1;
++		}
++		break;
++	}
++	if (rel_msg &&  pskb_may_pull(skb, offset + sizeof (*ipv6h))) {
++		struct rt6_info *rt;
++		struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
++		if (!skb2)
++			goto out;
++
++		dst_release(skb2->dst);
++		skb2->dst = NULL;
++		skb_pull(skb2, offset);
++		skb2->nh.raw = skb2->data;
++
++		/* Try to guess incoming interface */
++		rt = rt6_lookup(&skb2->nh.ipv6h->saddr, NULL, 0, 0);
++
++		if (rt && rt->rt6i_dev)
++			skb2->dev = rt->rt6i_dev;
++
++		icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev);
++
++		if (rt)
++			dst_release(&rt->u.dst);
++
++		kfree_skb(skb2);
++	}
++out:
++	read_unlock(&ip6ip6_lock);
++}
++
++/**
++ * ip6ip6_rcv - decapsulate IPv6 packet and retransmit it locally
++ *   @pskb: pointer to the received socket buffer
++ *   @nhoffp: next-header offset (unused here)
++ *
++ * Return: 0 if the packet was consumed, 1 if it was not handled
++ **/
++
++static int 
++ip6ip6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
++{
++	struct sk_buff *skb = *pskb;
++	struct ipv6hdr *ipv6h;
++	struct ip6_tnl *t;
++
++	if (!pskb_may_pull(skb, sizeof (*ipv6h)))
++		goto discard;
++
++	ipv6h = skb->nh.ipv6h;
++
++	read_lock(&ip6ip6_lock);
++
++	if ((t = ip6ip6_tnl_lookup(&ipv6h->saddr, &ipv6h->daddr)) != NULL) {
++		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++			kfree_skb(skb);
++			return 0;
++		}
++
++		if (!(t->parms.flags & IP6_TNL_F_CAP_RCV)) {
++			t->stat.rx_dropped++;
++			read_unlock(&ip6ip6_lock);
++			goto discard;
++		}
++		secpath_reset(skb);
++		skb->mac.raw = skb->nh.raw;
++		skb->nh.raw = skb->data;
++		skb->protocol = htons(ETH_P_IPV6);
++		skb->pkt_type = PACKET_HOST;
++		memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
++		skb->dev = t->dev;
++		dst_release(skb->dst);
++		skb->dst = NULL;
++		t->stat.rx_packets++;
++		t->stat.rx_bytes += skb->len;
++		netif_rx(skb);
++		read_unlock(&ip6ip6_lock);
++		return 0;
++	}
++	read_unlock(&ip6ip6_lock);
++	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev);
++discard:
++	return 1;
++}
++
++static inline struct ipv6_txoptions *create_tel(__u8 encap_limit)
++{
++	struct ipv6_tlv_tnl_enc_lim *tel;
++	struct ipv6_txoptions *opt;
++	__u8 *raw;
++
++	int opt_len = sizeof(*opt) + 8;
++
++	if (!(opt = kmalloc(opt_len, GFP_ATOMIC))) {
++		return NULL;
++	}
++	memset(opt, 0, opt_len);
++	opt->tot_len = opt_len;
++	opt->dst0opt = (struct ipv6_opt_hdr *) (opt + 1);
++	opt->opt_nflen = 8;
++
++	tel = (struct ipv6_tlv_tnl_enc_lim *) (opt->dst0opt + 1);
++	tel->type = IPV6_TLV_TNL_ENCAP_LIMIT;
++	tel->length = 1;
++	tel->encap_limit = encap_limit;
++
++	raw = (__u8 *) opt->dst0opt;
++	raw[5] = IPV6_TLV_PADN;
++	raw[6] = 1;
++
++	return opt;
++}
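
Aside: create_tel() emits the smallest legal destination options header
carrying the RFC 2473 tunnel encapsulation limit option, padded out to
eight bytes with a PadN option. Spelled out as bytes (option type values
as defined by IPV6_TLV_TNL_ENCAP_LIMIT and IPV6_TLV_PADN in the kernel
headers):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
	/* The 8-byte destination options header built by create_tel()
	 * for an encapsulation limit of 4: */
	uint8_t dstopts[8] = {
		0,	/* next header, filled in later by ipv6_push_nfrag_opts() */
		0,	/* hdr ext len: (0 + 1) * 8 = 8 bytes total */
		4,	/* IPV6_TLV_TNL_ENCAP_LIMIT */
		1,	/* option data length */
		4,	/* the encapsulation limit itself */
		1,	/* IPV6_TLV_PADN */
		1,	/* one byte of padding follows */
		0,	/* the padding byte */
	};
	unsigned i;

	for (i = 0; i < sizeof(dstopts); i++)
		printf("%02x ", dstopts[i]);
	printf("\n");
	return 0;
}
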
++
++/**
++ * ip6ip6_tnl_addr_conflict - compare packet addresses to tunnel's own
++ *   @t: the outgoing tunnel device
++ *   @hdr: IPv6 header from the incoming packet 
++ *
++ * Description:
++ *   Avoid trivial tunneling loop by checking that tunnel exit-point 
++ *   doesn't match source of incoming packet.
++ *
++ * Return: 
++ *   1 if conflict,
++ *   0 else
++ **/
++
++static inline int
++ip6ip6_tnl_addr_conflict(struct ip6_tnl *t, struct ipv6hdr *hdr)
++{
++	return !ipv6_addr_cmp(&t->parms.raddr, &hdr->saddr);
++}
++
++/**
++ * ip6ip6_tnl_xmit - encapsulate packet and send 
++ *   @skb: the outgoing socket buffer
++ *   @dev: the outgoing tunnel device 
++ *
++ * Description:
++ *   Build new header and do some sanity checks on the packet before sending
++ *   it.
++ *
++ * Return: 
++ *   0
++ **/
++
++static int 
++ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
++{
++	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
++	struct net_device_stats *stats = &t->stat;
++	struct ipv6hdr *ipv6h = skb->nh.ipv6h;
++	struct ipv6_txoptions *opt = NULL;
++	int encap_limit = -1;
++	__u16 offset;
++	struct flowi fl;
++	struct dst_entry *dst;
++	struct net_device *tdev;
++	int mtu;
++	int max_headroom = sizeof(struct ipv6hdr);
++	u8 proto;
++	int err;
++	int pkt_len;
++
++	if (t->recursion++) {
++		stats->collisions++;
++		goto tx_err;
++	}
++	if (skb->protocol != htons(ETH_P_IPV6) ||
++	    !(t->parms.flags & IP6_TNL_F_CAP_XMIT) ||
++	    ip6ip6_tnl_addr_conflict(t, ipv6h)) {
++		goto tx_err;
++	}
++	if ((offset = parse_tlv_tnl_enc_lim(skb, skb->nh.raw)) > 0) {
++		struct ipv6_tlv_tnl_enc_lim *tel;
++		tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->nh.raw[offset];
++		if (tel->encap_limit == 0) {
++			icmpv6_send(skb, ICMPV6_PARAMPROB,
++				    ICMPV6_HDR_FIELD, offset + 2, skb->dev);
++			goto tx_err;
++		}
++		encap_limit = tel->encap_limit - 1;
++	} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
++		encap_limit = t->parms.encap_limit;
++	}
++	memcpy(&fl, &t->fl, sizeof (fl));
++	proto = fl.proto;
++
++	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS))
++		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_TCLASS_MASK);
++	if ((t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL))
++		fl.fl6_flowlabel |= (*(__u32 *) ipv6h & IPV6_FLOWLABEL_MASK);
++
++	if (encap_limit >= 0 && (opt = create_tel(encap_limit)) == NULL)
++		goto tx_err;
++
++	if ((dst = ip6_tnl_dst_check(t)) != NULL)
++		dst_hold(dst);
++	else
++		dst = ip6_route_output(NULL, &fl);
++
++	if (dst->error || xfrm_lookup(&dst, &fl, NULL, 0) < 0)
++		goto tx_err_link_failure;
++
++	tdev = dst->dev;
++
++	if (tdev == dev) {
++		stats->collisions++;
++		if (net_ratelimit())
++			printk(KERN_WARNING 
++			       "%s: Local routing loop detected!\n",
++			       t->parms.name);
++		goto tx_err_dst_release;
++	}
++	mtu = dst_pmtu(dst) - sizeof (*ipv6h);
++	if (opt) {
++		max_headroom += 8;
++		mtu -= 8;
++	}
++	if (mtu < IPV6_MIN_MTU)
++		mtu = IPV6_MIN_MTU;
++	if (skb->dst && mtu < dst_pmtu(skb->dst)) {
++		struct rt6_info *rt = (struct rt6_info *) skb->dst;
++		rt->rt6i_flags |= RTF_MODIFIED;
++		rt->u.dst.metrics[RTAX_MTU-1] = mtu;
++	}
++	if (skb->len > mtu) {
++		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
++		goto tx_err_dst_release;
++	}
++
++	/*
++	 * Okay, now see if we can stuff it in the buffer as-is.
++	 */
++	max_headroom += LL_RESERVED_SPACE(tdev);
++	
++	if (skb_headroom(skb) < max_headroom || 
++	    skb_cloned(skb) || skb_shared(skb)) {
++		struct sk_buff *new_skb;
++		
++		if (!(new_skb = skb_realloc_headroom(skb, max_headroom)))
++			goto tx_err_dst_release;
++
++		if (skb->sk)
++			skb_set_owner_w(new_skb, skb->sk);
++		kfree_skb(skb);
++		skb = new_skb;
++	}
++	dst_release(skb->dst);
++	skb->dst = dst_clone(dst);
++
++	skb->h.raw = skb->nh.raw;
++
++	if (opt)
++		ipv6_push_nfrag_opts(skb, opt, &proto, NULL);
++
++	skb->nh.raw = skb_push(skb, sizeof(struct ipv6hdr));
++	ipv6h = skb->nh.ipv6h;
++	*(u32*)ipv6h = fl.fl6_flowlabel | htonl(0x60000000);
++	ipv6h->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++	ipv6h->hop_limit = t->parms.hop_limit;
++	ipv6h->nexthdr = proto;
++	ipv6_addr_copy(&ipv6h->saddr, &fl.fl6_src);
++	ipv6_addr_copy(&ipv6h->daddr, &fl.fl6_dst);
++	nf_reset(skb);
++	pkt_len = skb->len;
++	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, 
++		      skb->dst->dev, dst_output);
++
++	if (err == NET_XMIT_SUCCESS || err == NET_XMIT_CN) {
++		stats->tx_bytes += pkt_len;
++		stats->tx_packets++;
++	} else {
++		stats->tx_errors++;
++		stats->tx_aborted_errors++;
++	}
++	ip6_tnl_dst_store(t, dst);
++
++	if (opt)
++		kfree(opt);
++
++	t->recursion--;
++	return 0;
++tx_err_link_failure:
++	stats->tx_carrier_errors++;
++	dst_link_failure(skb);
++tx_err_dst_release:
++	dst_release(dst);
++	if (opt)
++		kfree(opt);
++tx_err:
++	stats->tx_errors++;
++	stats->tx_dropped++;
++	kfree_skb(skb);
++	t->recursion--;
++	return 0;
++}
++
++static void ip6_tnl_set_cap(struct ip6_tnl *t)
++{
++	struct ip6_tnl_parm *p = &t->parms;
++	struct in6_addr *laddr = &p->laddr;
++	struct in6_addr *raddr = &p->raddr;
++	int ltype = ipv6_addr_type(laddr);
++	int rtype = ipv6_addr_type(raddr);
++
++	p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV);
++
++	if (ltype != IPV6_ADDR_ANY && rtype != IPV6_ADDR_ANY &&
++	    ((ltype|rtype) &
++	     (IPV6_ADDR_UNICAST|
++	      IPV6_ADDR_LOOPBACK|IPV6_ADDR_LINKLOCAL|
++	      IPV6_ADDR_MAPPED|IPV6_ADDR_RESERVED)) == IPV6_ADDR_UNICAST) {
++		struct net_device *ldev = NULL;
++		int l_ok = 1;
++		int r_ok = 1;
++
++		if (p->link)
++			ldev = dev_get_by_index(p->link);
++		
++		if ((ltype&IPV6_ADDR_UNICAST) && !ipv6_chk_addr(laddr, ldev))
++			l_ok = 0;
++		
++		if ((rtype&IPV6_ADDR_UNICAST) && ipv6_chk_addr(raddr, NULL))
++			r_ok = 0;
++		
++		if (l_ok && r_ok) {
++			if (ltype&IPV6_ADDR_UNICAST)
++				p->flags |= IP6_TNL_F_CAP_XMIT;
++			if (rtype&IPV6_ADDR_UNICAST)
++				p->flags |= IP6_TNL_F_CAP_RCV;
++		}
++		if (ldev)
++			dev_put(ldev);
++	}
++}
++
++static void ip6ip6_tnl_link_config(struct ip6_tnl *t)
++{
++	struct net_device *dev = t->dev;
++	struct ip6_tnl_parm *p = &t->parms;
++	struct flowi *fl = &t->fl;
++
++	ipv6_addr_copy(&fl->fl6_src, &p->laddr);
++	ipv6_addr_copy(&fl->fl6_dst, &p->raddr);
++	fl->oif = p->link;
++	fl->fl6_flowlabel = 0;
++
++	if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS))
++		fl->fl6_flowlabel |= IPV6_TCLASS_MASK & p->flowinfo;
++	if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL))
++		fl->fl6_flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo;
++
++	ip6_tnl_set_cap(t);
++
++	if (p->flags&IP6_TNL_F_CAP_XMIT && p->flags&IP6_TNL_F_CAP_RCV)
++		dev->flags |= IFF_POINTOPOINT;
++	else
++		dev->flags &= ~IFF_POINTOPOINT;
++
++	dev->iflink = p->link;
++
++	if (p->flags & IP6_TNL_F_CAP_XMIT) {
++		struct rt6_info *rt = rt6_lookup(&p->raddr, &p->laddr,
++						 p->link, 0);
++
++		if (rt == NULL)
++			return;
++
++		if (rt->rt6i_dev) {
++			dev->hard_header_len = rt->rt6i_dev->hard_header_len +
++				sizeof (struct ipv6hdr);
++
++			dev->mtu = rt->rt6i_dev->mtu - sizeof (struct ipv6hdr);
++
++			if (dev->mtu < IPV6_MIN_MTU)
++				dev->mtu = IPV6_MIN_MTU;
++		}
++		dst_release(&rt->u.dst);
++	}
++}
++
++/**
++ * ip6ip6_tnl_change - update the tunnel parameters
++ *   @t: tunnel to be changed
++ *   @p: tunnel configuration parameters
++ *
++ * Description:
++ *   ip6ip6_tnl_change() updates the tunnel parameters
++ **/
++
++static int
++ip6ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p)
++{
++	ipv6_addr_copy(&t->parms.laddr, &p->laddr);
++	ipv6_addr_copy(&t->parms.raddr, &p->raddr);
++	t->parms.flags = p->flags;
++	t->parms.hop_limit = p->hop_limit;
++	t->parms.encap_limit = p->encap_limit;
++	t->parms.flowinfo = p->flowinfo;
++	ip6ip6_tnl_link_config(t);
++	return 0;
++}
++
++/**
++ * ip6ip6_tnl_ioctl - configure ipv6 tunnels from userspace 
++ *   @dev: virtual device associated with tunnel
++ *   @ifr: parameters passed from userspace
++ *   @cmd: command to be performed
++ *
++ * Description:
++ *   ip6ip6_tnl_ioctl() is used for managing IPv6 tunnels 
++ *   from userspace. 
++ *
++ *   The possible commands are the following:
++ *     %SIOCGETTUNNEL: get tunnel parameters for device
++ *     %SIOCADDTUNNEL: add tunnel matching given tunnel parameters
++ *     %SIOCCHGTUNNEL: change tunnel parameters to those given
++ *     %SIOCDELTUNNEL: delete tunnel
++ *
++ *   The fallback device "ip6tnl0", created during module 
++ *   initialization, can be used for creating other tunnel devices.
++ *
++ * Return:
++ *   0 on success,
++ *   %-EFAULT if unable to copy data to or from userspace,
++ *   %-EPERM if current process lacks %CAP_NET_ADMIN,
++ *   %-EINVAL if passed tunnel parameters are invalid,
++ *   %-EEXIST if changing a tunnel's parameters would cause a conflict,
++ *   %-ENODEV if attempting to change or delete a nonexistent device
++ **/
++
++static int
++ip6ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
++{
++	int err = 0;
++	int create;
++	struct ip6_tnl_parm p;
++	struct ip6_tnl *t = NULL;
++
++	switch (cmd) {
++	case SIOCGETTUNNEL:
++		if (dev == ip6ip6_fb_tnl_dev) {
++			if (copy_from_user(&p,
++					   ifr->ifr_ifru.ifru_data,
++					   sizeof (p))) {
++				err = -EFAULT;
++				break;
++			}
++			if ((err = ip6ip6_tnl_locate(&p, &t, 0)) == -ENODEV)
++				t = (struct ip6_tnl *) dev->priv;
++			else if (err)
++				break;
++		} else
++			t = (struct ip6_tnl *) dev->priv;
++
++		memcpy(&p, &t->parms, sizeof (p));
++		if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) {
++			err = -EFAULT;
++		}
++		break;
++	case SIOCADDTUNNEL:
++	case SIOCCHGTUNNEL:
++		err = -EPERM;
++		create = (cmd == SIOCADDTUNNEL);
++		if (!capable(CAP_NET_ADMIN))
++			break;
++		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) {
++			err = -EFAULT;
++			break;
++		}
++		if (!create && dev != ip6ip6_fb_tnl_dev) {
++			t = (struct ip6_tnl *) dev->priv;
++		}
++		if (!t && (err = ip6ip6_tnl_locate(&p, &t, create))) {
++			break;
++		}
++		if (cmd == SIOCCHGTUNNEL) {
++			if (t->dev != dev) {
++				err = -EEXIST;
++				break;
++			}
++			ip6ip6_tnl_unlink(t);
++			err = ip6ip6_tnl_change(t, &p);
++			ip6ip6_tnl_link(t);
++			netdev_state_change(dev);
++		}
++		if (copy_to_user(ifr->ifr_ifru.ifru_data,
++				 &t->parms, sizeof (p))) {
++			err = -EFAULT;
++		} else {
++			err = 0;
++		}
++		break;
++	case SIOCDELTUNNEL:
++		err = -EPERM;
++		if (!capable(CAP_NET_ADMIN))
++			break;
++
++		if (dev == ip6ip6_fb_tnl_dev) {
++			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data,
++					   sizeof (p))) {
++				err = -EFAULT;
++				break;
++			}
++			err = ip6ip6_tnl_locate(&p, &t, 0);
++			if (err)
++				break;
++			if (t == ip6ip6_fb_tnl_dev->priv) {
++				err = -EPERM;
++				break;
++			}
++		} else {
++			t = (struct ip6_tnl *) dev->priv;
++		}
++		err = unregister_netdevice(t->dev);
++		break;
++	default:
++		err = -EINVAL;
++	}
++	return err;
++}
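
Aside: a minimal user-space sketch of the ioctl interface documented
above, assuming struct ip6_tnl_parm and SIOCGETTUNNEL are exported to
userland by this patch's headers (untested; it merely queries the
fallback device):

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <net/if.h>
#include <linux/if_tunnel.h>	/* SIOCGETTUNNEL */
#include <linux/ip6_tunnel.h>	/* struct ip6_tnl_parm */

int main(void)
{
	struct ip6_tnl_parm p;
	struct ifreq ifr;
	int fd = socket(AF_INET6, SOCK_DGRAM, 0);

	if (fd < 0)
		return 1;
	memset(&p, 0, sizeof(p));
	memset(&ifr, 0, sizeof(ifr));
	strncpy(ifr.ifr_name, "ip6tnl0", IFNAMSIZ - 1);	/* the fallback device */
	ifr.ifr_ifru.ifru_data = (char *) &p;

	if (ioctl(fd, SIOCGETTUNNEL, &ifr) < 0) {
		perror("SIOCGETTUNNEL");
		close(fd);
		return 1;
	}
	printf("%s: hop_limit=%u encap_limit=%u flags=0x%x\n",
	       p.name, p.hop_limit, p.encap_limit, p.flags);
	close(fd);
	return 0;
}
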
++
++/**
++ * ip6ip6_tnl_get_stats - return the stats for tunnel device 
++ *   @dev: virtual device associated with tunnel
++ *
++ * Return: stats for device
++ **/
++
++static struct net_device_stats *
++ip6ip6_tnl_get_stats(struct net_device *dev)
++{
++	return &(((struct ip6_tnl *) dev->priv)->stat);
++}
++
++/**
++ * ip6ip6_tnl_change_mtu - change mtu manually for tunnel device
++ *   @dev: virtual device associated with tunnel
++ *   @new_mtu: the new mtu
++ *
++ * Return:
++ *   0 on success,
++ *   %-EINVAL if mtu too small
++ **/
++
++static int
++ip6ip6_tnl_change_mtu(struct net_device *dev, int new_mtu)
++{
++	if (new_mtu < IPV6_MIN_MTU) {
++		return -EINVAL;
++	}
++	dev->mtu = new_mtu;
++	return 0;
++}
++
++/**
++ * ip6ip6_tnl_dev_setup - setup virtual tunnel device
++ *   @dev: virtual device associated with tunnel
++ *
++ * Description:
++ *   Initialize function pointers and device parameters
++ **/
++
++static void ip6ip6_tnl_dev_setup(struct net_device *dev)
++{
++	SET_MODULE_OWNER(dev);
++	dev->uninit = ip6ip6_tnl_dev_uninit;
++	dev->destructor = (void (*)(struct net_device *))kfree;
++	dev->hard_start_xmit = ip6ip6_tnl_xmit;
++	dev->get_stats = ip6ip6_tnl_get_stats;
++	dev->do_ioctl = ip6ip6_tnl_ioctl;
++	dev->change_mtu = ip6ip6_tnl_change_mtu;
++
++	dev->type = ARPHRD_TUNNEL6;
++	dev->hard_header_len = LL_MAX_HEADER + sizeof (struct ipv6hdr);
++	dev->mtu = ETH_DATA_LEN - sizeof (struct ipv6hdr);
++	dev->flags |= IFF_NOARP;
++	/* Hmm... MAX_ADDR_LEN is 8, so the ipv6 addresses can't be
++	   copied to dev->dev_addr and dev->broadcast, like the ipv4
++	   addresses were in ipip.c, ip_gre.c and sit.c. */
++	dev->addr_len = 0;
++}
++
++
++/**
++ * ip6ip6_tnl_dev_init_gen - general initializer for all tunnel devices
++ *   @dev: virtual device associated with tunnel
++ **/
++
++static inline void
++ip6ip6_tnl_dev_init_gen(struct net_device *dev)
++{
++	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
++	t->fl.proto = IPPROTO_IPV6;
++	t->dev = dev;
++	strcpy(t->parms.name, dev->name);
++}
++
++/**
++ * ip6ip6_tnl_dev_init - initializer for all non fallback tunnel devices
++ *   @dev: virtual device associated with tunnel
++ **/
++
++static int
++ip6ip6_tnl_dev_init(struct net_device *dev)
++{
++	struct ip6_tnl *t = (struct ip6_tnl *) dev->priv;
++	ip6ip6_tnl_dev_init_gen(dev);
++	ip6ip6_tnl_link_config(t);
++	return 0;
++}
++
++/**
++ * ip6ip6_fb_tnl_dev_init - initializer for fallback tunnel device
++ *   @dev: fallback device
++ *
++ * Return: 0
++ **/
++
++static int 
++ip6ip6_fb_tnl_dev_init(struct net_device *dev)
++{
++	struct ip6_tnl *t = dev->priv;
++	ip6ip6_tnl_dev_init_gen(dev);
++	dev_hold(dev);
++	tnls_wc[0] = t;
++	return 0;
++}
++
++static struct xfrm6_tunnel ip6ip6_handler = {
++	.handler = ip6ip6_rcv,
++	.err_handler = ip6ip6_err,
++};
++
++/**
++ * ip6_tunnel_init - register protocol and reserve needed resources
++ *
++ * Return: 0 on success
++ **/
++
++int __init ip6_tunnel_init(void)
++{
++	int  err;
++
++	if (xfrm6_tunnel_register(&ip6ip6_handler) < 0) {
++		printk(KERN_ERR "ip6ip6 init: can't register tunnel\n");
++		return -EAGAIN;
++	}
++	ip6ip6_fb_tnl_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6tnl0",
++					 ip6ip6_tnl_dev_setup);
++
++	if (!ip6ip6_fb_tnl_dev) {
++		err = -ENOMEM;
++		goto fail;
++	}
++	ip6ip6_fb_tnl_dev->init = ip6ip6_fb_tnl_dev_init;
++
++	if ((err = register_netdev(ip6ip6_fb_tnl_dev))) {
++		kfree(ip6ip6_fb_tnl_dev);
++		goto fail;
++	}
++	return 0;
++fail:
++	xfrm6_tunnel_deregister(&ip6ip6_handler);
++	return err;
++}
++
++/**
++ * ip6_tunnel_cleanup - free resources and unregister protocol
++ **/
++
++void ip6_tunnel_cleanup(void)
++{
++	if (xfrm6_tunnel_deregister(&ip6ip6_handler) < 0)
++		printk(KERN_INFO "ip6ip6 close: can't deregister tunnel\n");
++
++	unregister_netdev(ip6ip6_fb_tnl_dev);
++}
++
++#ifdef MODULE
++module_init(ip6_tunnel_init);
++module_exit(ip6_tunnel_cleanup);
++#endif
+diff -Nru a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv6/ipcomp6.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,376 @@
++/*
++ * IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173
++ *
++ * Copyright (C)2003 USAGI/WIDE Project
++ *
++ * Author	Mitsuru KANDA  <mk at linux-ipv6.org>
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * 
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ * 
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ */
++/* 
++ * [Memo]
++ *
++ * Outbound:
++ *  The compression of an IP datagram MUST be done before AH/ESP
++ *  processing, fragmentation, and the addition of Hop-by-Hop/Routing
++ *  headers.
++ *
++ * Inbound:
++ *  The decompression of an IP datagram MUST be done after reassembly
++ *  and AH/ESP processing.
++ */
++#include <linux/config.h>
++#include <linux/module.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/ipcomp.h>
++#include <asm/scatterlist.h>
++#include <linux/crypto.h>
++#include <linux/pfkeyv2.h>
++#include <linux/random.h>
++#include <net/icmp.h>
++#include <net/ipv6.h>
++#include <linux/ipv6.h>
++#include <linux/icmpv6.h>
++
++static int ipcomp6_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	int err = 0;
++	u8 nexthdr = 0;
++	int hdr_len = skb->h.raw - skb->nh.raw;
++	unsigned char *tmp_hdr = NULL;
++	struct ipv6hdr *iph;
++	int plen, dlen;
++	struct ipcomp_data *ipcd = x->data;
++	u8 *start, *scratch = ipcd->scratch;
++
++	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
++		skb_linearize(skb, GFP_ATOMIC) != 0) {
++		err = -ENOMEM;
++		goto out;
++	}
++
++	skb->ip_summed = CHECKSUM_NONE;
++
++	/* Remove ipcomp header and decompress original payload */
++	iph = skb->nh.ipv6h;
++	tmp_hdr = kmalloc(hdr_len, GFP_ATOMIC);
++	if (!tmp_hdr)
++		goto out;
++	memcpy(tmp_hdr, iph, hdr_len);
++	nexthdr = *(u8 *)skb->data;
++	skb_pull(skb, sizeof(struct ipv6_comp_hdr)); 
++	skb->nh.raw += sizeof(struct ipv6_comp_hdr);
++	memcpy(skb->nh.raw, tmp_hdr, hdr_len);
++	iph = skb->nh.ipv6h;
++	iph->payload_len = htons(ntohs(iph->payload_len) - sizeof(struct ipv6_comp_hdr));
++	skb->h.raw = skb->data;
++
++	/* decompression */
++	plen = skb->len;
++	dlen = IPCOMP_SCRATCH_SIZE;
++	start = skb->data;
++
++	err = crypto_comp_decompress(ipcd->tfm, start, plen, scratch, &dlen);
++	if (err) {
++		err = -EINVAL;
++		goto out;
++	}
++
++	if (dlen < (plen + sizeof(struct ipv6_comp_hdr))) {
++		err = -EINVAL;
++		goto out;
++	}
++
++	err = pskb_expand_head(skb, 0, dlen - plen, GFP_ATOMIC);
++	if (err) {
++		goto out;
++	}
++
++	skb_put(skb, dlen - plen);
++	memcpy(skb->data, scratch, dlen);
++
++	iph = skb->nh.ipv6h;
++	iph->payload_len = htons(skb->len);
++	
++out:
++	if (tmp_hdr)
++		kfree(tmp_hdr);
++	if (err)
++		goto error_out;
++	return nexthdr;
++error_out:
++	return err;
++}
++
++static int ipcomp6_output(struct sk_buff *skb)
++{
++	int err;
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x = dst->xfrm;
++	struct ipv6hdr *top_iph;
++	int hdr_len;
++	struct ipv6_comp_hdr *ipch;
++	struct ipcomp_data *ipcd = x->data;
++	int plen, dlen;
++	u8 *start, *scratch = ipcd->scratch;
++
++	hdr_len = skb->h.raw - skb->data;
++
++	/* check whether datagram len is larger than threshold */
++	if ((skb->len - hdr_len) < ipcd->threshold) {
++		goto out_ok;
++	}
++
++	if ((skb_is_nonlinear(skb) || skb_cloned(skb)) &&
++		skb_linearize(skb, GFP_ATOMIC) != 0) {
++		err = -ENOMEM;
++		goto error;
++	}
++
++	/* compression */
++	plen = skb->len - hdr_len;
++	dlen = IPCOMP_SCRATCH_SIZE;
++	start = skb->h.raw;
++
++	err = crypto_comp_compress(ipcd->tfm, start, plen, scratch, &dlen);
++	if (err) {
++		goto error;
++	}
++	if ((dlen + sizeof(struct ipv6_comp_hdr)) >= plen) {
++		goto out_ok;
++	}
++	memcpy(start + sizeof(struct ip_comp_hdr), scratch, dlen);
++	pskb_trim(skb, hdr_len + dlen + sizeof(struct ip_comp_hdr));
++
++	/* insert ipcomp header and replace datagram */
++	top_iph = (struct ipv6hdr *)skb->data;
++
++	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++
++	ipch = (struct ipv6_comp_hdr *)start;
++	ipch->nexthdr = *skb->nh.raw;
++	ipch->flags = 0;
++	ipch->cpi = htons((u16 )ntohl(x->id.spi));
++	*skb->nh.raw = IPPROTO_COMP;
++
++out_ok:
++	err = 0;
++
++error:
++	return err;
++}
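
Aside: the 16-bit IPComp CPI is carried in the low half of the SA's
32-bit SPI, so the output path truncates and the error handler widens
again. Since ntohl() and htonl() perform the same byte swap,
ipcomp6_err()'s ntohl(ntohs(cpi)) reconstructs exactly the network-order
SPI stored in x->id.spi. A stand-alone round-trip check:

#include <stdio.h>
#include <stdint.h>
#include <arpa/inet.h>

int main(void)
{
	uint32_t spi = htonl(0x1234);	/* x->id.spi, network byte order */
	/* ipcomp6_output(): CPI as it appears on the wire */
	uint16_t cpi = htons((uint16_t) ntohl(spi));
	/* ipcomp6_err(): SPI recovered for xfrm_state_lookup() */
	uint32_t back = ntohl(ntohs(cpi));

	printf("cpi=0x%04x, round trip %s\n", ntohs(cpi),
	       back == spi ? "matches" : "differs");
	return 0;
}
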
++
++static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
++		                int type, int code, int offset, __u32 info)
++{
++	u32 spi;
++	struct ipv6hdr *iph = (struct ipv6hdr*)skb->data;
++	struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset);
++	struct xfrm_state *x;
++
++	if (type != ICMPV6_DEST_UNREACH && type != ICMPV6_PKT_TOOBIG)
++		return;
++
++	spi = ntohl(ntohs(ipcomph->cpi));
++	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6);
++	if (!x)
++		return;
++
++	printk(KERN_DEBUG "pmtu discovery on SA IPCOMP/%08x/"
++			"%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
++			spi, NIP6(iph->daddr));
++	xfrm_state_put(x);
++}
++
++static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
++{
++	struct xfrm_state *t = NULL;
++
++	t = xfrm_state_alloc();
++	if (!t)
++		goto out;
++
++	t->id.proto = IPPROTO_IPV6;
++	t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr);
++	memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
++	memcpy(&t->sel, &x->sel, sizeof(t->sel));
++	t->props.family = AF_INET6;
++	t->props.mode = 1;
++	memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
++
++	t->type = xfrm_get_type(IPPROTO_IPV6, t->props.family);
++	if (t->type == NULL)
++		goto error;
++
++	if (t->type->init_state(t, NULL))
++		goto error;
++
++	t->km.state = XFRM_STATE_VALID;
++	atomic_set(&t->tunnel_users, 1);
++
++out:
++	return t;
++
++error:
++	xfrm_state_put(t);
++	goto out;
++}
++
++static int ipcomp6_tunnel_attach(struct xfrm_state *x)
++{
++	int err = 0;
++	struct xfrm_state *t = NULL;
++	u32 spi;
++
++	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr);
++	if (spi)
++		t = xfrm_state_lookup((xfrm_address_t *)&x->id.daddr,
++					      spi, IPPROTO_IPV6, AF_INET6);
++	if (!t) {
++		t = ipcomp6_tunnel_create(x);
++		if (!t) {
++			err = -EINVAL;
++			goto out;
++		}
++		xfrm_state_insert(t);
++		xfrm_state_hold(t);
++	}
++	x->tunnel = t;
++	atomic_inc(&t->tunnel_users);
++
++out:
++	return err;
++}
++
++static void ipcomp6_free_data(struct ipcomp_data *ipcd)
++{
++	if (ipcd->tfm)
++		crypto_free_tfm(ipcd->tfm);
++	if (ipcd->scratch)
++		kfree(ipcd->scratch);
++}
++
++static void ipcomp6_destroy(struct xfrm_state *x)
++{
++	struct ipcomp_data *ipcd = x->data;
++	if (!ipcd)
++		return;
++	xfrm_state_delete_tunnel(x);
++	ipcomp6_free_data(ipcd);
++	kfree(ipcd);
++
++	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
++}
++
++static int ipcomp6_init_state(struct xfrm_state *x, void *args)
++{
++	int err;
++	struct ipcomp_data *ipcd;
++	struct xfrm_algo_desc *calg_desc;
++
++	err = -EINVAL;
++	if (!x->calg)
++		goto out;
++
++	if (x->encap)
++		goto out;
++
++	err = -ENOMEM;
++	ipcd = kmalloc(sizeof(*ipcd), GFP_KERNEL);
++	if (!ipcd)
++		goto error;
++
++	memset(ipcd, 0, sizeof(*ipcd));
++	x->props.header_len = 0;
++	if (x->props.mode)
++		x->props.header_len += sizeof(struct ipv6hdr);
++	
++	ipcd->scratch = kmalloc(IPCOMP_SCRATCH_SIZE, GFP_KERNEL);
++	if (!ipcd->scratch)
++		goto error;
++
++	ipcd->tfm = crypto_alloc_tfm(x->calg->alg_name, 0);
++	if (!ipcd->tfm)
++		goto error;
++
++	if (x->props.mode) {
++		err = ipcomp6_tunnel_attach(x);
++		if (err)
++			goto error;
++	}
++
++	calg_desc = xfrm_calg_get_byname(x->calg->alg_name);
++	BUG_ON(!calg_desc);
++	ipcd->threshold = calg_desc->uinfo.comp.threshold;
++	x->data = ipcd;
++	err = 0;
++out:
++	return err;
++error:
++	if (ipcd) {
++		ipcomp6_free_data(ipcd);
++		kfree(ipcd);
++	}
++
++	goto out;
++}
++
++static struct xfrm_type ipcomp6_type = 
++{
++	.description	= "IPCOMP6",
++	.owner		= THIS_MODULE,
++	.proto		= IPPROTO_COMP,
++	.init_state	= ipcomp6_init_state,
++	.destructor	= ipcomp6_destroy,
++	.input		= ipcomp6_input,
++	.output		= ipcomp6_output,
++};
++
++static struct inet6_protocol ipcomp6_protocol = 
++{
++	.handler	= xfrm6_rcv,
++	.err_handler	= ipcomp6_err,
++	.flags		= INET6_PROTO_NOPOLICY,
++};
++
++static int __init ipcomp6_init(void)
++{
++	if (xfrm_register_type(&ipcomp6_type, AF_INET6) < 0) {
++		printk(KERN_INFO "ipcomp6 init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet6_add_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) {
++		printk(KERN_INFO "ipcomp6 init: can't add protocol\n");
++		xfrm_unregister_type(&ipcomp6_type, AF_INET6);
++		return -EAGAIN;
++	}
++	return 0;
++}
++
++static void __exit ipcomp6_fini(void)
++{
++	if (inet6_del_protocol(&ipcomp6_protocol, IPPROTO_COMP) < 0) 
++		printk(KERN_INFO "ipv6 ipcomp close: can't remove protocol\n");
++	if (xfrm_unregister_type(&ipcomp6_type, AF_INET6) < 0)
++		printk(KERN_INFO "ipv6 ipcomp close: can't remove xfrm type\n");
++}
++
++module_init(ipcomp6_init);
++module_exit(ipcomp6_fini);
++MODULE_LICENSE("GPL");
++MODULE_DESCRIPTION("IP Payload Compression Protocol (IPComp) for IPv6 - RFC3173");
++MODULE_AUTHOR("Mitsuru KANDA <mk at linux-ipv6.org>");
++
++
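
The ipcomp6_output() path above compresses only when two conditions hold: the
payload is at least the algorithm's threshold, and the compressed result plus
the 4-byte IPComp header is actually smaller than the original. A minimal
user-space sketch of that decision, using zlib's compress() as a stand-in for
the kernel's crypto_comp API (the 90-byte threshold is an assumed example
value; the kernel reads it from the calg descriptor):

/* build with: cc ipcomp_demo.c -lz */
#include <stdio.h>
#include <string.h>
#include <zlib.h>

#define IPCOMP_HDR_LEN 4		/* sizeof(struct ipv6_comp_hdr) */

static unsigned char scratch[65400];	/* stand-in for IPCOMP_SCRATCH_SIZE */

/* Mimics the ipcomp6_output() decision: returns the on-the-wire payload
 * length, falling back to the uncompressed length whenever compression
 * is skipped or yields no net gain. */
static unsigned long maybe_compress(const unsigned char *payload,
				    unsigned long plen,
				    unsigned long threshold)
{
	uLongf dlen = sizeof(scratch);

	if (plen < threshold)
		return plen;			/* below threshold: send as-is */
	if (compress(scratch, &dlen, payload, plen) != Z_OK)
		return plen;			/* compression failed: send as-is */
	if (dlen + IPCOMP_HDR_LEN >= plen)
		return plen;			/* no net gain: send as-is */
	return dlen + IPCOMP_HDR_LEN;		/* compressed data + IPComp header */
}

int main(void)
{
	unsigned char payload[2000];

	memset(payload, 'A', sizeof(payload));	/* highly compressible */
	printf("wire length: %lu\n",
	       maybe_compress(payload, sizeof(payload), 90));
	return 0;
}

The input side runs the same scratch buffer in reverse: the payload is
decompressed into scratch, the skb is grown by dlen - plen, and the result is
copied back before the IPv6 payload length is rewritten.
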
+diff -Nru a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
+--- a/net/ipv6/ipv6_sockglue.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/ipv6_sockglue.c	2005-02-13 21:25:09 +11:00
+@@ -51,6 +51,7 @@
+ #include <net/inet_common.h>
+ #include <net/tcp.h>
+ #include <net/udp.h>
++#include <net/xfrm.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -517,6 +518,10 @@
+ 	case IPV6_FLOWLABEL_MGR:
+ 		retv = ipv6_flowlabel_opt(sk, optval, optlen);
+ 		break;
++	case IPV6_IPSEC_POLICY:
++	case IPV6_XFRM_POLICY:
++		retv = xfrm_user_policy(sk, optname, optval, optlen);
++		break;
+ 
+ #ifdef CONFIG_NETFILTER
+ 	default:
+@@ -550,6 +555,15 @@
+ 	if (get_user(len, optlen))
+ 		return -EFAULT;
+ 	switch (optname) {
++	case IPV6_ADDRFORM:
++		if (sk->protocol != IPPROTO_UDP &&
++		    sk->protocol != IPPROTO_TCP)
++			return -EINVAL;
++		if (sk->state != TCP_ESTABLISHED)
++			return -ENOTCONN;
++		val = sk->family;
++		break;
++
+ 	case IPV6_PKTOPTIONS:
+ 	{
+ 		struct msghdr msg;
+@@ -595,7 +609,7 @@
+ 		lock_sock(sk);
+ 		dst = sk_dst_get(sk);
+ 		if (dst) {
+-			val = dst->pmtu;
++			val = dst_pmtu(dst) - dst->header_len;
+ 			dst_release(dst);
+ 		}
+ 		release_sock(sk);
+diff -Nru a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c
+--- a/net/ipv6/ipv6_syms.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/ipv6_syms.c	2005-02-13 21:25:09 +11:00
+@@ -6,6 +6,7 @@
+ #include <net/ipv6.h>
+ #include <net/addrconf.h>
+ #include <net/ip6_route.h>
++#include <net/xfrm.h>
+ 
+ EXPORT_SYMBOL(ipv6_addr_type);
+ EXPORT_SYMBOL(icmpv6_send);
+@@ -33,5 +34,16 @@
+ EXPORT_SYMBOL(ipv6_get_saddr);
+ EXPORT_SYMBOL(ipv6_chk_addr);
+ EXPORT_SYMBOL(in6_dev_finish_destroy);
++EXPORT_SYMBOL(ip6_find_1stfragopt);
++#ifdef CONFIG_XFRM
++EXPORT_SYMBOL(xfrm6_rcv);
++#endif
++EXPORT_SYMBOL(rt6_lookup);
++EXPORT_SYMBOL(fl6_sock_lookup);
++EXPORT_SYMBOL(ipv6_ext_hdr);
++EXPORT_SYMBOL(ip6_append_data);
++EXPORT_SYMBOL(ip6_flush_pending_frames);
++EXPORT_SYMBOL(ip6_push_pending_frames);
++EXPORT_SYMBOL(ipv6_push_nfrag_opts);
+ EXPORT_SYMBOL(ipv6_skip_exthdr);
+ 
+diff -Nru a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
+--- a/net/ipv6/ndisc.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/ndisc.c	2005-02-13 21:25:09 +11:00
+@@ -72,6 +72,7 @@
+ #include <net/addrconf.h>
+ #include <net/icmp.h>
+ 
++#include <net/flow.h>
+ #include <net/checksum.h>
+ #include <linux/proc_fs.h>
+ 
+@@ -139,6 +140,19 @@
+ 	30*HZ, 128, 512, 1024,
+ };
+ 
++/* ND options */
++struct ndisc_options {
++	struct nd_opt_hdr *nd_opt_array[7];
++	struct nd_opt_hdr *nd_opt_piend;
++};
++
++#define nd_opts_src_lladdr	nd_opt_array[ND_OPT_SOURCE_LL_ADDR]
++#define nd_opts_tgt_lladdr	nd_opt_array[ND_OPT_TARGET_LL_ADDR]
++#define nd_opts_pi		nd_opt_array[ND_OPT_PREFIX_INFO]
++#define nd_opts_pi_end		nd_opt_piend
++#define nd_opts_rh		nd_opt_array[ND_OPT_REDIRECT_HDR]
++#define nd_opts_mtu		nd_opt_array[ND_OPT_MTU]
++
+ #define NDISC_OPT_SPACE(len) (((len)+2+7)&~7)
+ 
+ static u8 *ndisc_fill_option(u8 *opt, int type, void *data, int data_len)
+@@ -155,8 +169,8 @@
+ 	return opt + space;
+ }
+ 
+-struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
+-				     struct nd_opt_hdr *end)
++static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur,
++					    struct nd_opt_hdr *end)
+ {
+ 	int type;
+ 	if (!cur || !end || cur >= end)
+@@ -168,8 +182,8 @@
+ 	return (cur <= end && cur->nd_opt_type == type ? cur : NULL);
+ }
+ 
+-struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
+-					  struct ndisc_options *ndopts)
++static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len,
++						 struct ndisc_options *ndopts)
+ {
+ 	struct nd_opt_hdr *nd_opt = (struct nd_opt_hdr *)opt;
+ 
+@@ -333,8 +347,6 @@
+ 	unsigned char ha[MAX_ADDR_LEN];
+ 	unsigned char *h_dest = NULL;
+ 
+-	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+-
+ 	if (dev->hard_header) {
+ 		if (ipv6_addr_type(daddr) & IPV6_ADDR_MULTICAST) {
+ 			ndisc_mc_map(daddr, ha, dev, 1);
+@@ -371,11 +383,38 @@
+  *	Send a Neighbour Advertisement
+  */
+ 
++static int ndisc_output(struct sk_buff *skb)
++{
++	if (skb) {
++		struct neighbour *neigh = (skb->dst ? skb->dst->neighbour : NULL);
++		if (ndisc_build_ll_hdr(skb, skb->dev, &skb->nh.ipv6h->daddr, neigh, skb->len) == 0) {
++			kfree_skb(skb);
++			return -EINVAL;
++		}
++		dev_queue_xmit(skb);
++		return 0;
++	}
++	return -EINVAL;
++}
++
++static inline void ndisc_flow_init(struct flowi *fl, u8 type,
++			    struct in6_addr *saddr, struct in6_addr *daddr)
++{
++	memset(fl, 0, sizeof(*fl));
++	ipv6_addr_copy(&fl->fl6_src, saddr);
++	ipv6_addr_copy(&fl->fl6_dst, daddr);
++	fl->proto	 	= IPPROTO_ICMPV6;
++	fl->fl_icmp_type	= type;
++	fl->fl_icmp_code	= 0;
++}
++
+ void ndisc_send_na(struct net_device *dev, struct neighbour *neigh,
+ 		   struct in6_addr *daddr, struct in6_addr *solicited_addr,
+-		   int router, int solicited, int override, int inc_opt) 
++	 	   int router, int solicited, int override, int inc_opt) 
+ {
+-	static struct in6_addr tmpaddr;
++	struct flowi fl;
++	struct dst_entry* dst;
++	struct in6_addr tmpaddr;
+ 	struct inet6_ifaddr *ifp;
+         struct sock *sk = ndisc_socket->sk;
+ 	struct in6_addr *src_addr;
+@@ -386,6 +425,29 @@
+ 
+ 	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ 
++	/* for anycast or proxy, solicited_addr != src_addr */
++	ifp = ipv6_get_ifaddr(solicited_addr, dev);
++ 	if (ifp) {
++		src_addr = solicited_addr;
++		in6_ifa_put(ifp);
++	} else {
++		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr, 0))
++			return;
++		src_addr = &tmpaddr;
++	}
++
++	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_ADVERTISEMENT, src_addr, daddr);
++
++	dst = ndisc_dst_alloc(dev, neigh, ndisc_output);
++	if (!dst)
++		return;
++
++	err = xfrm_lookup(&dst, &fl, NULL, 0);
++	if (err < 0) {
++		dst_release(dst);
++		return;
++	}
++
+ 	if (inc_opt) {
+ 		if (dev->addr_len)
+ 			len += NDISC_OPT_SPACE(dev->addr_len);
+@@ -398,27 +460,14 @@
+ 
+ 	if (skb == NULL) {
+ 		ND_PRINTK1("send_na: alloc skb failed\n");
+-		return;
+-	}
+-	/* for anycast or proxy, solicited_addr != src_addr */
+-	ifp = ipv6_get_ifaddr(solicited_addr, dev);
+-	if (ifp) {
+-		src_addr = solicited_addr;
+-		in6_ifa_put(ifp);
+-	} else {
+-		if (ipv6_dev_get_saddr(dev, daddr, &tmpaddr, 0))
+-			return;
+-		src_addr = &tmpaddr;
+-	}
+-
+-	if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
+-		kfree_skb(skb);
++		dst_release(dst);
+ 		return;
+ 	}
+ 
++	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+ 	ip6_nd_hdr(sk, skb, dev, src_addr, daddr, IPPROTO_ICMPV6, len);
+ 
+-	msg = (struct nd_msg *) skb_put(skb, len);
++	msg = (struct nd_msg *)(skb->h.raw = skb_put(skb, len));
+ 
+         msg->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
+         msg->icmph.icmp6_code = 0;
+@@ -441,7 +490,8 @@
+ 						 csum_partial((__u8 *) msg, 
+ 							      len, 0));
+ 
+-	dev_queue_xmit(skb);
++	skb->dst = dst;
++	dst_output(skb);
+ 
+ 	ICMP6_INC_STATS(Icmp6OutNeighborAdvertisements);
+ 	ICMP6_INC_STATS(Icmp6OutMsgs);
+@@ -451,6 +501,8 @@
+ 		   struct in6_addr *solicit,
+ 		   struct in6_addr *daddr, struct in6_addr *saddr) 
+ {
++	struct flowi fl;
++	struct dst_entry* dst;
+         struct sock *sk = ndisc_socket->sk;
+         struct sk_buff *skb;
+         struct nd_msg *msg;
+@@ -465,6 +517,18 @@
+ 		saddr = &addr_buf;
+ 	}
+ 
++	ndisc_flow_init(&fl, NDISC_NEIGHBOUR_SOLICITATION, saddr, daddr);
++
++	dst = ndisc_dst_alloc(dev, neigh, ndisc_output);
++	if (!dst)
++		return;
++
++	err = xfrm_lookup(&dst, &fl, NULL, 0);
++	if (err < 0) {
++		dst_release(dst);
++		return;
++	}
++
+ 	len = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);
+ 	send_llinfo = dev->addr_len && ipv6_addr_type(saddr) != IPV6_ADDR_ANY;
+ 	if (send_llinfo)
+@@ -474,17 +538,14 @@
+ 				  1, &err);
+ 	if (skb == NULL) {
+ 		ND_PRINTK1("send_ns: alloc skb failed\n");
++		dst_release(dst);
+ 		return;
+ 	}
+ 
+-	if (ndisc_build_ll_hdr(skb, dev, daddr, neigh, len) == 0) {
+-		kfree_skb(skb);
+-		return;
+-	}
+-
++	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+ 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+ 
+-	msg = (struct nd_msg *)skb_put(skb, len);
++	msg = (struct nd_msg *)(skb->h.raw = skb_put(skb, len));
+ 	msg->icmph.icmp6_type = NDISC_NEIGHBOUR_SOLICITATION;
+ 	msg->icmph.icmp6_code = 0;
+ 	msg->icmph.icmp6_cksum = 0;
+@@ -503,7 +564,8 @@
+ 						 csum_partial((__u8 *) msg, 
+ 							      len, 0));
+ 	/* send it! */
+-	dev_queue_xmit(skb);
++	skb->dst = dst;
++	dst_output(skb);
+ 
+ 	ICMP6_INC_STATS(Icmp6OutNeighborSolicits);
+ 	ICMP6_INC_STATS(Icmp6OutMsgs);
+@@ -512,6 +574,8 @@
+ void ndisc_send_rs(struct net_device *dev, struct in6_addr *saddr,
+ 		   struct in6_addr *daddr)
+ {
++	struct flowi fl;
++	struct dst_entry* dst;
+ 	struct sock *sk = ndisc_socket->sk;
+         struct sk_buff *skb;
+         struct icmp6hdr *hdr;
+@@ -519,6 +583,18 @@
+         int len;
+ 	int err;
+ 
++	ndisc_flow_init(&fl, NDISC_ROUTER_SOLICITATION, saddr, daddr);
++
++	dst = ndisc_dst_alloc(dev, NULL, ndisc_output);
++	if (!dst)
++		return;
++
++	err = xfrm_lookup(&dst, &fl, NULL, 0);
++	if (err < 0) {
++		dst_release(dst);
++		return;
++	}
++
+ 	len = sizeof(struct icmp6hdr);
+ 	if (dev->addr_len)
+ 		len += NDISC_OPT_SPACE(dev->addr_len);
+@@ -530,14 +606,10 @@
+ 		return;
+ 	}
+ 
+-	if (ndisc_build_ll_hdr(skb, dev, daddr, NULL, len) == 0) {
+-		kfree_skb(skb);
+-		return;
+-	}
+-
++	skb_reserve(skb, (dev->hard_header_len + 15) & ~15);
+ 	ip6_nd_hdr(sk, skb, dev, saddr, daddr, IPPROTO_ICMPV6, len);
+ 
+-        hdr = (struct icmp6hdr *) skb_put(skb, len);
++        hdr = (struct icmp6hdr *)(skb->h.raw = skb_put(skb, len));
+         hdr->icmp6_type = NDISC_ROUTER_SOLICITATION;
+         hdr->icmp6_code = 0;
+         hdr->icmp6_cksum = 0;
+@@ -554,7 +626,8 @@
+ 					   csum_partial((__u8 *) hdr, len, 0));
+ 
+ 	/* send it! */
+-	dev_queue_xmit(skb);
++	skb->dst = dst;
++	dst_output(skb);
+ 
+ 	ICMP6_INC_STATS(Icmp6OutRouterSolicits);
+ 	ICMP6_INC_STATS(Icmp6OutMsgs);
+@@ -598,7 +671,7 @@
+ 	}
+ }
+ 
+-void ndisc_recv_ns(struct sk_buff *skb)
++static void ndisc_recv_ns(struct sk_buff *skb)
+ {
+ 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+ 	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+@@ -610,6 +683,7 @@
+ 	struct net_device *dev = skb->dev;
+ 	struct inet6_ifaddr *ifp;
+ 	struct neighbour *neigh;
++	int addr_type = ipv6_addr_type(saddr);
+ 
+ 	if (skb->len < sizeof(struct nd_msg)) {
+ 		if (net_ratelimit())
+@@ -623,6 +697,20 @@
+ 		return;
+ 	}
+ 
++	/*
++	 * RFC2461 7.1.1:
++	 * DAD has to be destined for solicited node multicast address.
++	 */
++	if (addr_type == IPV6_ADDR_ANY &&
++	    !(daddr->s6_addr32[0] == htonl(0xff020000) &&
++	      daddr->s6_addr32[1] == htonl(0x00000000) &&
++	      daddr->s6_addr32[2] == htonl(0x00000001) &&
++	      daddr->s6_addr [12] == 0xff )) {
++		if (net_ratelimit())
++			printk(KERN_DEBUG "ICMP6 NS: bad DAD packet (wrong destination)\n");
++		return;
++	}
++
+ 	if (!ndisc_parse_options(msg->opt, ndoptlen, &ndopts)) {
+ 		if (net_ratelimit())
+ 			printk(KERN_WARNING "ICMP NS: invalid ND option, ignored.\n");
+@@ -637,23 +725,20 @@
+ 				printk(KERN_WARNING "ICMP NS: bad lladdr length.\n");
+ 			return;
+ 		}
+-	}
+ 
+-	/* XXX: RFC2461 7.1.1:
+-	 * 	If the IP source address is the unspecified address, there
+-	 *	MUST NOT be source link-layer address option in the message.
+-	 *
+-	 *	NOTE! Linux kernel < 2.4.4 broke this rule.
+-	 */
+-		 	
+-	/* XXX: RFC2461 7.1.1:
+-	 *	If the IP source address is the unspecified address, the IP
+-      	 *	destination address MUST be a solicited-node multicast address.
+-	 */
++		/* XXX: RFC2461 7.1.1:
++	 	 *	If the IP source address is the unspecified address, 
++		 *	there MUST NOT be source link-layer address option 
++		 *	in the message.
++		 */
++		if (addr_type == IPV6_ADDR_ANY) {
++			if (net_ratelimit())
++				printk(KERN_WARNING "ICMP6 NS: bad DAD packet (link-layer address option)\n");
++			return;
++		}
++	}
+ 
+ 	if ((ifp = ipv6_get_ifaddr(&msg->target, dev)) != NULL) {
+-		int addr_type = ipv6_addr_type(saddr);
+-
+ 		if (ifp->flags & IFA_F_TENTATIVE) {
+ 			/* Address is tentative. If the source
+ 			   is unspecified address, it is someone
+@@ -686,8 +771,7 @@
+ 			ipv6_addr_all_nodes(&maddr);
+ 			ndisc_send_na(dev, NULL, &maddr, &ifp->addr, 
+ 				      ifp->idev->cnf.forwarding, 0, 
+-				      ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1, 
+-				      1);
++				      1, 1);
+ 			in6_ifa_put(ifp);
+ 			return;
+ 		}
+@@ -710,8 +794,7 @@
+ 			if (neigh || !dev->hard_header) {
+ 				ndisc_send_na(dev, neigh, saddr, &ifp->addr, 
+ 					      ifp->idev->cnf.forwarding, 1, 
+-					      ipv6_addr_type(&ifp->addr)&IPV6_ADDR_ANYCAST ? 0 : 1, 
+-					      1);
++					      1, 1);
+ 				if (neigh)
+ 					neigh_release(neigh);
+ 			}
+@@ -719,7 +802,6 @@
+ 		in6_ifa_put(ifp);
+ 	} else if (ipv6_chk_acast_addr(dev, &msg->target)) {
+ 		struct inet6_dev *idev = in6_dev_get(dev);
+-		int addr_type = ipv6_addr_type(saddr);
+ 
+ 		/* anycast */
+ 
+@@ -763,10 +845,10 @@
+ 		in6_dev_put(idev);
+ 	} else {
+ 		struct inet6_dev *in6_dev = in6_dev_get(dev);
+-		int addr_type = ipv6_addr_type(saddr);
+ 
+ 		if (in6_dev && in6_dev->cnf.forwarding &&
+-		    (addr_type & IPV6_ADDR_UNICAST) &&
++		    (addr_type & IPV6_ADDR_UNICAST ||
++		     addr_type == IPV6_ADDR_ANY) &&
+ 		    pneigh_lookup(&nd_tbl, &msg->target, dev, 0)) {
+ 			int inc = ipv6_addr_type(daddr)&IPV6_ADDR_MULTICAST;
+ 
+@@ -781,12 +863,20 @@
+ 					NEIGH_CACHE_STAT_INC(&nd_tbl, 
+ 							rcv_probes_ucast);
+ 					
+-				neigh = neigh_event_ns(&nd_tbl, lladdr, saddr, dev);
++				if (addr_type & IPV6_ADDR_UNICAST) {
++					neigh = neigh_event_ns(&nd_tbl, lladdr, saddr, dev);
+ 
+-				if (neigh) {
+-					ndisc_send_na(dev, neigh, saddr, &msg->target,
+-						      0, 1, 0, 1);
+-					neigh_release(neigh);
++					if (neigh) {
++						ndisc_send_na(dev, neigh, saddr, &msg->target,
++							      0, 1, 0, 1);
++						neigh_release(neigh);
++					}
++				} else {
++					/* proxy should also protect against DAD */
++					struct in6_addr maddr;
++					ipv6_addr_all_nodes(&maddr);
++					ndisc_send_na(dev, NULL, &maddr, &msg->target, 
++						      0, 0, 0, 1);
+ 				}
+ 			} else {
+ 				struct sk_buff *n = skb_clone(skb, GFP_ATOMIC);
+@@ -802,7 +892,7 @@
+ 	return;
+ }
+ 
+-void ndisc_recv_na(struct sk_buff *skb)
++static void ndisc_recv_na(struct sk_buff *skb)
+ {
+ 	struct nd_msg *msg = (struct nd_msg *)skb->h.raw;
+ 	struct in6_addr *saddr = &skb->nh.ipv6h->saddr;
+@@ -872,12 +962,8 @@
+ 				 */
+ 				struct rt6_info *rt;
+ 				rt = rt6_get_dflt_router(saddr, dev);
+-				if (rt) {
+-					/* It is safe only because
+-					   we aer in BH */
+-					dst_release(&rt->u.dst);
+-					ip6_del_rt(rt, NULL);
+-				}
++				if (rt)
++					ip6_del_rt(rt, NULL, NULL);
+ 			}
+ 		} else {
+ 			if (msg->icmph.icmp6_router)
+@@ -962,7 +1048,7 @@
+ 	rt = rt6_get_dflt_router(&skb->nh.ipv6h->saddr, skb->dev);
+ 
+ 	if (rt && lifetime == 0) {
+-		ip6_del_rt(rt, NULL);
++		ip6_del_rt(rt, NULL, NULL);
+ 		rt = NULL;
+ 	}
+ 
+@@ -1074,7 +1160,7 @@
+ 			in6_dev->cnf.mtu6 = mtu;
+ 
+ 			if (rt)
+-				rt->u.dst.pmtu = mtu;
++				rt->u.dst.metrics[RTAX_MTU-1] = mtu;
+ 
+ 			rt6_mtu_change(skb->dev, mtu);
+ 		}
+@@ -1197,27 +1283,44 @@
+ 	struct in6_addr *addrp;
+ 	struct net_device *dev;
+ 	struct rt6_info *rt;
++	struct dst_entry *dst;
++	struct flowi fl;
+ 	u8 *opt;
+ 	int rd_len;
+ 	int err;
+ 	int hlen;
+ 
+ 	dev = skb->dev;
+-	rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
+ 
++	if (ipv6_get_lladdr(dev, &saddr_buf)) {
++ 		ND_PRINTK1("redirect: no link_local addr for dev\n");
++ 		return;
++ 	}
++
++	ndisc_flow_init(&fl, NDISC_REDIRECT, &saddr_buf, &skb->nh.ipv6h->saddr);
++
++	rt = rt6_lookup(&skb->nh.ipv6h->saddr, NULL, dev->ifindex, 1);
+ 	if (rt == NULL)
+ 		return;
++	dst = &rt->u.dst;
++
++	err = xfrm_lookup(&dst, &fl, NULL, 0);
++	if (err) {
++		dst_release(dst);
++		return;
++	}
++
++	rt = (struct rt6_info *) dst;
+ 
+ 	if (rt->rt6i_flags & RTF_GATEWAY) {
+ 		ND_PRINTK1("ndisc_send_redirect: not a neighbour\n");
+-		dst_release(&rt->u.dst);
++		dst_release(dst);
+ 		return;
+ 	}
+-	if (!xrlim_allow(&rt->u.dst, 1*HZ)) {
+-		dst_release(&rt->u.dst);
++	if (!xrlim_allow(dst, 1*HZ)) {
++		dst_release(dst);
+ 		return;
+ 	}
+-	dst_release(&rt->u.dst);
+ 
+ 	if (dev->addr_len) {
+ 		if (neigh->nud_state&NUD_VALID) {
+@@ -1227,6 +1330,7 @@
+ 			   We will make it later, when will be sure,
+ 			   that it is alive.
+ 			 */
++			dst_release(dst);
+ 			return;
+ 		}
+ 	}
+@@ -1236,11 +1340,6 @@
+ 	rd_len &= ~0x7;
+ 	len += rd_len;
+ 
+-	if (ipv6_get_lladdr(dev, &saddr_buf)) {
+- 		ND_PRINTK1("redirect: no link_local addr for dev\n");
+- 		return;
+- 	}
+-
+ 	buff = sock_alloc_send_skb(sk, MAX_HEADER + len + dev->hard_header_len + 15,
+ 				   1, &err);
+ 	if (buff == NULL) {
+@@ -1250,15 +1349,11 @@
+ 
+ 	hlen = 0;
+ 
+-	if (ndisc_build_ll_hdr(buff, dev, &skb->nh.ipv6h->saddr, NULL, len) == 0) {
+-		kfree_skb(buff);
+-		return;
+-	}
+-
++	skb_reserve(buff, (dev->hard_header_len + 15) & ~15);
+ 	ip6_nd_hdr(sk, buff, dev, &saddr_buf, &skb->nh.ipv6h->saddr,
+ 		   IPPROTO_ICMPV6, len);
+ 
+-	icmph = (struct icmp6hdr *) skb_put(buff, len);
++	icmph = (struct icmp6hdr *)(buff->h.raw = skb_put(buff, len));
+ 
+ 	memset(icmph, 0, sizeof(struct icmp6hdr));
+ 	icmph->icmp6_type = NDISC_REDIRECT;
+@@ -1296,7 +1391,8 @@
+ 					     len, IPPROTO_ICMPV6,
+ 					     csum_partial((u8 *) icmph, len, 0));
+ 
+-	dev_queue_xmit(buff);
++	buff->dst = dst;
++	dst_output(buff);
+ 
+ 	ICMP6_INC_STATS(Icmp6OutRedirects);
+ 	ICMP6_INC_STATS(Icmp6OutMsgs);
+@@ -1416,6 +1512,9 @@
+ 
+ void ndisc_cleanup(void)
+ {
++#ifdef CONFIG_SYSCTL
++	neigh_sysctl_unregister(&nd_tbl.parms);
++#endif
+ 	neigh_table_clear(&nd_tbl);
+ 	sock_release(ndisc_socket);
+ 	ndisc_socket = NULL; /* For safety. */
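
The recurring change across ndisc_send_na(), ndisc_send_ns(), ndisc_send_rs()
and ndisc_send_redirect() above is a single new send pattern: fill a flow key
with ndisc_flow_init(), allocate a route with ndisc_dst_alloc(), pass it
through xfrm_lookup() so IPsec policy now applies to neighbour discovery
traffic as well, and only then build the skb and hand it to dst_output().
A stand-alone sketch of that control flow; every type and function below is a
local stub, not the kernel API:

#include <stdio.h>
#include <stdlib.h>

struct flowi { int proto; int icmp_type; };	/* stub flow key */
struct dst_entry { int refcnt; };		/* stub route */

static struct dst_entry *ndisc_dst_alloc(void)
{
	return calloc(1, sizeof(struct dst_entry));
}

static void dst_release(struct dst_entry *dst)
{
	free(dst);
}

/* 0 on success; negative when policy forbids or transformation fails. */
static int xfrm_lookup(struct dst_entry **dst, const struct flowi *fl)
{
	(void)fl;
	return *dst ? 0 : -1;
}

static void ndisc_send(int icmp_type)
{
	struct flowi fl = { 58 /* IPPROTO_ICMPV6 */, icmp_type };
	struct dst_entry *dst = ndisc_dst_alloc();

	if (!dst)
		return;
	if (xfrm_lookup(&dst, &fl) < 0) {	/* policy may drop ND traffic */
		dst_release(dst);
		return;
	}
	/* ... allocate skb, reserve header room, attach dst ... */
	printf("ND type %d passed policy, would go out via dst_output()\n",
	       icmp_type);
	dst_release(dst);
}

int main(void)
{
	ndisc_send(136);	/* neighbour advertisement */
	return 0;
}
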
+diff -Nru a/net/ipv6/netfilter/ip6t_LOG.c b/net/ipv6/netfilter/ip6t_LOG.c
+--- a/net/ipv6/netfilter/ip6t_LOG.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/netfilter/ip6t_LOG.c	2005-02-13 21:25:09 +11:00
+@@ -25,16 +25,6 @@
+ #define DEBUGP(format, args...)
+ #endif
+ 
+-#define NIP6(addr) \
+-	ntohs((addr).s6_addr16[0]), \
+-	ntohs((addr).s6_addr16[1]), \
+-	ntohs((addr).s6_addr16[2]), \
+-	ntohs((addr).s6_addr16[3]), \
+-	ntohs((addr).s6_addr16[4]), \
+-	ntohs((addr).s6_addr16[5]), \
+-	ntohs((addr).s6_addr16[6]), \
+-	ntohs((addr).s6_addr16[7])
+-
+ /* FIXME evil kludge */
+ 
+ struct ahhdr {
+diff -Nru a/net/ipv6/netfilter/ip6t_multiport.c b/net/ipv6/netfilter/ip6t_multiport.c
+--- a/net/ipv6/netfilter/ip6t_multiport.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv6/netfilter/ip6t_multiport.c	2005-02-13 21:25:10 +11:00
+@@ -5,6 +5,7 @@
+ #include <linux/udp.h>
+ #include <linux/skbuff.h>
+ #include <linux/in.h>
++#include <linux/socket.h>
+ 
+ #include <linux/netfilter_ipv6/ip6t_multiport.h>
+ #include <linux/netfilter_ipv6/ip6_tables.h>
+diff -Nru a/net/ipv6/protocol.c b/net/ipv6/protocol.c
+--- a/net/ipv6/protocol.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/protocol.c	2005-02-13 21:25:09 +11:00
+@@ -42,77 +42,42 @@
+ 
+ struct inet6_protocol *inet6_protos[MAX_INET_PROTOS];
+ 
+-void inet6_add_protocol(struct inet6_protocol *prot)
++int inet6_add_protocol(struct inet6_protocol *prot, unsigned char protocol)
+ {
+-	unsigned char hash;
+-	struct inet6_protocol *p2;
++	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+ 
+-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
+ 	br_write_lock_bh(BR_NETPROTO_LOCK);
+-	prot->next = inet6_protos[hash];
+-	inet6_protos[hash] = prot;
+-	prot->copy = 0;
+-
+-	/*
+-	 *	Set the copy bit if we need to. 
+-	 */
+-	 
+-	p2 = (struct inet6_protocol *) prot->next;
+-	while(p2 != NULL) {
+-		if (p2->protocol == prot->protocol) {
+-			prot->copy = 1;
+-			break;
+-		}
+-		p2 = (struct inet6_protocol *) p2->next;
++
++	if (inet6_protos[hash]) {
++		ret = -1;
++	} else {
++		inet6_protos[hash] = prot;
++		ret = 0;
+ 	}
++
+ 	br_write_unlock_bh(BR_NETPROTO_LOCK);
++
++	return ret;
+ }
+ 
+ /*
+  *	Remove a protocol from the hash tables.
+  */
+  
+-int inet6_del_protocol(struct inet6_protocol *prot)
++int inet6_del_protocol(struct inet6_protocol *prot, unsigned char protocol)
+ {
+-	struct inet6_protocol *p;
+-	struct inet6_protocol *lp = NULL;
+-	unsigned char hash;
++	int ret, hash = protocol & (MAX_INET_PROTOS - 1);
+ 
+-	hash = prot->protocol & (MAX_INET_PROTOS - 1);
+ 	br_write_lock_bh(BR_NETPROTO_LOCK);
+-	if (prot == inet6_protos[hash]) {
+-		inet6_protos[hash] = (struct inet6_protocol *) inet6_protos[hash]->next;
+-		br_write_unlock_bh(BR_NETPROTO_LOCK);
+-		return(0);
+-	}
+-
+-	p = (struct inet6_protocol *) inet6_protos[hash];
+ 
+-        if (p != NULL && p->protocol == prot->protocol)
+-                lp = p;
+-
+-	while(p != NULL) {
+-		/*
+-		 * We have to worry if the protocol being deleted is
+-		 * the last one on the list, then we may need to reset
+-		 * someone's copied bit.
+-		 */
+-		if (p->next != NULL && p->next == prot) {
+-			/*
+-			 * if we are the last one with this protocol and
+-			 * there is a previous one, reset its copy bit.
+-			 */
+-			if (prot->copy == 0 && lp != NULL)
+-				lp->copy = 0;
+-			p->next = prot->next;
+-			br_write_unlock_bh(BR_NETPROTO_LOCK);
+-			return(0);
+-		}
+-		if (p->next != NULL && p->next->protocol == prot->protocol) 
+-			lp = p->next;
+-
+-		p = (struct inet6_protocol *) p->next;
++	if (inet6_protos[hash] != prot) {
++		ret = -1;
++	} else {
++		inet6_protos[hash] = NULL;
++		ret = 0;
+ 	}
++
+ 	br_write_unlock_bh(BR_NETPROTO_LOCK);
+-	return(-1);
++
++	return ret;
+ }
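
The protocol.c rewrite above replaces the chained inet6_protocol list, with
its "copy" bit for duplicated handlers, by a flat table that holds at most one
handler per slot; inet6_add_protocol() now simply fails when the slot is
taken. The same data-structure change, modelled in a few lines of stand-alone
C (protocol number 108 stands in for IPPROTO_COMP):

#include <stdio.h>

#define MAX_INET_PROTOS 256

typedef int (*proto_handler)(void);

static proto_handler inet6_protos[MAX_INET_PROTOS];

static int add_protocol(proto_handler h, unsigned char protocol)
{
	int hash = protocol & (MAX_INET_PROTOS - 1);

	if (inet6_protos[hash])
		return -1;	/* slot taken: no more handler chains */
	inet6_protos[hash] = h;
	return 0;
}

static int del_protocol(proto_handler h, unsigned char protocol)
{
	int hash = protocol & (MAX_INET_PROTOS - 1);

	if (inet6_protos[hash] != h)
		return -1;	/* only the registered owner may remove */
	inet6_protos[hash] = NULL;
	return 0;
}

static int dummy_handler(void) { return 0; }

int main(void)
{
	printf("first add:  %d\n", add_protocol(dummy_handler, 108)); /*  0 */
	printf("second add: %d\n", add_protocol(dummy_handler, 108)); /* -1 */
	printf("delete:     %d\n", del_protocol(dummy_handler, 108)); /*  0 */
	return 0;
}

One consequence worth noting: each protocol must now be the sole owner of its
slot (raw sockets keep their own hash table), which is why the patch converts
users such as the new fragment handler in reassembly.c to this
register/unregister pair.
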
+diff -Nru a/net/ipv6/raw.c b/net/ipv6/raw.c
+--- a/net/ipv6/raw.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/raw.c	2005-02-13 21:25:09 +11:00
+@@ -12,6 +12,7 @@
+  *	Fixes:
+  *	Hideaki YOSHIFUJI	:	sin6_scope_id support
+  *	YOSHIFUJI,H. at USAGI	:	raw checksum (RFC2292(bis) compliance) 
++ *	Kazunori MIYAZAWA @USAGI:	change process style to use ip6_append_data
+  *
+  *	This program is free software; you can redistribute it and/or
+  *      modify it under the terms of the GNU General Public License
+@@ -29,6 +30,8 @@
+ #include <linux/netdevice.h>
+ #include <linux/if_arp.h>
+ #include <linux/icmpv6.h>
++#include <linux/netfilter.h>
++#include <linux/netfilter_ipv6.h>
+ #include <asm/uaccess.h>
+ #include <asm/ioctls.h>
+ 
+@@ -45,6 +48,7 @@
+ #include <net/inet_common.h>
+ 
+ #include <net/rawv6.h>
++#include <net/xfrm.h>
+ 
+ struct sock *raw_v6_htable[RAWV6_HTABLE_SIZE];
+ rwlock_t raw_v6_lock = RW_LOCK_UNLOCKED;
+@@ -133,12 +137,14 @@
+  *	demultiplex raw sockets.
+  *	(should consider queueing the skb in the sock receive_queue
+  *	without calling rawv6.c)
++ *
++ *	Caller owns SKB so we must make clones.
+  */
+-struct sock * ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
++void ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
+ {
+ 	struct in6_addr *saddr;
+ 	struct in6_addr *daddr;
+-	struct sock *sk, *sk2;
++	struct sock *sk;
+ 	__u8 hash;
+ 
+ 	saddr = &skb->nh.ipv6h->saddr;
+@@ -159,30 +165,18 @@
+ 
+ 	sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr);
+ 
+-	if (sk) {
+-		sk2 = sk;
+-
+-		while ((sk2 = __raw_v6_lookup(sk2->next, nexthdr, daddr, saddr))) {
+-			struct sk_buff *buff;
+-
+-			if (nexthdr == IPPROTO_ICMPV6 &&
+-			    icmpv6_filter(sk2, skb))
+-				continue;
+-
+-			buff = skb_clone(skb, GFP_ATOMIC);
+-			if (buff)
+-				rawv6_rcv(sk2, buff);
++	while (sk) {
++		if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) {
++			struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC);
++
++			/* Not releasing hash table! */
++			if (clone)
++				rawv6_rcv(sk, clone);
+ 		}
++		sk = __raw_v6_lookup(sk->next, nexthdr, daddr, saddr);
+ 	}
+-
+-	if (sk && nexthdr == IPPROTO_ICMPV6 && icmpv6_filter(sk, skb))
+-		sk = NULL;
+-
+ out:
+-	if (sk)
+-		sock_hold(sk);
+ 	read_unlock(&raw_v6_lock);
+-	return sk;
+ }
+ 
+ /* This cleans up af_inet6 a bit. -DaveM */
+@@ -311,6 +305,11 @@
+  */
+ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
+ {
++        if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
++                kfree_skb(skb);
++                return NET_RX_DROP;
++        }
++
+ 	if (!sk->tp_pinfo.tp_raw.checksum)
+ 		skb->ip_summed = CHECKSUM_UNNECESSARY;
+ 
+@@ -439,86 +438,160 @@
+ 	goto out_free;
+ }
+ 
+-/*
+- *	Sending...
+- */
++static int rawv6_push_pending_frames(struct sock *sk, struct flowi *fl, struct raw6_opt *opt, int len)
++{
++	struct sk_buff *skb;
++	int err = 0;
++	u16 *csum;
+ 
+-struct rawv6_fakehdr {
+-	struct iovec	*iov;
+-	struct sock	*sk;
+-	__u32		len;
+-	__u32		cksum;
+-	__u32		proto;
+-	struct in6_addr *daddr;
+-};
++	if ((skb = skb_peek(&sk->write_queue)) == NULL)
++		goto out;
+ 
+-static int rawv6_getfrag(const void *data, struct in6_addr *saddr, 
+-			  char *buff, unsigned int offset, unsigned int len)
+-{
+-	struct iovec *iov = (struct iovec *) data;
++	if (opt->offset + 1 < len)
++		csum = (u16 *)(skb->h.raw + opt->offset);
++	else {
++		err = -EINVAL;
++		goto out;
++	}
++
++	if (skb_queue_len(&sk->write_queue) == 1) {
++		/*
++		 * Only one fragment on the socket.
++		 */
++		/* should check HW csum here -- miyazawa */
++		*csum = csum_ipv6_magic(&fl->fl6_src,
++					&fl->fl6_dst,
++					len, fl->proto, skb->csum);
++	} else {
++		u32 tmp_csum = 0;
++
++		skb_queue_walk(&sk->write_queue, skb) {
++			tmp_csum = csum_add(tmp_csum, skb->csum);
++		}
+ 
+-	return memcpy_fromiovecend(buff, iov, offset, len);
++		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
++					   &fl->fl6_dst,
++					   len, fl->proto, tmp_csum);
++		*csum = tmp_csum;
++	}
++	if (*csum == 0)
++		*csum = -1;
++	ip6_push_pending_frames(sk);
++out:
++	return err;
+ }
+ 
+-static int rawv6_frag_cksum(const void *data, struct in6_addr *addr,
+-			     char *buff, unsigned int offset, 
+-			     unsigned int len)
+-{
+-	struct rawv6_fakehdr *hdr = (struct rawv6_fakehdr *) data;
+-	
+-	if (csum_partial_copy_fromiovecend(buff, hdr->iov, offset, 
+-						    len, &hdr->cksum))
+-		return -EFAULT;
+-	
+-	if (offset == 0) {
+-		struct sock *sk;
+-		struct raw6_opt *opt;
+-		struct in6_addr *daddr;
+-		
+-		sk = hdr->sk;
+-		opt = &sk->tp_pinfo.tp_raw;
++static int rawv6_send_hdrinc(struct sock *sk, void *from, int length,
++			struct flowi *fl, struct rt6_info *rt, 
++			unsigned int flags)
++{
++	struct inet_opt *inet = inet_sk(sk);
++	struct ipv6hdr *iph;
++	struct sk_buff *skb;
++	unsigned int hh_len;
++	int err;
+ 
+-		if (hdr->daddr)
+-			daddr = hdr->daddr;
+-		else
+-			daddr = addr + 1;
+-		
+-		hdr->cksum = csum_ipv6_magic(addr, daddr, hdr->len,
+-					     hdr->proto, hdr->cksum);
+-		
+-		if (opt->offset + 1 < len) {
+-			__u16 *csum;
++	if (length > rt->u.dst.dev->mtu) {
++		ipv6_local_error(sk, EMSGSIZE, fl, rt->u.dst.dev->mtu);
++		return -EMSGSIZE;
++	}
++	if (flags&MSG_PROBE)
++		goto out;
+ 
+-			csum = (__u16 *) (buff + opt->offset);
+-			if (*csum) {
+-				/* in case cksum was not initialized */
+-				__u32 sum = hdr->cksum;
+-				sum += *csum;
+-				*csum = hdr->cksum = (sum + (sum>>16));
+-			} else {
+-				*csum = hdr->cksum;
++	hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
++
++	skb = sock_alloc_send_skb(sk, length+hh_len+15,
++				  flags&MSG_DONTWAIT, &err);
++	if (skb == NULL)
++		goto error; 
++	skb_reserve(skb, hh_len);
++
++	skb->priority = sk->priority;
++	skb->dst = dst_clone(&rt->u.dst);
++
++	skb->nh.ipv6h = iph = (struct ipv6hdr *)skb_put(skb, length);
++
++	skb->ip_summed = CHECKSUM_NONE;
++
++	skb->h.raw = skb->nh.raw;
++	err = memcpy_fromiovecend((void *)iph, from, 0, length);
++	if (err)
++		goto error_fault;
++
++	err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, rt->u.dst.dev,
++		      dst_output);
++	if (err > 0)
++		err = inet->recverr ? net_xmit_errno(err) : 0;
++	if (err)
++		goto error;
++out:
++	return 0;
++
++error_fault:
++	err = -EFAULT;
++	kfree_skb(skb);
++error:
++	IP6_INC_STATS(Ip6OutDiscards);
++	return err; 
++}
++
++static void rawv6_probe_proto_opt(struct flowi *fl, struct msghdr *msg)
++{
++	struct iovec *iov;
++	u8 __user *type = NULL;
++	u8 __user *code = NULL;
++	int probed = 0;
++	int i;
++
++	if (!msg->msg_iov)
++		return;
++
++	for (i = 0; i < msg->msg_iovlen; i++) {
++		iov = &msg->msg_iov[i];
++		if (!iov)
++			continue;
++
++		switch (fl->proto) {
++		case IPPROTO_ICMPV6:
++			/* check if one-byte field is readable or not. */
++			if (iov->iov_base && iov->iov_len < 1)
++				break;
++
++			if (!type) {
++				type = iov->iov_base;
++				/* check if code field is readable or not. */
++				if (iov->iov_len > 1)
++					code = type + 1;
++			} else if (!code)
++				code = iov->iov_base;
++
++			if (type && code) {
++				get_user(fl->fl_icmp_type, type);
++				__get_user(fl->fl_icmp_code, code);
++				probed = 1;
+ 			}
+-		} else {
+-			if (net_ratelimit())
+-				printk(KERN_DEBUG "icmp: cksum offset too big\n");
+-			return -EINVAL;
++			break;
++		default:
++			probed = 1;
++			break;
+ 		}
+-	}	
+-	return 0; 
++		if (probed)
++			break;
++	}
+ }
+ 
+-
+ static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
+ {
+ 	struct ipv6_txoptions opt_space;
+ 	struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
++	struct in6_addr *daddr;
+ 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
++	struct raw6_opt *raw_opt = raw6_sk(sk);
+ 	struct ipv6_txoptions *opt = NULL;
+ 	struct ip6_flowlabel *flowlabel = NULL;
++	struct dst_entry *dst = NULL;
+ 	struct flowi fl;
+ 	int addr_len = msg->msg_namelen;
+-	struct in6_addr *daddr;
+-	struct raw6_opt *raw_opt;
+ 	int hlimit = -1;
+ 	u16 proto;
+ 	int err;
+@@ -536,9 +609,7 @@
+ 	/*
+ 	 *	Get and verify the address. 
+ 	 */
+-
+-	fl.fl6_flowlabel = 0;
+-	fl.oif = 0;
++	memset(&fl, 0, sizeof(fl));
+ 
+ 	if (sin6) {
+ 		if (addr_len < SIN6_LEN_RFC2133) 
+@@ -552,6 +623,8 @@
+ 
+ 		if (!proto)
+ 			proto = sk->num;
++		else if (proto != sk->num)
++			return(-EINVAL);
+ 
+ 		if (proto > 255)
+ 			return(-EINVAL);
+@@ -590,16 +663,17 @@
+ 		 * unspecfied destination address 
+ 		 * treated as error... is this correct ?
+ 		 */
++		fl6_sock_release(flowlabel);
+ 		return(-EINVAL);
+ 	}
+ 
+ 	if (fl.oif == 0)
+ 		fl.oif = sk->bound_dev_if;
+-	fl.fl6_src = NULL;
+ 
+ 	if (msg->msg_controllen) {
+ 		opt = &opt_space;
+ 		memset(opt, 0, sizeof(struct ipv6_txoptions));
++		opt->tot_len = sizeof(struct ipv6_txoptions);
+ 
+ 		err = datagram_send_ctl(msg, &fl, opt, &hlimit);
+ 		if (err < 0) {
+@@ -619,39 +693,73 @@
+ 	if (flowlabel)
+ 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+ 
+-	raw_opt = &sk->tp_pinfo.tp_raw;
+-
+ 	fl.proto = proto;
+-	fl.fl6_dst = daddr;
+-	if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
+-		fl.fl6_src = &np->saddr;
+-	fl.uli_u.icmpt.type = 0;
+-	fl.uli_u.icmpt.code = 0;
+-	
+-	if (raw_opt->checksum) {
+-		struct rawv6_fakehdr hdr;
+-		
+-		hdr.iov = msg->msg_iov;
+-		hdr.sk  = sk;
+-		hdr.len = len;
+-		hdr.cksum = 0;
+-		hdr.proto = proto;
++	rawv6_probe_proto_opt(&fl, msg);
++ 
++	ipv6_addr_copy(&fl.fl6_dst, daddr);
++	if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
++		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
++
++	/* merge ip6_build_xmit from ip6_output */
++	if (opt && opt->srcrt) {
++		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
++		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
++	}
++
++	if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))
++		fl.oif = np->mcast_oif;
++
++	err = ip6_dst_lookup(sk, &dst, &fl);
++	if (err)
++		goto out;
+ 
+-		if (opt && opt->srcrt)
+-			hdr.daddr = daddr;
++	if (hlimit < 0) {
++		if (ipv6_addr_is_multicast(&fl.fl6_dst))
++			hlimit = np->mcast_hops;
+ 		else
+-			hdr.daddr = NULL;
++			hlimit = np->hop_limit;
++		if (hlimit < 0)
++			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
++	}
++
++	if (msg->msg_flags&MSG_CONFIRM)
++		goto do_confirm;
+ 
+-		err = ip6_build_xmit(sk, rawv6_frag_cksum, &hdr, &fl, len,
+-				     opt, hlimit, msg->msg_flags);
++back_from_confirm:
++	if (sk->protinfo.af_inet.hdrincl) {
++		err = rawv6_send_hdrinc(sk, msg->msg_iov, len, &fl, (struct rt6_info*)dst, msg->msg_flags);
+ 	} else {
+-		err = ip6_build_xmit(sk, rawv6_getfrag, msg->msg_iov, &fl, len,
+-				     opt, hlimit, msg->msg_flags);
++		lock_sock(sk);
++		err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, len, 0,
++					hlimit, opt, &fl, (struct rt6_info*)dst, msg->msg_flags);
++
++		if (err)
++			ip6_flush_pending_frames(sk);
++		else if (!(msg->msg_flags & MSG_MORE)) {
++			if (raw_opt->checksum) {
++				err = rawv6_push_pending_frames(sk, &fl, raw_opt, len);
++			} else {
++				err = ip6_push_pending_frames(sk);
++			}
++		}
+ 	}
++done:
++	ip6_dst_store(sk, dst,
++		      !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ?
++		      &np->daddr : NULL);
++	if (err > 0)
++		err = np->recverr ? net_xmit_errno(err) : 0;
+ 
++	release_sock(sk);
++out:	
+ 	fl6_sock_release(flowlabel);
+-
+ 	return err<0?err:len;
++do_confirm:
++	dst_confirm(dst);
++	if (!(msg->msg_flags & MSG_PROBE) || len)
++		goto back_from_confirm;
++	err = 0;
++	goto done;
+ }
+ 
+ static int rawv6_seticmpfilter(struct sock *sk, int level, int optname, 
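
For sockets that set a checksum offset (IPV6_CHECKSUM), the new
rawv6_push_pending_frames() above combines the partial checksums of all queued
fragments with csum_add(), finishes them against the IPv6 pseudo-header via
csum_ipv6_magic(), and transmits a zero result as all-ones. A toy illustration
of the ones-complement arithmetic involved (the partial sums are invented):

#include <stdio.h>
#include <stdint.h>

/* ones-complement add with end-around carry, as csum_add() does */
static uint32_t csum_add(uint32_t a, uint32_t b)
{
	uint64_t s = (uint64_t)a + b;

	return (uint32_t)(s + (s >> 32));
}

/* fold a 32-bit partial sum down to the final 16-bit checksum */
static uint16_t csum_fold(uint32_t sum)
{
	sum = (sum & 0xffff) + (sum >> 16);
	sum = (sum & 0xffff) + (sum >> 16);
	return (uint16_t)~sum;
}

int main(void)
{
	/* pretend partial sums of three queued fragments */
	uint32_t frags[] = { 0x12345, 0x0fffe, 0x0abcd };
	uint32_t total = 0;
	uint16_t csum;
	int i;

	for (i = 0; i < 3; i++)
		total = csum_add(total, frags[i]);

	/* the pseudo-header would be mixed in here by csum_ipv6_magic() */
	csum = csum_fold(total);
	if (csum == 0)
		csum = 0xffff;	/* 0 means "no checksum": send all-ones */
	printf("checksum: 0x%04x\n", csum);
	return 0;
}
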
+diff -Nru a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c
+--- a/net/ipv6/reassembly.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/reassembly.c	2005-02-13 21:25:09 +11:00
+@@ -23,6 +23,10 @@
+  *      Horst von Brand Add missing #include <linux/string.h>
+  *	Alexey Kuznetsov	SMP races, threading, cleanup.
+  *	Patrick McHardy		LRU queue of frag heads for evictor.
++ *	Mitsuru KANDA @USAGI	Register inet6_protocol{}.
++ *	David Stevens and
++ *	YOSHIFUJI,H. @USAGI	Always remove fragment header to
++ *				calculate ICV correctly.
+  */
+ #include <linux/config.h>
+ #include <linux/errno.h>
+@@ -428,7 +432,7 @@
+ 	end = offset + (ntohs(skb->nh.ipv6h->payload_len) -
+ 			((u8 *) (fhdr + 1) - (u8 *) (skb->nh.ipv6h + 1)));
+ 
+-	if ((unsigned int)end >= 65536) {
++	if ((unsigned int)end > IPV6_MAXPLEN) {
+  		icmpv6_param_prob(skb,ICMPV6_HDR_FIELD, (u8*)&fhdr->frag_off - skb->nh.raw);
+  		return;
+ 	}
+@@ -438,7 +442,7 @@
+  				     csum_partial(skb->nh.raw, (u8*)(fhdr+1)-skb->nh.raw, 0));
+ 
+ 	/* Is this the final fragment? */
+-	if (!(fhdr->frag_off & htons(0x0001))) {
++	if (!(fhdr->frag_off & htons(IP6_MF))) {
+ 		/* If we already have some bits beyond end
+ 		 * or have different end, the segment is corrupted.
+ 		 */
+@@ -586,12 +590,12 @@
+  *	the last and the first frames arrived and all the bits are here.
+  */
+ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff **skb_in,
++			  unsigned int *nhoffp,
+ 			  struct net_device *dev)
+ {
+ 	struct sk_buff *fp, *head = fq->fragments;
+-	int    remove_fraghdr = 0;
+ 	int    payload_len;
+-	int    nhoff;
++	unsigned int nhoff;
+ 
+ 	fq_kill(fq);
+ 
+@@ -599,15 +603,9 @@
+ 	BUG_TRAP(FRAG6_CB(head)->offset == 0);
+ 
+ 	/* Unfragmented part is taken from the first segment. */
+-	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len;
+-	nhoff = head->h.raw - head->nh.raw;
+-
+-	if (payload_len > 65535) {
+-		payload_len -= 8;
+-		if (payload_len > 65535)
+-			goto out_oversize;
+-		remove_fraghdr = 1;
+-	}
++	payload_len = (head->data - head->nh.raw) - sizeof(struct ipv6hdr) + fq->len - sizeof(struct frag_hdr);
++	if (payload_len > IPV6_MAXPLEN)
++		goto out_oversize;
+ 
+ 	/* Head of list must not be cloned. */
+ 	if (skb_cloned(head) && pskb_expand_head(head, 0, 0, GFP_ATOMIC))
+@@ -636,18 +634,14 @@
+ 		atomic_add(clone->truesize, &ip6_frag_mem);
+ 	}
+ 
+-	/* Normally we do not remove frag header from datagram, but
+-	 * we have to do this and to relocate header, when payload
+-	 * is > 65535-8. */
+-	if (remove_fraghdr) {
+-		nhoff = fq->nhoffset;
+-		head->nh.raw[nhoff] = head->h.raw[0];
+-		memmove(head->head+8, head->head, (head->data-head->head)-8);
+-		head->mac.raw += 8;
+-		head->nh.raw += 8;
+-	} else {
+-		((struct frag_hdr*)head->h.raw)->frag_off = 0;
+-	}
++	/* We have to remove fragment header from datagram and to relocate
++	 * header in order to calculate ICV correctly. */
++	nhoff = fq->nhoffset;
++	head->nh.raw[nhoff] = head->h.raw[0];
++	memmove(head->head + sizeof(struct frag_hdr), head->head, 
++		(head->data - head->head) - sizeof(struct frag_hdr));
++	head->mac.raw += sizeof(struct frag_hdr);
++	head->nh.raw += sizeof(struct frag_hdr);
+ 
+ 	skb_shinfo(head)->frag_list = head->next;
+ 	head->h.raw = head->data;
+@@ -678,7 +672,8 @@
+ 
+ 	IP6_INC_STATS_BH(Ip6ReasmOKs);
+ 	fq->fragments = NULL;
+-	return nhoff;
++	*nhoffp = nhoff;
++	return 1;
+ 
+ out_oversize:
+ 	if (net_ratelimit())
+@@ -692,7 +687,7 @@
+ 	return -1;
+ }
+ 
+-int ipv6_reassembly(struct sk_buff **skbp, int nhoff)
++static int ipv6_frag_rcv(struct sk_buff **skbp, unsigned int *nhoffp)
+ {
+ 	struct sk_buff *skb = *skbp; 
+ 	struct net_device *dev = skb->dev;
+@@ -722,7 +717,8 @@
+ 		skb->h.raw += sizeof(struct frag_hdr);
+ 		IP6_INC_STATS_BH(Ip6ReasmOKs);
+ 
+-		return (u8*)fhdr - skb->nh.raw;
++		*nhoffp = (u8*)fhdr - skb->nh.raw;
++		return 1;
+ 	}
+ 
+ 	if (atomic_read(&ip6_frag_mem) > sysctl_ip6frag_high_thresh)
+@@ -733,11 +729,11 @@
+ 
+ 		spin_lock(&fq->lock);
+ 
+-		ip6_frag_queue(fq, skb, fhdr, nhoff);
++		ip6_frag_queue(fq, skb, fhdr, *nhoffp);
+ 
+ 		if (fq->last_in == (FIRST_IN|LAST_IN) &&
+ 		    fq->meat == fq->len)
+-			ret = ip6_frag_reasm(fq, skbp, dev);
++			ret = ip6_frag_reasm(fq, skbp, nhoffp, dev);
+ 
+ 		spin_unlock(&fq->lock);
+ 		fq_put(fq, NULL);
+@@ -749,8 +745,17 @@
+ 	return -1;
+ }
+ 
++static struct inet6_protocol frag_protocol =
++{
++	.handler	=	ipv6_frag_rcv,
++	.flags		=	INET6_PROTO_NOPOLICY,
++};
++
+ void __init ipv6_frag_init(void)
+ {
++	if (inet6_add_protocol(&frag_protocol, IPPROTO_FRAGMENT) < 0)
++		printk(KERN_ERR "ipv6_frag_init: Could not register protocol\n");
++
+ 	ip6_frag_hash_rnd = (u32) ((num_physpages ^ (num_physpages>>7)) ^
+ 				   (jiffies ^ (jiffies >> 6)));
+ 
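
Two conventions change in the reassembly code above. First, extension-header
handlers now return 1 for "continue parsing" and pass the next-header offset
back through a pointer instead of encoding it in the return value. Second, the
fragment header is no longer left in the reassembled datagram: it is always
deleted by sliding the preceding headers forward by sizeof(struct frag_hdr),
so AH's ICV is computed over the same bytes the sender authenticated. The
in-place deletion, demonstrated on a plain buffer (layout and lengths are
invented for the demo):

#include <stdio.h>
#include <string.h>

#define FRAG_HDR_LEN 8	/* sizeof(struct frag_hdr) */

int main(void)
{
	/* [16 bytes of unfragmentable headers][8-byte frag hdr][payload] */
	char pkt[] = "HDRHDRHDRHDRHDR!" "FRAGHDR!" "payload";
	size_t pre = 16;	/* bytes in front of the fragment header */

	/* slide the leading headers forward over the fragment header,
	 * exactly as ip6_frag_reasm() does with memmove() on head->head */
	memmove(pkt + FRAG_HDR_LEN, pkt, pre);

	printf("packet now starts at +%d: %s\n", FRAG_HDR_LEN,
	       pkt + FRAG_HDR_LEN);
	return 0;
}
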
+diff -Nru a/net/ipv6/route.c b/net/ipv6/route.c
+--- a/net/ipv6/route.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/route.c	2005-02-13 21:25:09 +11:00
+@@ -49,6 +49,8 @@
+ #include <net/addrconf.h>
+ #include <net/tcp.h>
+ #include <linux/rtnetlink.h>
++#include <net/dst.h>
++#include <net/xfrm.h>
+ 
+ #include <asm/uaccess.h>
+ 
+@@ -56,8 +58,6 @@
+ #include <linux/sysctl.h>
+ #endif
+ 
+-#undef CONFIG_RT6_POLICY
+-
+ /* Set to 3 to get tracing. */
+ #define RT6_DEBUG 2
+ 
+@@ -80,39 +80,43 @@
+ 
+ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort);
+ static struct dst_entry	*ip6_dst_check(struct dst_entry *dst, u32 cookie);
+-static struct dst_entry	*ip6_dst_reroute(struct dst_entry *dst,
+-					 struct sk_buff *skb);
+ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
+ static int		 ip6_dst_gc(void);
+ 
+ static int		ip6_pkt_discard(struct sk_buff *skb);
+ static void		ip6_link_failure(struct sk_buff *skb);
++static void		ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu);
+ 
+ struct dst_ops ip6_dst_ops = {
+-	AF_INET6,
+-	__constant_htons(ETH_P_IPV6),
+-	1024,
+-
+-        ip6_dst_gc,
+-	ip6_dst_check,
+-	ip6_dst_reroute,
+-	NULL,
+-	ip6_negative_advice,
+-	ip6_link_failure,
+-	sizeof(struct rt6_info),
++	.family			=	AF_INET6,
++	.protocol		=	__constant_htons(ETH_P_IPV6),
++	.gc			=	ip6_dst_gc,
++	.gc_thresh		=	1024,
++	.check			=	ip6_dst_check,
++	.negative_advice	=	ip6_negative_advice,
++	.link_failure		=	ip6_link_failure,
++	.update_pmtu		=	ip6_rt_update_pmtu,
++	.entry_size		=	sizeof(struct rt6_info),
+ };
+ 
+ struct rt6_info ip6_null_entry = {
+-	{{NULL, ATOMIC_INIT(1), 1, &loopback_dev,
+-	  -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+-	  -ENETUNREACH, NULL, NULL,
+-	  ip6_pkt_discard, ip6_pkt_discard,
+-#ifdef CONFIG_NET_CLS_ROUTE
+-	  0,
+-#endif
+-	  &ip6_dst_ops}},
+-	NULL, {{{0}}}, RTF_REJECT|RTF_NONEXTHOP, ~0U,
+-	255, ATOMIC_INIT(1), {NULL}, {{{{0}}}, 0}, {{{{0}}}, 0}
++	.u = {
++		.dst = {
++			.__refcnt	= ATOMIC_INIT(1),
++			.__use		= 1,
++			.dev		= &loopback_dev,
++			.obsolete	= -1,
++			.error		= -ENETUNREACH,
++			.metrics	= { [RTAX_HOPLIMIT - 1] = 255, },
++			.input		= ip6_pkt_discard,
++			.output		= ip6_pkt_discard,
++			.ops		= &ip6_dst_ops,
++			.path		= (struct dst_entry*)&ip6_null_entry,
++		}
++	},
++	.rt6i_flags	= (RTF_REJECT | RTF_NONEXTHOP),
++	.rt6i_metric	= ~(u32) 0,
++	.rt6i_ref	= ATOMIC_INIT(1),
+ };
+ 
+ struct fib6_node ip6_routing_table = {
+@@ -121,29 +125,17 @@
+ 	0, RTN_ROOT|RTN_TL_ROOT|RTN_RTINFO, 0
+ };
+ 
+-#ifdef CONFIG_RT6_POLICY
+-int	ip6_rt_policy = 0;
+-
+-struct pol_chain *rt6_pol_list = NULL;
+-
+-
+-static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb);
+-static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk);
+-
+-static struct rt6_info	*rt6_flow_lookup(struct rt6_info *rt,
+-					 struct in6_addr *daddr,
+-					 struct in6_addr *saddr,
+-					 struct fl_acc_args *args);
+-
+-#else
+-#define ip6_rt_policy (0)
+-#endif
+-
+ /* Protects all the ip6 fib */
+ 
+ rwlock_t rt6_lock = RW_LOCK_UNLOCKED;
+ 
+ 
++/* allocate dst with ip6_dst_ops */
++static __inline__ struct rt6_info *ip6_dst_alloc(void)
++{
++	return (struct rt6_info *)dst_alloc(&ip6_dst_ops);
++}
++
+ /*
+  *	Route lookup. Any rt6_lock is implied.
+  */
+@@ -269,9 +261,12 @@
+ 		}
+ 	}
+ 
+-	if (match)
++	if (match) {
++		if (rt6_dflt_pointer != match)
++			RT6_TRACE("changed default router: %p->%p\n",
++				  rt6_dflt_pointer, match);
+ 		rt6_dflt_pointer = match;
+-
++	}
+ 	spin_unlock(&rt6_dflt_lock);
+ 
+ 	if (!match) {
+@@ -325,12 +320,12 @@
+    be destroyed.
+  */
+ 
+-static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh)
++static int rt6_ins(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	int err;
+ 
+ 	write_lock_bh(&rt6_lock);
+-	err = fib6_add(&ip6_routing_table, rt, nlh);
++	err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
+ 	write_unlock_bh(&rt6_lock);
+ 
+ 	return err;
+@@ -373,7 +368,7 @@
+ 
+ 		dst_hold(&rt->u.dst);
+ 
+-		err = rt6_ins(rt, NULL);
++		err = rt6_ins(rt, NULL, NULL);
+ 		if (err == 0)
+ 			return rt;
+ 
+@@ -385,38 +380,6 @@
+ 	return &ip6_null_entry;
+ }
+ 
+-#ifdef CONFIG_RT6_POLICY
+-static __inline__ struct rt6_info *rt6_flow_lookup_in(struct rt6_info *rt,
+-						      struct sk_buff *skb)
+-{
+-	struct in6_addr *daddr, *saddr;
+-	struct fl_acc_args arg;
+-
+-	arg.type = FL_ARG_FORWARD;
+-	arg.fl_u.skb = skb;
+-
+-	saddr = &skb->nh.ipv6h->saddr;
+-	daddr = &skb->nh.ipv6h->daddr;
+-
+-	return rt6_flow_lookup(rt, daddr, saddr, &arg);
+-}
+-
+-static __inline__ struct rt6_info *rt6_flow_lookup_out(struct rt6_info *rt,
+-						       struct sock *sk,
+-						       struct flowi *fl)
+-{
+-	struct fl_acc_args arg;
+-
+-	arg.type = FL_ARG_ORIGIN;
+-	arg.fl_u.fl_o.sk = sk;
+-	arg.fl_u.fl_o.flow = fl;
+-
+-	return rt6_flow_lookup(rt, fl->nl_u.ip6_u.daddr, fl->nl_u.ip6_u.saddr,
+-			       &arg);
+-}
+-
+-#endif
+-
+ #define BACKTRACK() \
+ if (rt == &ip6_null_entry && strict) { \
+        while ((fn = fn->parent) != NULL) { \
+@@ -449,53 +412,30 @@
+ 	rt = fn->leaf;
+ 
+ 	if ((rt->rt6i_flags & RTF_CACHE)) {
+-		if (ip6_rt_policy == 0) {
+-			rt = rt6_device_match(rt, skb->dev->ifindex, strict);
+-			BACKTRACK();
+-			dst_hold(&rt->u.dst);
+-			goto out;
+-		}
+-
+-#ifdef CONFIG_RT6_POLICY
+-		if ((rt->rt6i_flags & RTF_FLOW)) {
+-			struct rt6_info *sprt;
+-
+-			for (sprt = rt; sprt; sprt = sprt->u.next) {
+-				if (rt6_flow_match_in(sprt, skb)) {
+-					rt = sprt;
+-					dst_hold(&rt->u.dst);
+-					goto out;
+-				}
+-			}
+-		}
+-#endif
++		rt = rt6_device_match(rt, skb->dev->ifindex, strict);
++		BACKTRACK();
++		dst_hold(&rt->u.dst);
++		goto out;
+ 	}
+ 
+ 	rt = rt6_device_match(rt, skb->dev->ifindex, 0);
+ 	BACKTRACK();
+ 
+-	if (ip6_rt_policy == 0) {
+-		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+-			read_unlock_bh(&rt6_lock);
++	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
++		read_unlock_bh(&rt6_lock);
+ 
+-			rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
+-				     &skb->nh.ipv6h->saddr);
++		rt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
++			     &skb->nh.ipv6h->saddr);
+ 			
+-			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+-				goto out2;
+-			/* Race condition! In the gap, when rt6_lock was
+-			   released someone could insert this route.  Relookup.
+-			 */
+-			goto relookup;
+-		}
+-		dst_hold(&rt->u.dst);
+-	} else {
+-#ifdef CONFIG_RT6_POLICY
+-		rt = rt6_flow_lookup_in(rt, skb);
+-#else
+-		/* NEVER REACHED */
+-#endif
++		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
++			goto out2;
++		/* Race condition! In the gap, when rt6_lock was
++		   released someone could insert this route.  Relookup.
++		*/
++		dst_release(&rt->u.dst);
++		goto relookup;
+ 	}
++	dst_hold(&rt->u.dst);
+ 
+ out:
+ 	read_unlock_bh(&rt6_lock);
+@@ -512,38 +452,21 @@
+ 	int strict;
+ 	int attempts = 3;
+ 
+-	strict = ipv6_addr_type(fl->nl_u.ip6_u.daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
++	strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL);
+ 
+ relookup:
+ 	read_lock_bh(&rt6_lock);
+ 
+-	fn = fib6_lookup(&ip6_routing_table, fl->nl_u.ip6_u.daddr,
+-			 fl->nl_u.ip6_u.saddr);
++	fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src);
+ 
+ restart:
+ 	rt = fn->leaf;
+ 
+ 	if ((rt->rt6i_flags & RTF_CACHE)) {
+-		if (ip6_rt_policy == 0) {
+-			rt = rt6_device_match(rt, fl->oif, strict);
+-			BACKTRACK();
+-			dst_hold(&rt->u.dst);
+-			goto out;
+-		}
+-
+-#ifdef CONFIG_RT6_POLICY
+-		if ((rt->rt6i_flags & RTF_FLOW)) {
+-			struct rt6_info *sprt;
+-
+-			for (sprt = rt; sprt; sprt = sprt->u.next) {
+-				if (rt6_flow_match_out(sprt, sk)) {
+-					rt = sprt;
+-					dst_hold(&rt->u.dst);
+-					goto out;
+-				}
+-			}
+-		}
+-#endif
++		rt = rt6_device_match(rt, fl->oif, strict);
++		BACKTRACK();
++		dst_hold(&rt->u.dst);
++		goto out;
+ 	}
+ 	if (rt->rt6i_flags & RTF_DEFAULT) {
+ 		if (rt->rt6i_metric >= IP6_RT_PRIO_ADDRCONF)
+@@ -553,29 +476,21 @@
+ 		BACKTRACK();
+ 	}
+ 
+-	if (ip6_rt_policy == 0) {
+-		if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+-			read_unlock_bh(&rt6_lock);
++	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
++		read_unlock_bh(&rt6_lock);
+ 
+-			rt = rt6_cow(rt, fl->nl_u.ip6_u.daddr,
+-				     fl->nl_u.ip6_u.saddr);
+-			
+-			if (rt->u.dst.error != -EEXIST || --attempts <= 0)
+-				goto out2;
++		rt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
+ 
+-			/* Race condition! In the gap, when rt6_lock was
+-			   released someone could insert this route.  Relookup.
+-			 */
+-			goto relookup;
+-		}
+-		dst_hold(&rt->u.dst);
+-	} else {
+-#ifdef CONFIG_RT6_POLICY
+-		rt = rt6_flow_lookup_out(rt, sk, fl);
+-#else
+-		/* NEVER REACHED */
+-#endif
++		if (rt->u.dst.error != -EEXIST || --attempts <= 0)
++			goto out2;
++
++		/* Race condition! In the gap, when rt6_lock was
++		   released someone could insert this route.  Relookup.
++		*/
++		dst_release(&rt->u.dst);
++		goto relookup;
+ 	}
++	dst_hold(&rt->u.dst);
+ 
+ out:
+ 	read_unlock_bh(&rt6_lock);
+@@ -603,23 +518,13 @@
+ 	return NULL;
+ }
+ 
+-static struct dst_entry *ip6_dst_reroute(struct dst_entry *dst, struct sk_buff *skb)
+-{
+-	/*
+-	 *	FIXME
+-	 */
+-	RDBG(("ip6_dst_reroute(%p,%p)[%p] (AIEEE)\n", dst, skb,
+-	      __builtin_return_address(0)));
+-	return NULL;
+-}
+-
+ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
+ {
+ 	struct rt6_info *rt = (struct rt6_info *) dst;
+ 
+ 	if (rt) {
+ 		if (rt->rt6i_flags & RTF_CACHE)
+-			ip6_del_rt(rt, NULL);
++			ip6_del_rt(rt, NULL, NULL);
+ 		else
+ 			dst_release(dst);
+ 	}
+@@ -642,7 +547,80 @@
+ 	}
+ }
+ 
+-static int ip6_dst_gc()
++static void ip6_rt_update_pmtu(struct dst_entry *dst, u32 mtu)
++{
++	struct rt6_info *rt6 = (struct rt6_info*)dst;
++
++	if (mtu < dst_pmtu(dst) && rt6->rt6i_dst.plen == 128) {
++		rt6->rt6i_flags |= RTF_MODIFIED;
++		dst->metrics[RTAX_MTU-1] = mtu;
++	}
++}
++
++/* Protected by rt6_lock.  */
++static struct dst_entry *ndisc_dst_gc_list;
++static int ipv6_get_mtu(struct net_device *dev);
++static inline unsigned int ipv6_advmss(unsigned int mtu);
++
++struct dst_entry *ndisc_dst_alloc(struct net_device *dev, 
++				  struct neighbour *neigh,
++				  int (*output)(struct sk_buff *))
++{
++	struct rt6_info *rt = ip6_dst_alloc();
++
++	if (unlikely(rt == NULL))
++		goto out;
++
++	if (dev)
++		dev_hold(dev);
++	if (neigh)
++		neigh_hold(neigh);
++
++	rt->rt6i_dev	  = dev;
++	rt->rt6i_nexthop  = neigh;
++	rt->rt6i_expires  = 0;
++	rt->rt6i_flags    = RTF_LOCAL | RTF_NDISC;
++	rt->rt6i_metric   = 0;
++	atomic_set(&rt->u.dst.__refcnt, 1);
++	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = 255;
++	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
++	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
++	rt->u.dst.output  = output;
++
++	write_lock_bh(&rt6_lock);
++	rt->u.dst.next = ndisc_dst_gc_list;
++	ndisc_dst_gc_list = &rt->u.dst;
++	write_unlock_bh(&rt6_lock);
++
++	fib6_force_start_gc();
++
++out:
++	return (struct dst_entry *)rt;
++}
++
++int ndisc_dst_gc(int *more)
++{
++	struct dst_entry *dst, *next, **pprev;
++	int freed;
++
++	next = NULL;
++	pprev = &ndisc_dst_gc_list;
++	freed = 0;
++	while ((dst = *pprev) != NULL) {
++		if (!atomic_read(&dst->__refcnt)) {
++			*pprev = dst->next;
++			dst_free(dst);
++			freed++;
++		} else {
++			pprev = &dst->next;
++			(*more)++;
++		}
++	}
++
++	return freed;
++}
++
++static int ip6_dst_gc(void)
+ {
+ 	static unsigned expire = 30*HZ;
+ 	static unsigned long last_gc;
+@@ -669,19 +647,6 @@
+    Remove it only when all the things will work!
+  */
+ 
+-static void ipv6_addr_prefix(struct in6_addr *pfx,
+-			     const struct in6_addr *addr, int plen)
+-{
+-	int b = plen&0x7;
+-	int o = plen>>3;
+-
+-	memcpy(pfx->s6_addr, addr, o);
+-	if (o < 16)
+-		memset(pfx->s6_addr + o, 0, 16 - o);
+-	if (b != 0)
+-		pfx->s6_addr[o] = addr->s6_addr[o]&(0xff00 >> b);
+-}
+-
+ static int ipv6_get_mtu(struct net_device *dev)
+ {
+ 	int mtu = IPV6_MIN_MTU;
+@@ -695,6 +660,24 @@
+ 	return mtu;
+ }
+ 
++static inline unsigned int ipv6_advmss(unsigned int mtu)
++{
++	mtu -= sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
++
++	if (mtu < ip6_rt_min_advmss)
++		mtu = ip6_rt_min_advmss;
++
++	/*
++	 * Maximal non-jumbo IPv6 payload is IPV6_MAXPLEN and 
++	 * corresponding MSS is IPV6_MAXPLEN - tcp_header_size. 
++	 * IPV6_MAXPLEN is also valid and means: "any MSS, 
++	 * rely only on pmtu discovery"
++	 */
++	if (mtu > IPV6_MAXPLEN - sizeof(struct tcphdr))
++		mtu = IPV6_MAXPLEN;
++	return mtu;
++}
++
+ static int ipv6_get_hoplimit(struct net_device *dev)
+ {
+ 	int hoplimit = ipv6_devconf.hop_limit;
+@@ -712,14 +695,17 @@
+  *
+  */
+ 
+-int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh)
++int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	int err;
+ 	struct rtmsg *r;
++	struct rtattr **rta;
+ 	struct rt6_info *rt;
+ 	struct net_device *dev = NULL;
+ 	int addr_type;
+ 
++	rta = (struct rtattr **) _rtattr;
++
+ 	if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128)
+ 		return -EINVAL;
+ #ifndef CONFIG_IPV6_SUBTREES
+@@ -729,7 +715,7 @@
+ 	if (rtmsg->rtmsg_metric == 0)
+ 		rtmsg->rtmsg_metric = IP6_RT_PRIO_USER;
+ 
+-	rt = dst_alloc(&ip6_dst_ops);
++	rt = ip6_dst_alloc();
+ 
+ 	if (rt == NULL)
+ 		return -ENOMEM;
+@@ -849,23 +835,42 @@
+ 		}
+ 	}
+ 
+-	if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
+-		rt->rt6i_hoplimit = IPV6_DEFAULT_MCASTHOPS;
+-	else
+-		rt->rt6i_hoplimit = ipv6_get_hoplimit(dev);
+-	rt->rt6i_flags = rtmsg->rtmsg_flags;
++	rt->rt6i_flags = rtmsg->rtmsg_flags & ~RTF_NDISC;
+ 
+ install_route:
+-	rt->u.dst.pmtu = ipv6_get_mtu(dev);
+-	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
+-	/* Maximal non-jumbo IPv6 payload is 65535 and corresponding
+-	   MSS is 65535 - tcp_header_size. 65535 is also valid and
+-	   means: "any MSS, rely only on pmtu discovery"
+-	 */
+-	if (rt->u.dst.advmss > 65535-20)
+-		rt->u.dst.advmss = 65535;
++	if (rta && rta[RTA_METRICS-1]) {
++		int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]);
++		struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]);
++
++		while (RTA_OK(attr, attrlen)) {
++			unsigned flavor = attr->rta_type;
++			if (flavor) {
++				if (flavor > RTAX_MAX) {
++					err = -EINVAL;
++					goto out;
++				}
++				rt->u.dst.metrics[flavor-1] =
++					*(u32 *)RTA_DATA(attr);
++			}
++			attr = RTA_NEXT(attr, attrlen);
++		}
++	}
++
++	if (rt->u.dst.metrics[RTAX_HOPLIMIT-1] == 0) {
++		if (ipv6_addr_is_multicast(&rt->rt6i_dst.addr))
++			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
++				IPV6_DEFAULT_MCASTHOPS;
++		else
++			rt->u.dst.metrics[RTAX_HOPLIMIT-1] =
++				ipv6_get_hoplimit(dev);
++	}
++
++	if (!rt->u.dst.metrics[RTAX_MTU-1])
++		rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(dev);
++	if (!rt->u.dst.metrics[RTAX_ADVMSS-1])
++		rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
+ 	rt->u.dst.dev = dev;
+-	return rt6_ins(rt, nlh);
++	return rt6_ins(rt, nlh, _rtattr);
+ 
+ out:
+ 	if (dev)
+@@ -874,7 +879,7 @@
+ 	return err;
+ }
+ 
+-int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh)
++int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	int err;
+ 
+@@ -886,13 +891,13 @@
+ 
+ 	dst_release(&rt->u.dst);
+ 
+-	err = fib6_del(rt, nlh);
++	err = fib6_del(rt, nlh, _rtattr);
+ 	write_unlock_bh(&rt6_lock);
+ 
+ 	return err;
+ }
+ 
+-int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh)
++static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
+ {
+ 	struct fib6_node *fn;
+ 	struct rt6_info *rt;
+@@ -919,7 +924,7 @@
+ 			dst_hold(&rt->u.dst);
+ 			read_unlock_bh(&rt6_lock);
+ 
+-			return ip6_del_rt(rt, nlh);
++			return ip6_del_rt(rt, nlh, _rtattr);
+ 		}
+ 	}
+ 	read_unlock_bh(&rt6_lock);
+@@ -1015,17 +1020,14 @@
+ 	ipv6_addr_copy(&nrt->rt6i_gateway, (struct in6_addr*)neigh->primary_key);
+ 	nrt->rt6i_nexthop = neigh_clone(neigh);
+ 	/* Reset pmtu, it may be better */
+-	nrt->u.dst.pmtu = ipv6_get_mtu(neigh->dev);
+-	nrt->u.dst.advmss = max_t(unsigned int, nrt->u.dst.pmtu - 60, ip6_rt_min_advmss);
+-	if (rt->u.dst.advmss > 65535-20)
+-		rt->u.dst.advmss = 65535;
+-	nrt->rt6i_hoplimit = ipv6_get_hoplimit(neigh->dev);
++	nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
++	nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&nrt->u.dst));
+ 
+-	if (rt6_ins(nrt, NULL))
++	if (rt6_ins(nrt, NULL, NULL))
+ 		goto out;
+ 
+ 	if (rt->rt6i_flags&RTF_CACHE) {
+-		ip6_del_rt(rt, NULL);
++		ip6_del_rt(rt, NULL, NULL);
+ 		return;
+ 	}
+ 
+@@ -1060,7 +1062,7 @@
+ 	if (rt == NULL)
+ 		return;
+ 
+-	if (pmtu >= rt->u.dst.pmtu)
++	if (pmtu >= dst_pmtu(&rt->u.dst))
+ 		goto out;
+ 
+ 	/* New mtu received -> path was valid.
+@@ -1075,7 +1077,7 @@
+ 	   would return automatically.
+ 	 */
+ 	if (rt->rt6i_flags & RTF_CACHE) {
+-		rt->u.dst.pmtu = pmtu;
++		rt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+ 		dst_set_expires(&rt->u.dst, ip6_rt_mtu_expires);
+ 		rt->rt6i_flags |= RTF_MODIFIED|RTF_EXPIRES;
+ 		goto out;
+@@ -1089,7 +1091,7 @@
+ 	if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
+ 		nrt = rt6_cow(rt, daddr, saddr);
+ 		if (!nrt->u.dst.error) {
+-			nrt->u.dst.pmtu = pmtu;
++			nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
+ 			/* According to RFC 1981, a PMTU increase shouldn't be
+ 			   detected within 5 mins; the recommended timer is 10 mins.
+ 			   Here the route expiration time is set to ip6_rt_mtu_expires
+@@ -1098,8 +1100,8 @@
+ 			 */
+ 			dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+ 			nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES;
+-			dst_release(&nrt->u.dst);
+ 		}
++		dst_release(&nrt->u.dst);
+ 	} else {
+ 		nrt = ip6_rt_copy(rt);
+ 		if (nrt == NULL)
+@@ -1110,8 +1112,8 @@
+ 		nrt->rt6i_nexthop = neigh_clone(rt->rt6i_nexthop);
+ 		dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires);
+ 		nrt->rt6i_flags |= RTF_DYNAMIC|RTF_CACHE|RTF_EXPIRES;
+-		nrt->u.dst.pmtu = pmtu;
+-		rt6_ins(nrt, NULL);
++		nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
++		rt6_ins(nrt, NULL, NULL);
+ 	}
+ 
+ out:
+@@ -1124,20 +1126,19 @@
+ 
+ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort)
+ {
+-	struct rt6_info *rt;
++	struct rt6_info *rt = ip6_dst_alloc();
+ 
+-	rt = dst_alloc(&ip6_dst_ops);
++	BUG_ON(ort->rt6i_flags & RTF_NDISC);
+ 
+ 	if (rt) {
+ 		rt->u.dst.input = ort->u.dst.input;
+ 		rt->u.dst.output = ort->u.dst.output;
+ 
+-		memcpy(&rt->u.dst.mxlock, &ort->u.dst.mxlock, RTAX_MAX*sizeof(unsigned));
++		memcpy(rt->u.dst.metrics, ort->u.dst.metrics, RTAX_MAX*sizeof(u32));
+ 		rt->u.dst.dev = ort->u.dst.dev;
+ 		if (rt->u.dst.dev)
+ 			dev_hold(rt->u.dst.dev);
+ 		rt->u.dst.lastuse = jiffies;
+-		rt->rt6i_hoplimit = ort->rt6i_hoplimit;
+ 		rt->rt6i_expires = 0;
+ 
+ 		ipv6_addr_copy(&rt->rt6i_gateway, &ort->rt6i_gateway);
+@@ -1184,7 +1185,7 @@
+ 
+ 	rtmsg.rtmsg_ifindex = dev->ifindex;
+ 
+-	ip6_route_add(&rtmsg, NULL);
++	ip6_route_add(&rtmsg, NULL, NULL);
+ 	return rt6_get_dflt_router(gwaddr, dev);
+ }
+ 
+@@ -1210,7 +1211,7 @@
+ 
+ 			read_unlock_bh(&rt6_lock);
+ 
+-			ip6_del_rt(rt, NULL);
++			ip6_del_rt(rt, NULL, NULL);
+ 
+ 			goto restart;
+ 		}
+@@ -1236,10 +1237,10 @@
+ 		rtnl_lock();
+ 		switch (cmd) {
+ 		case SIOCADDRT:
+-			err = ip6_route_add(&rtmsg, NULL);
++			err = ip6_route_add(&rtmsg, NULL, NULL);
+ 			break;
+ 		case SIOCDELRT:
+-			err = ip6_route_del(&rtmsg, NULL);
++			err = ip6_route_del(&rtmsg, NULL, NULL);
+ 			break;
+ 		default:
+ 			err = -EINVAL;
+@@ -1268,11 +1269,10 @@
+  *	Add address
+  */
+ 
+-int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev)
++int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, int anycast)
+ {
+-	struct rt6_info *rt;
++	struct rt6_info *rt = ip6_dst_alloc();
+ 
+-	rt = dst_alloc(&ip6_dst_ops);
+ 	if (rt == NULL)
+ 		return -ENOMEM;
+ 
+@@ -1280,14 +1280,14 @@
+ 	rt->u.dst.input = ip6_input;
+ 	rt->u.dst.output = ip6_output;
+ 	rt->rt6i_dev = dev_get_by_name("lo");
+-	rt->u.dst.pmtu = ipv6_get_mtu(rt->rt6i_dev);
+-	rt->u.dst.advmss = max_t(unsigned int, rt->u.dst.pmtu - 60, ip6_rt_min_advmss);
+-	if (rt->u.dst.advmss > 65535-20)
+-		rt->u.dst.advmss = 65535;
+-	rt->rt6i_hoplimit = ipv6_get_hoplimit(rt->rt6i_dev);
++	rt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(rt->rt6i_dev);
++	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_pmtu(&rt->u.dst));
++	rt->u.dst.metrics[RTAX_HOPLIMIT-1] = ipv6_get_hoplimit(rt->rt6i_dev);
+ 	rt->u.dst.obsolete = -1;
+ 
+ 	rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP;
++	if (!anycast)
++		rt->rt6i_flags |= RTF_LOCAL;
+ 	rt->rt6i_nexthop = ndisc_get_neigh(rt->rt6i_dev, &rt->rt6i_gateway);
+ 	if (rt->rt6i_nexthop == NULL) {
+ 		dst_free((struct dst_entry *) rt);
+@@ -1296,7 +1296,7 @@
+ 
+ 	ipv6_addr_copy(&rt->rt6i_dst.addr, addr);
+ 	rt->rt6i_dst.plen = 128;
+-	rt6_ins(rt, NULL);
++	rt6_ins(rt, NULL, NULL);
+ 
+ 	return 0;
+ }
+@@ -1313,129 +1313,13 @@
+ 	rt = rt6_lookup(addr, NULL, loopback_dev.ifindex, 1);
+ 	if (rt) {
+ 		if (rt->rt6i_dst.plen == 128)
+-			err = ip6_del_rt(rt, NULL);
++			err = ip6_del_rt(rt, NULL, NULL);
+ 		else
+ 			dst_release(&rt->u.dst);
+ 	}
+ 
+ 	return err;
+ }
+-
+-#ifdef CONFIG_RT6_POLICY
+-
+-static int rt6_flow_match_in(struct rt6_info *rt, struct sk_buff *skb)
+-{
+-	struct flow_filter *frule;
+-	struct pkt_filter *filter;
+-	int res = 1;
+-
+-	if ((frule = rt->rt6i_filter) == NULL)
+-		goto out;
+-
+-	if (frule->type != FLR_INPUT) {
+-		res = 0;
+-		goto out;
+-	}
+-
+-	for (filter = frule->u.filter; filter; filter = filter->next) {
+-		__u32 *word;
+-
+-		word = (__u32 *) skb->h.raw;
+-		word += filter->offset;
+-
+-		if ((*word ^ filter->value) & filter->mask) {
+-			res = 0;
+-			break;
+-		}
+-	}
+-
+-out:
+-	return res;
+-}
+-
+-static int rt6_flow_match_out(struct rt6_info *rt, struct sock *sk)
+-{
+-	struct flow_filter *frule;
+-	int res = 1;
+-
+-	if ((frule = rt->rt6i_filter) == NULL)
+-		goto out;
+-
+-	if (frule->type != FLR_INPUT) {
+-		res = 0;
+-		goto out;
+-	}
+-
+-	if (frule->u.sk != sk)
+-		res = 0;
+-out:
+-	return res;
+-}
+-
+-static struct rt6_info *rt6_flow_lookup(struct rt6_info *rt,
+-					struct in6_addr *daddr,
+-					struct in6_addr *saddr,
+-					struct fl_acc_args *args)
+-{
+-	struct flow_rule *frule;
+-	struct rt6_info *nrt = NULL;
+-	struct pol_chain *pol;
+-
+-	for (pol = rt6_pol_list; pol; pol = pol->next) {
+-		struct fib6_node *fn;
+-		struct rt6_info *sprt;
+-
+-		fn = fib6_lookup(pol->rules, daddr, saddr);
+-
+-		do {
+-			for (sprt = fn->leaf; sprt; sprt=sprt->u.next) {
+-				int res;
+-
+-				frule = sprt->rt6i_flowr;
+-#if RT6_DEBUG >= 2
+-				if (frule == NULL) {
+-					printk(KERN_DEBUG "NULL flowr\n");
+-					goto error;
+-				}
+-#endif
+-				res = frule->ops->accept(rt, sprt, args, &nrt);
+-
+-				switch (res) {
+-				case FLOWR_SELECT:
+-					goto found;
+-				case FLOWR_CLEAR:
+-					goto next_policy;
+-				case FLOWR_NODECISION:
+-					break;
+-				default:
+-					goto error;
+-				};
+-			}
+-
+-			fn = fn->parent;
+-
+-		} while ((fn->fn_flags & RTN_TL_ROOT) == 0);
+-
+-	next_policy:
+-	}
+-
+-error:
+-	dst_hold(&ip6_null_entry.u.dst);
+-	return &ip6_null_entry;
+-
+-found:
+-	if (nrt == NULL)
+-		goto error;
+-
+-	nrt->rt6i_flags |= RTF_CACHE;
+-	dst_hold(&nrt->u.dst);
+-	err = rt6_ins(nrt, NULL);
+-	if (err)
+-		nrt->u.dst.error = err;
+-	return nrt;
+-}
+-#endif
+-
+ static int fib6_ifdown(struct rt6_info *rt, void *arg)
+ {
+ 	if (((void*)rt->rt6i_dev == arg || arg == NULL) &&
+@@ -1487,14 +1371,12 @@
+ 	   PMTU discovery.
+ 	 */
+ 	if (rt->rt6i_dev == arg->dev &&
+-	    !(rt->u.dst.mxlock&(1<<RTAX_MTU)) &&
+-	      (rt->u.dst.pmtu > arg->mtu ||
+-	       (rt->u.dst.pmtu < arg->mtu &&
+-		rt->u.dst.pmtu == idev->cnf.mtu6)))
+-		rt->u.dst.pmtu = arg->mtu;
+-	rt->u.dst.advmss = max_t(unsigned int, arg->mtu - 60, ip6_rt_min_advmss);
+-	if (rt->u.dst.advmss > 65535-20)
+-		rt->u.dst.advmss = 65535;
++	    !dst_metric_locked(&rt->u.dst, RTAX_MTU) &&
++            (dst_pmtu(&rt->u.dst) > arg->mtu ||
++             (dst_pmtu(&rt->u.dst) < arg->mtu &&
++	      dst_pmtu(&rt->u.dst) == idev->cnf.mtu6)))
++		rt->u.dst.metrics[RTAX_MTU-1] = arg->mtu;
++	rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(arg->mtu);
+ 	return 0;
+ }
+ 
+@@ -1556,7 +1438,7 @@
+ 
+ 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+ 		return -EINVAL;
+-	return ip6_route_del(&rtmsg, nlh);
++	return ip6_route_del(&rtmsg, nlh, arg);
+ }
+ 
+ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
+@@ -1566,7 +1448,7 @@
+ 
+ 	if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
+ 		return -EINVAL;
+-	return ip6_route_add(&rtmsg, nlh);
++	return ip6_route_add(&rtmsg, nlh, arg);
+ }
+ 
+ struct rt6_rtnl_dump_arg
+@@ -1642,7 +1524,7 @@
+ 		if (ipv6_get_saddr(&rt->u.dst, dst, &saddr_buf) == 0)
+ 			RTA_PUT(skb, RTA_PREFSRC, 16, &saddr_buf);
+ 	}
+-	if (rtnetlink_put_metrics(skb, &rt->u.dst.mxlock) < 0)
++	if (rtnetlink_put_metrics(skb, rt->u.dst.metrics) < 0)
+ 		goto rtattr_failure;
+ 	if (rt->u.dst.neighbour)
+ 		RTA_PUT(skb, RTA_GATEWAY, 16, &rt->u.dst.neighbour->primary_key);
+@@ -1798,15 +1680,13 @@
+ 	skb->mac.raw = skb->data;
+ 	skb_reserve(skb, MAX_HEADER + sizeof(struct ipv6hdr));
+ 
+-	fl.proto = 0;
+-	fl.nl_u.ip6_u.daddr = NULL;
+-	fl.nl_u.ip6_u.saddr = NULL;
+-	fl.uli_u.icmpt.type = 0;
+-	fl.uli_u.icmpt.code = 0;
++	memset(&fl, 0, sizeof(fl));
+ 	if (rta[RTA_SRC-1])
+-		fl.nl_u.ip6_u.saddr = (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]);
++		ipv6_addr_copy(&fl.fl6_src,
++			       (struct in6_addr*)RTA_DATA(rta[RTA_SRC-1]));
+ 	if (rta[RTA_DST-1])
+-		fl.nl_u.ip6_u.daddr = (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]);
++		ipv6_addr_copy(&fl.fl6_dst,
++			       (struct in6_addr*)RTA_DATA(rta[RTA_DST-1]));
+ 
+ 	if (rta[RTA_IIF-1])
+ 		memcpy(&iif, RTA_DATA(rta[RTA_IIF-1]), sizeof(int));
+@@ -1830,8 +1710,7 @@
+ 
+ 	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
+ 	err = rt6_fill_node(skb, rt, 
+-			    fl.nl_u.ip6_u.daddr,
+-			    fl.nl_u.ip6_u.saddr,
++			    &fl.fl6_dst, &fl.fl6_src,
+ 			    iif,
+ 			    RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
+ 			    nlh->nlmsg_seq, nlh, 0);
+@@ -2043,7 +1922,6 @@
+ 
+ #endif
+ 
+-
+ void __init ip6_route_init(void)
+ {
+ 	ip6_dst_ops.kmem_cachep = kmem_cache_create("ip6_dst_cache",
+@@ -2055,6 +1933,9 @@
+ 	proc_net_create("ipv6_route", 0, rt6_proc_info);
+ 	proc_net_create("rt6_stats", 0, rt6_proc_stats);
+ #endif
++#ifdef CONFIG_XFRM
++	xfrm6_init();
++#endif
+ }
+ 
+ #ifdef MODULE
+@@ -2064,8 +1945,11 @@
+ 	proc_net_remove("ipv6_route");
+ 	proc_net_remove("rt6_stats");
+ #endif
+-
++#ifdef CONFIG_XFRM
++	xfrm6_fini();
++#endif
+ 	rt6_ifdown(NULL);
+ 	fib6_gc_cleanup();
++	kmem_cache_destroy(ip6_dst_ops.kmem_cachep);
+ }
+ #endif	/* MODULE */
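The dominant change across the route.c hunks above is mechanical: the dedicated
dst_entry fields (pmtu, advmss, mxlock) and rt6i_hoplimit are folded into one
metrics[] array indexed by the RTAX_* constants minus one, read through the
dst_pmtu()/dst_metric() helpers. A minimal user-space model of that layout (the
RTAX_* values are assumed from <linux/rtnetlink.h> of this era; the struct and
helper below are illustrative stand-ins, not the kernel's definitions):

    #include <stdio.h>

    /* Assumed RTAX_* values; only the index-minus-one convention matters. */
    enum { RTAX_MTU = 2, RTAX_ADVMSS = 8, RTAX_HOPLIMIT = 10, RTAX_MAX = 14 };

    struct dst_entry {
            unsigned int metrics[RTAX_MAX]; /* replaces ->pmtu, ->advmss, ... */
    };

    /* dst_metric()/dst_pmtu()-style accessor: one array, RTAX index minus one. */
    static unsigned int dst_metric(const struct dst_entry *dst, int metric)
    {
            return dst->metrics[metric - 1];
    }

    int main(void)
    {
            struct dst_entry dst = { { 0 } };
            dst.metrics[RTAX_MTU - 1] = 1500;   /* was: dst.pmtu = 1500; */
            printf("pmtu=%u\n", dst_metric(&dst, RTAX_MTU));
            return 0;
    }

The payoff shows in ip6_route_add(): user-supplied RTA_METRICS attributes can
now be copied straight into the array instead of being translated field by field.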
+diff -Nru a/net/ipv6/sit.c b/net/ipv6/sit.c
+--- a/net/ipv6/sit.c	2005-02-13 21:25:10 +11:00
++++ b/net/ipv6/sit.c	2005-02-13 21:25:10 +11:00
+@@ -49,6 +49,7 @@
+ #include <net/icmp.h>
+ #include <net/ipip.h>
+ #include <net/inet_ecn.h>
++#include <net/xfrm.h>
+ 
+ /*
+    This version of net/ipv6/sit.c is cloned of net/ipv4/ip_gre.c
+@@ -392,6 +393,7 @@
+ 
+ 	read_lock(&ipip6_lock);
+ 	if ((tunnel = ipip6_tunnel_lookup(iph->saddr, iph->daddr)) != NULL) {
++		secpath_reset(skb);
+ 		skb->mac.raw = skb->nh.raw;
+ 		skb->nh.raw = skb->data;
+ 		memset(&(IPCB(skb)->opt), 0, sizeof(struct ip_options));
+@@ -416,13 +418,6 @@
+ 	return 0;
+ }
+ 
+-/* Need this wrapper because NF_HOOK takes the function address */
+-static inline int do_ip_send(struct sk_buff *skb)
+-{
+-	return ip_send(skb);
+-}
+-
+-
+ /* Returns the embedded IPv4 address if the IPv6 address
+    comes from 6to4 (draft-ietf-ngtrans-6to4-04) addr space */
+ 
+@@ -495,9 +490,17 @@
+ 		dst = addr6->s6_addr32[3];
+ 	}
+ 
+-	if (ip_route_output(&rt, dst, tiph->saddr, RT_TOS(tos), tunnel->parms.link)) {
+-		tunnel->stat.tx_carrier_errors++;
+-		goto tx_error_icmp;
++	{
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = dst,
++						.saddr = tiph->saddr,
++						.tos = RT_TOS(tos) } },
++				    .oif = tunnel->parms.link,
++				    .proto = IPPROTO_IPV6 };
++		if (ip_route_output_key(&rt, &fl)) {
++			tunnel->stat.tx_carrier_errors++;
++			goto tx_error_icmp;
++		}
+ 	}
+ 	if (rt->rt_type != RTN_UNICAST) {
+ 		ip_rt_put(rt);
+@@ -513,9 +516,9 @@
+ 	}
+ 
+ 	if (tiph->frag_off)
+-		mtu = rt->u.dst.pmtu - sizeof(struct iphdr);
++		mtu = dst_pmtu(&rt->u.dst) - sizeof(struct iphdr);
+ 	else
+-		mtu = skb->dst ? skb->dst->pmtu : dev->mtu;
++		mtu = skb->dst ? dst_pmtu(skb->dst) : dev->mtu;
+ 
+ 	if (mtu < 68) {
+ 		tunnel->stat.collisions++;
+@@ -524,15 +527,9 @@
+ 	}
+ 	if (mtu < IPV6_MIN_MTU)
+ 		mtu = IPV6_MIN_MTU;
+-	if (skb->dst && mtu < skb->dst->pmtu) {
+-		struct rt6_info *rt6 = (struct rt6_info*)skb->dst;
+-		if (mtu < rt6->u.dst.pmtu) {
+-			if (tunnel->parms.iph.daddr || rt6->rt6i_dst.plen == 128) {
+-				rt6->rt6i_flags |= RTF_MODIFIED;
+-				rt6->u.dst.pmtu = mtu;
+-			}
+-		}
+-	}
++	if (tunnel->parms.iph.daddr && skb->dst)
++		skb->dst->ops->update_pmtu(skb->dst, mtu);
++
+ 	if (skb->len > mtu) {
+ 		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev);
+ 		ip_rt_put(rt);
+@@ -550,7 +547,7 @@
+ 	/*
+ 	 * Okay, now see if we can stuff it in the buffer as-is.
+ 	 */
+-	max_headroom = (((tdev->hard_header_len+15)&~15)+sizeof(struct iphdr));
++	max_headroom = LL_RESERVED_SPACE(tdev)+sizeof(struct iphdr);
+ 
+ 	if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) {
+ 		struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom);
+@@ -765,8 +762,14 @@
+ 	ipip6_tunnel_init_gen(dev);
+ 
+ 	if (iph->daddr) {
++		struct flowi fl = { .nl_u = { .ip4_u =
++					      { .daddr = iph->daddr,
++						.saddr = iph->saddr,
++						.tos = RT_TOS(iph->tos) } },
++				    .oif = tunnel->parms.link,
++				    .proto = IPPROTO_IPV6 };
+ 		struct rtable *rt;
+-		if (!ip_route_output(&rt, iph->daddr, iph->saddr, RT_TOS(iph->tos), tunnel->parms.link)) {
++		if (!ip_route_output_key(&rt, &fl)) {
+ 			tdev = rt->u.dst.dev;
+ 			ip_rt_put(rt);
+ 		}
+@@ -823,19 +826,14 @@
+ }
+ 
+ static struct inet_protocol sit_protocol = {
+-	ipip6_rcv,
+-	ipip6_err,
+-	0,
+-	IPPROTO_IPV6,
+-	0,
+-	NULL,
+-	"IPv6"
++	.handler	=	ipip6_rcv,
++	.err_handler	=	ipip6_err,
+ };
+ 
+ #ifdef MODULE
+ void sit_cleanup(void)
+ {
+-	inet_del_protocol(&sit_protocol);
++	inet_del_protocol(&sit_protocol, IPPROTO_IPV6);
+ 	unregister_netdev(&ipip6_fb_tunnel_dev);
+ }
+ #endif
+@@ -844,9 +842,13 @@
+ {
+ 	printk(KERN_INFO "IPv6 over IPv4 tunneling driver\n");
+ 
++	if (inet_add_protocol(&sit_protocol, IPPROTO_IPV6) < 0) {
++		printk(KERN_INFO "sit init: Can't add protocol\n");
++		return -EAGAIN;
++	}
++
+ 	ipip6_fb_tunnel_dev.priv = (void*)&ipip6_fb_tunnel;
+ 	strcpy(ipip6_fb_tunnel_dev.name, ipip6_fb_tunnel.parms.name);
+ 	register_netdev(&ipip6_fb_tunnel_dev);
+-	inet_add_protocol(&sit_protocol);
+ 	return 0;
+ }
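Both lookups in sit.c move from the positional ip_route_output(dst, src, tos,
oif) call to a struct flowi filled with designated initializers and handed to
ip_route_output_key(), so the lookup key carries the protocol for the xfrm
policy layer to match on. The initializer idiom in isolation (a compilable
sketch; the stand-in flowi only mimics the nested nl_u/ip4_u layout seen in
the patch, the field widths are assumptions):

    #include <stdio.h>
    #include <stdint.h>

    struct flowi {                     /* stand-in, not the kernel struct */
            int oif;
            int proto;
            union {
                    struct { uint32_t daddr, saddr; uint8_t tos; } ip4_u;
            } nl_u;
    };

    int main(void)
    {
            /* Named fields are set; everything left unnamed is zeroed. */
            struct flowi fl = { .nl_u = { .ip4_u = { .daddr = 0x0100007f } },
                                .oif   = 0,
                                .proto = 41 };   /* IPPROTO_IPV6 */
            printf("daddr=%08x proto=%d\n", fl.nl_u.ip4_u.daddr, fl.proto);
            return 0;
    }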
+diff -Nru a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
+--- a/net/ipv6/tcp_ipv6.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/tcp_ipv6.c	2005-02-13 21:25:09 +11:00
+@@ -38,6 +38,7 @@
+ #include <linux/init.h>
+ #include <linux/jhash.h>
+ #include <linux/ipsec.h>
++#include <net/xfrm.h>
+ 
+ #include <linux/ipv6.h>
+ #include <linux/icmpv6.h>
+@@ -553,7 +554,6 @@
+ 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+ 	struct tcp_opt *tp = &sk->tp_pinfo.af_tcp;
+ 	struct in6_addr *saddr = NULL;
+-	struct in6_addr saddr_buf;
+ 	struct flowi fl;
+ 	struct dst_entry *dst;
+ 	int addr_type;
+@@ -565,7 +565,8 @@
+ 	if (usin->sin6_family != AF_INET6) 
+ 		return(-EAFNOSUPPORT);
+ 
+-	fl.fl6_flowlabel = 0;
++	memset(&fl, 0, sizeof(fl));
++
+ 	if (np->sndflow) {
+ 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ 		IP6_ECN_flow_init(fl.fl6_flowlabel);
+@@ -659,43 +660,45 @@
+ 		saddr = &np->rcv_saddr;
+ 
+ 	fl.proto = IPPROTO_TCP;
+-	fl.fl6_dst = &np->daddr;
+-	fl.fl6_src = saddr;
++	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
++	ipv6_addr_copy(&fl.fl6_src,
++		       (saddr ? saddr : &np->saddr));
+ 	fl.oif = sk->bound_dev_if;
+-	fl.uli_u.ports.dport = usin->sin6_port;
+-	fl.uli_u.ports.sport = sk->sport;
++	fl.fl_ip_dport = usin->sin6_port;
++	fl.fl_ip_sport = sk->sport;
+ 
+ 	if (np->opt && np->opt->srcrt) {
+ 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+-		fl.nl_u.ip6_u.daddr = rt0->addr;
++		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 	}
+ 
+-	dst = ip6_route_output(sk, &fl);
++	err = ip6_dst_lookup(sk, &dst, &fl);
+ 
+-	if ((err = dst->error) != 0) {
+-		dst_release(dst);
++	if (err)
+ 		goto failure;
+-	}
+-
+-	ip6_dst_store(sk, dst, NULL);
+-	sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
+ 
+ 	if (saddr == NULL) {
+-		err = ipv6_get_saddr(dst, &np->daddr, &saddr_buf);
+-		if (err)
+-			goto failure;
+-
+-		saddr = &saddr_buf;
++		saddr = &fl.fl6_src;
++		ipv6_addr_copy(&np->rcv_saddr, saddr);
+ 	}
+ 
+ 	/* set the source address */
+-	ipv6_addr_copy(&np->rcv_saddr, saddr);
+ 	ipv6_addr_copy(&np->saddr, saddr);
+ 	sk->rcv_saddr= LOOPBACK4_IPV6;
+ 
++	ip6_dst_store(sk, dst, NULL);
++	sk->route_caps = dst->dev->features &
++		~(NETIF_F_IP_CSUM
++#ifdef NETIF_F_TSO
++		  | NETIF_F_TSO
++#endif
++			);
++
+ 	tp->ext_header_len = 0;
+ 	if (np->opt)
+ 		tp->ext_header_len = np->opt->opt_flen+np->opt->opt_nflen;
++	tp->ext2_header_len = dst->header_len;
++
+ 	tp->mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
+ 
+ 	sk->dport = usin->sin6_port;
+@@ -717,8 +720,8 @@
+ 
+ late_failure:
+ 	tcp_set_state(sk, TCP_CLOSE); 
+-failure:
+ 	__sk_dst_reset(sk);
++failure:
+ 	sk->dport = 0;
+ 	sk->route_caps = 0;
+ 	return err;
+@@ -781,21 +784,23 @@
+ 			   to handle rthdr case. Ignore this complexity
+ 			   for now.
+ 			 */
++			memset(&fl, 0, sizeof(fl));
+ 			fl.proto = IPPROTO_TCP;
+-			fl.nl_u.ip6_u.daddr = &np->daddr;
+-			fl.nl_u.ip6_u.saddr = &np->saddr;
++			ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
++			ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ 			fl.oif = sk->bound_dev_if;
+-			fl.uli_u.ports.dport = sk->dport;
+-			fl.uli_u.ports.sport = sk->sport;
++			fl.fl_ip_dport = sk->dport;
++			fl.fl_ip_sport = sk->sport;
+ 
+-			dst = ip6_route_output(sk, &fl);
++			if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
++				sk->err_soft = -err;
++				goto out;
++			}
+ 		} else
+ 			dst_hold(dst);
+ 
+-		if (dst->error) {
+-			sk->err_soft = -dst->error;
+-		} else if (tp->pmtu_cookie > dst->pmtu) {
+-			tcp_sync_mss(sk, dst->pmtu);
++		if (tp->pmtu_cookie > dst_pmtu(dst)) {
++			tcp_sync_mss(sk, dst_pmtu(dst));
+ 			tcp_simple_retransmit(sk);
+ 		} /* else let the usual retransmit timer handle it */
+ 		dst_release(dst);
+@@ -865,13 +870,14 @@
+ 	struct flowi fl;
+ 	int err = -1;
+ 
++	memset(&fl, 0, sizeof(fl));
+ 	fl.proto = IPPROTO_TCP;
+-	fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
+-	fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
++	ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
++	ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+ 	fl.fl6_flowlabel = 0;
+ 	fl.oif = req->af.v6_req.iif;
+-	fl.uli_u.ports.dport = req->rmt_port;
+-	fl.uli_u.ports.sport = sk->sport;
++	fl.fl_ip_dport = req->rmt_port;
++	fl.fl_ip_sport = sk->sport;
+ 
+ 	if (dst == NULL) {
+ 		opt = sk->net_pinfo.af_inet6.opt;
+@@ -886,11 +892,11 @@
+ 
+ 		if (opt && opt->srcrt) {
+ 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+-			fl.nl_u.ip6_u.daddr = rt0->addr;
++			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 		}
+ 
+-		dst = ip6_route_output(sk, &fl);
+-		if (dst->error)
++		err = ip6_dst_lookup(sk, &dst, &fl);
++		if (err)
+ 			goto done;
+ 	}
+ 
+@@ -902,7 +908,7 @@
+ 					 &req->af.v6_req.loc_addr, &req->af.v6_req.rmt_addr,
+ 					 csum_partial((char *)th, skb->len, skb->csum));
+ 
+-		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
++		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+ 		err = ip6_xmit(sk, skb, &fl, opt);
+ 		if (err == NET_XMIT_CN)
+ 			err = 0;
+@@ -970,7 +976,7 @@
+ 	if (th->rst)
+ 		return;
+ 
+-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
++	if (!ipv6_unicast_destination(skb))
+ 		return; 
+ 
+ 	/*
+@@ -1003,24 +1009,21 @@
+ 
+ 	buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
+ 
+-	fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
+-	fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
+-	fl.fl6_flowlabel = 0;
++	memset(&fl, 0, sizeof(fl));
++	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
++	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+ 
+-	t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
+-				    fl.nl_u.ip6_u.daddr, 
++	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
+ 				    sizeof(*t1), IPPROTO_TCP,
+ 				    buff->csum);
+ 
+ 	fl.proto = IPPROTO_TCP;
+ 	fl.oif = tcp_v6_iif(skb);
+-	fl.uli_u.ports.dport = t1->dest;
+-	fl.uli_u.ports.sport = t1->source;
++	fl.fl_ip_dport = t1->dest;
++	fl.fl_ip_sport = t1->source;
+ 
+ 	/* sk = NULL, but it is safe for now. RST socket required. */
+-	buff->dst = ip6_route_output(NULL, &fl);
+-
+-	if (buff->dst->error == 0) {
++	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+ 		ip6_xmit(NULL, buff, &fl, NULL);
+ 		TCP_INC_STATS_BH(TcpOutSegs);
+ 		TCP_INC_STATS_BH(TcpOutRsts);
+@@ -1070,23 +1073,20 @@
+ 
+ 	buff->csum = csum_partial((char *)t1, tot_len, 0);
+ 
+-	fl.nl_u.ip6_u.daddr = &skb->nh.ipv6h->saddr;
+-	fl.nl_u.ip6_u.saddr = &skb->nh.ipv6h->daddr;
+-	fl.fl6_flowlabel = 0;
++	memset(&fl, 0, sizeof(fl));
++	ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
++	ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
+ 
+-	t1->check = csum_ipv6_magic(fl.nl_u.ip6_u.saddr,
+-				    fl.nl_u.ip6_u.daddr, 
++	t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
+ 				    tot_len, IPPROTO_TCP,
+ 				    buff->csum);
+ 
+ 	fl.proto = IPPROTO_TCP;
+ 	fl.oif = tcp_v6_iif(skb);
+-	fl.uli_u.ports.dport = t1->dest;
+-	fl.uli_u.ports.sport = t1->source;
++	fl.fl_ip_dport = t1->dest;
++	fl.fl_ip_sport = t1->source;
+ 
+-	buff->dst = ip6_route_output(NULL, &fl);
+-
+-	if (buff->dst->error == 0) {
++	if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
+ 		ip6_xmit(NULL, buff, &fl, NULL);
+ 		TCP_INC_STATS_BH(TcpOutSegs);
+ 		return;
+@@ -1177,8 +1177,7 @@
+ 	if (skb->protocol == htons(ETH_P_IP))
+ 		return tcp_v4_conn_request(sk, skb);
+ 
+-	/* FIXME: do the same check for anycast */
+-	if (ipv6_addr_is_multicast(&skb->nh.ipv6h->daddr))
++	if (!ipv6_unicast_destination(skb))
+ 		goto drop; 
+ 
+ 	/*
+@@ -1248,7 +1247,6 @@
+ 					  struct dst_entry *dst)
+ {
+ 	struct ipv6_pinfo *np;
+-	struct flowi fl;
+ 	struct tcp_opt *newtp;
+ 	struct sock *newsk;
+ 	struct ipv6_txoptions *opt;
+@@ -1310,23 +1308,23 @@
+ 	}
+ 
+ 	if (dst == NULL) {
++		struct flowi fl;
++
++		memset(&fl, 0, sizeof(fl));
+ 		fl.proto = IPPROTO_TCP;
+-		fl.nl_u.ip6_u.daddr = &req->af.v6_req.rmt_addr;
++		ipv6_addr_copy(&fl.fl6_dst, &req->af.v6_req.rmt_addr);
+ 		if (opt && opt->srcrt) {
+ 			struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
+-			fl.nl_u.ip6_u.daddr = rt0->addr;
++			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 		}
+-		fl.nl_u.ip6_u.saddr = &req->af.v6_req.loc_addr;
+-		fl.fl6_flowlabel = 0;
++		ipv6_addr_copy(&fl.fl6_src, &req->af.v6_req.loc_addr);
+ 		fl.oif = sk->bound_dev_if;
+-		fl.uli_u.ports.dport = req->rmt_port;
+-		fl.uli_u.ports.sport = sk->sport;
+-
+-		dst = ip6_route_output(sk, &fl);
+-	}
++		fl.fl_ip_dport = req->rmt_port;
++		fl.fl_ip_sport = sk->sport;
+ 
+-	if (dst->error)
+-		goto out;
++		if (ip6_dst_lookup(sk, &dst, &fl))
++			goto out;
++	} 
+ 
+ 	newsk = tcp_create_openreq_child(sk, req, skb);
+ 	if (newsk == NULL)
+@@ -1339,7 +1337,12 @@
+ 	MOD_INC_USE_COUNT;
+ 
+ 	ip6_dst_store(newsk, dst, NULL);
+-	sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
++	newsk->route_caps = dst->dev->features&
++		~(NETIF_F_IP_CSUM
++#ifdef NETIF_F_TSO
++		  | NETIF_F_TSO
++#endif
++		  );
+ 
+ 	newtp = &(newsk->tp_pinfo.af_tcp);
+ 
+@@ -1387,8 +1390,10 @@
+ 	if (np->opt)
+ 		newtp->ext_header_len = np->opt->opt_nflen + np->opt->opt_flen;
+ 
+-	tcp_sync_mss(newsk, dst->pmtu);
+-	newtp->advmss = dst->advmss;
++	newtp->ext2_header_len = dst->header_len;
++
++	tcp_sync_mss(newsk, dst_pmtu(dst));
++	newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
+ 	tcp_initialize_rcv_mss(newsk);
+ 
+ 	newsk->daddr	= LOOPBACK4_IPV6;
+@@ -1557,8 +1562,9 @@
+ 	return 0;
+ }
+ 
+-int tcp_v6_rcv(struct sk_buff *skb)
++static int tcp_v6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+ {
++	struct sk_buff *skb = *pskb;
+ 	struct tcphdr *th;	
+ 	struct sock *sk;
+ 	int ret;
+@@ -1601,11 +1607,12 @@
+ 		goto no_tcp_socket;
+ 
+ process:
+-	if(!ipsec_sk_policy(sk,skb))
+-		goto discard_and_relse;
+ 	if(sk->state == TCP_TIME_WAIT)
+ 		goto do_time_wait;
+ 
++	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
++		goto discard_and_relse;
++
+ 	if (sk_filter(sk, skb, 0))
+ 		goto discard_and_relse;
+ 		
+@@ -1621,9 +1628,12 @@
+ 	bh_unlock_sock(sk);
+ 
+ 	sock_put(sk);
+-	return ret;
++	return ret ? -1 : 0;
+ 
+ no_tcp_socket:
++	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
++		goto discard_and_relse;
++
+ 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ bad_packet:
+ 		TCP_INC_STATS_BH(TcpInErrs);
+@@ -1645,6 +1655,10 @@
+ 	goto discard_it;
+ 
+ do_time_wait:
++	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
++		sock_put(sk);
++		goto discard_it;
++	}
+ 	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
+ 		TCP_INC_STATS_BH(TcpInErrs);
+ 		tcp_tw_put((struct tcp_tw_bucket *) sk);	
+@@ -1688,30 +1702,35 @@
+ 	if (dst == NULL) {
+ 		struct flowi fl;
+ 
++		memset(&fl, 0, sizeof(fl));
+ 		fl.proto = IPPROTO_TCP;
+-		fl.nl_u.ip6_u.daddr = &np->daddr;
+-		fl.nl_u.ip6_u.saddr = &np->saddr;
++		ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
++		ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ 		fl.fl6_flowlabel = np->flow_label;
+ 		fl.oif = sk->bound_dev_if;
+-		fl.uli_u.ports.dport = sk->dport;
+-		fl.uli_u.ports.sport = sk->sport;
++		fl.fl_ip_dport = sk->dport;
++		fl.fl_ip_sport = sk->sport;
+ 
+ 		if (np->opt && np->opt->srcrt) {
+ 			struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+-			fl.nl_u.ip6_u.daddr = rt0->addr;
++			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 		}
+ 
+-		dst = ip6_route_output(sk, &fl);
++		err = ip6_dst_lookup(sk, &dst, &fl);
+ 
+-		if (dst->error) {
+-			err = dst->error;
+-			dst_release(dst);
++		if (err) {
+ 			sk->route_caps = 0;
+ 			return err;
+ 		}
+ 
+ 		ip6_dst_store(sk, dst, NULL);
+-		sk->route_caps = dst->dev->features&~NETIF_F_IP_CSUM;
++		sk->route_caps = dst->dev->features&
++			~(NETIF_F_IP_CSUM
++#ifdef NETIF_F_TSO
++			  | NETIF_F_TSO
++#endif
++				);
++		tcp_sk(sk)->ext2_header_len = dst->header_len;
+ 	}
+ 
+ 	return 0;
+@@ -1724,38 +1743,45 @@
+ 	struct flowi fl;
+ 	struct dst_entry *dst;
+ 
++	memset(&fl, 0, sizeof(fl));
+ 	fl.proto = IPPROTO_TCP;
+-	fl.fl6_dst = &np->daddr;
+-	fl.fl6_src = &np->saddr;
++	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
++	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ 	fl.fl6_flowlabel = np->flow_label;
+ 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+ 	fl.oif = sk->bound_dev_if;
+-	fl.uli_u.ports.sport = sk->sport;
+-	fl.uli_u.ports.dport = sk->dport;
++	fl.fl_ip_sport = sk->sport;
++	fl.fl_ip_dport = sk->dport;
+ 
+ 	if (np->opt && np->opt->srcrt) {
+ 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+-		fl.nl_u.ip6_u.daddr = rt0->addr;
++		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 	}
+ 
+ 	dst = __sk_dst_check(sk, np->dst_cookie);
+ 
+ 	if (dst == NULL) {
+-		dst = ip6_route_output(sk, &fl);
++		int err = ip6_dst_lookup(sk, &dst, &fl);
+ 
+-		if (dst->error) {
+-			sk->err_soft = -dst->error;
+-			dst_release(dst);
+-			return -sk->err_soft;
++		if (err) {
++			sk->err_soft = -err;
++			return err;
+ 		}
+ 
+ 		ip6_dst_store(sk, dst, NULL);
++		sk->route_caps = dst->dev->features &
++			~(NETIF_F_IP_CSUM
++#ifdef NETIF_F_TSO
++			  | NETIF_F_TSO
++#endif
++				);
++		tcp_sk(sk)->ext2_header_len = dst->header_len;
+ 	}
+ 
+ 	skb->dst = dst_clone(dst);
+ 
+ 	/* Restore final destination back after routing done */
+-	fl.nl_u.ip6_u.daddr = &np->daddr;
++	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
+ 
+ 	return ip6_xmit(sk, skb, &fl, np->opt);
+ }
+@@ -1865,6 +1891,7 @@
+ static int tcp_v6_destroy_sock(struct sock *sk)
+ {
+ 	struct tcp_opt *tp = &(sk->tp_pinfo.af_tcp);
++	struct inet_opt *inet = inet_sk(sk);
+ 
+ 	tcp_clear_xmit_timers(sk);
+ 
+@@ -1882,8 +1909,8 @@
+ 		tcp_put_port(sk);
+ 
+ 	/* If sendmsg cached page exists, toss it. */
+-	if (tp->sndmsg_page != NULL)
+-		__free_page(tp->sndmsg_page);
++	if (inet->sndmsg_page != NULL)
++		__free_page(inet->sndmsg_page);
+ 
+ 	atomic_dec(&tcp_sockets_allocated);
+ 
+@@ -2143,15 +2170,10 @@
+ 	get_port:	tcp_v6_get_port,
+ };
+ 
+-static struct inet6_protocol tcpv6_protocol =
+-{
+-	tcp_v6_rcv,		/* TCP handler		*/
+-	tcp_v6_err,		/* TCP error control	*/
+-	NULL,			/* next			*/
+-	IPPROTO_TCP,		/* protocol ID		*/
+-	0,			/* copy			*/
+-	NULL,			/* data			*/
+-	"TCPv6"			/* name			*/
++static struct inet6_protocol tcpv6_protocol = {
++	.handler	=	tcp_v6_rcv,
++	.err_handler	=	tcp_v6_err,
++	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+ };
+ 
+ extern struct proto_ops inet6_stream_ops;
+@@ -2169,6 +2191,7 @@
+ void __init tcpv6_init(void)
+ {
+ 	/* register inet6 protocol */
+-	inet6_add_protocol(&tcpv6_protocol);
++	if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
++		printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
+ 	inet6_register_protosw(&tcpv6_protosw);
+ }
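The tcp_ipv6.c conversion is one calling-convention change applied everywhere:
ip6_route_output() always returned a dst whose ->error had to be checked and
released on failure, while ip6_dst_lookup() (which in this backport also
resolves the source address) returns 0 or a negative errno and produces a dst
only on success. The two shapes side by side, as a sketch with stand-in bodies:

    #include <errno.h>
    #include <stdio.h>

    struct dst_entry { int error; };
    static struct dst_entry bad_dst = { .error = -ENETUNREACH };

    /* Old shape: a dst always comes back; caller checks and releases it. */
    static struct dst_entry *route_output_sketch(void) { return &bad_dst; }

    /* New shape: 0 or -errno; *dst is touched only on success. */
    static int dst_lookup_sketch(struct dst_entry **dst)
    { (void)dst; return -ENETUNREACH; }

    int main(void)
    {
            struct dst_entry *dst = route_output_sketch();
            if (dst->error)            /* old: must still release the dst */
                    printf("old error %d\n", dst->error);

            int err = dst_lookup_sketch(&dst);
            if (err)                   /* new: nothing to release */
                    printf("new error %d\n", err);
            return 0;
    }

That is why several of the failure paths above lose their dst_release() calls.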
+diff -Nru a/net/ipv6/udp.c b/net/ipv6/udp.c
+--- a/net/ipv6/udp.c	2005-02-13 21:25:09 +11:00
++++ b/net/ipv6/udp.c	2005-02-13 21:25:09 +11:00
+@@ -14,6 +14,7 @@
+  *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
+  *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
+  *					a single port at the same time.
++ *      Kazunori MIYAZAWA @USAGI:       change process style to use ip6_append_data
+  *
+  *	This program is free software; you can redistribute it and/or
+  *      modify it under the terms of the GNU General Public License
+@@ -50,6 +51,7 @@
+ #include <net/inet_common.h>
+ 
+ #include <net/checksum.h>
++#include <net/xfrm.h>
+ 
+ struct udp_mib udp_stats_in6[NR_CPUS*2];
+ 
+@@ -226,7 +228,6 @@
+ 	struct sockaddr_in6	*usin = (struct sockaddr_in6 *) uaddr;
+ 	struct ipv6_pinfo      	*np = &sk->net_pinfo.af_inet6;
+ 	struct in6_addr		*daddr;
+-	struct in6_addr		saddr;
+ 	struct dst_entry	*dst;
+ 	struct flowi		fl;
+ 	struct ip6_flowlabel	*flowlabel = NULL;
+@@ -246,7 +247,7 @@
+ 	if (usin->sin6_family != AF_INET6) 
+ 	  	return -EAFNOSUPPORT;
+ 
+-	fl.fl6_flowlabel = 0;
++	memset(&fl, 0, sizeof(fl));
+ 	if (np->sndflow) {
+ 		fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+ 		if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+@@ -271,9 +272,10 @@
+ 	if (addr_type == IPV6_ADDR_MAPPED) {
+ 		struct sockaddr_in sin;
+ 
+-		if (__ipv6_only_sock(sk))
+-			return -ENETUNREACH;
+-
++		if (__ipv6_only_sock(sk)) {
++			err = -ENETUNREACH;
++			goto out;
++		}
+ 		sin.sin_family = AF_INET;
+ 		sin.sin_addr.s_addr = daddr->s6_addr32[3];
+ 		sin.sin_port = usin->sin6_port;
+@@ -281,8 +283,8 @@
+ 		err = udp_connect(sk, (struct sockaddr*) &sin, sizeof(sin));
+ 
+ ipv4_connected:
+-		if (err < 0)
+-			return err;
++		if (err)
++			goto out;
+ 		
+ 		ipv6_addr_set(&np->daddr, 0, 0, 
+ 			      htonl(0x0000ffff),
+@@ -299,15 +301,15 @@
+ 				      htonl(0x0000ffff),
+ 				      sk->rcv_saddr);
+ 		}
+-		return 0;
++		goto out;
+ 	}
+ 
+ 	if (addr_type&IPV6_ADDR_LINKLOCAL) {
+ 		if (addr_len >= sizeof(struct sockaddr_in6) &&
+ 		    usin->sin6_scope_id) {
+ 			if (sk->bound_dev_if && sk->bound_dev_if != usin->sin6_scope_id) {
+-				fl6_sock_release(flowlabel);
+-				return -EINVAL;
++				err = -EINVAL;
++				goto out;
+ 			}
+ 			sk->bound_dev_if = usin->sin6_scope_id;
+ 			if (!sk->bound_dev_if && (addr_type&IPV6_ADDR_MULTICAST))
+@@ -315,8 +317,10 @@
+ 		}
+ 
+ 		/* Connect to link-local address requires an interface */
+-		if (sk->bound_dev_if == 0)
+-			return -EINVAL;
++		if (sk->bound_dev_if == 0) {
++			err = -EINVAL;
++			goto out;
++		}
+ 	}
+ 
+ 	ipv6_addr_copy(&np->daddr, daddr);
+@@ -330,11 +334,11 @@
+ 	 */
+ 
+ 	fl.proto = IPPROTO_UDP;
+-	fl.fl6_dst = &np->daddr;
+-	fl.fl6_src = &saddr;
++	ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
++	ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+ 	fl.oif = sk->bound_dev_if;
+-	fl.uli_u.ports.dport = sk->dport;
+-	fl.uli_u.ports.sport = sk->sport;
++	fl.fl_ip_dport = sk->dport;
++	fl.fl_ip_sport = sk->sport;
+ 
+ 	if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST))
+ 		fl.oif = np->mcast_oif;
+@@ -342,37 +346,33 @@
+ 	if (flowlabel) {
+ 		if (flowlabel->opt && flowlabel->opt->srcrt) {
+ 			struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt;
+-			fl.fl6_dst = rt0->addr;
++			ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 		}
+ 	} else if (np->opt && np->opt->srcrt) {
+ 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+-		fl.fl6_dst = rt0->addr;
++		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 	}
+ 
+-	dst = ip6_route_output(sk, &fl);
+-
+-	if ((err = dst->error) != 0) {
+-		dst_release(dst);
+-		fl6_sock_release(flowlabel);
+-		return err;
+-	}
++	err = ip6_dst_lookup(sk, &dst, &fl);
++	if (err)
++		goto out;
+ 
+-	ip6_dst_store(sk, dst, fl.fl6_dst);
++	/* source address lookup done in ip6_dst_lookup */
+ 
+-	/* get the source adddress used in the apropriate device */
++	if (ipv6_addr_any(&np->saddr))
++		ipv6_addr_copy(&np->saddr, &fl.fl6_src);
+ 
+-	err = ipv6_get_saddr(dst, daddr, &saddr);
++	if (ipv6_addr_any(&np->rcv_saddr)) {
++		ipv6_addr_copy(&np->rcv_saddr, &fl.fl6_src);
++		sk->rcv_saddr = LOOPBACK4_IPV6;
++	}
+ 
+-	if (err == 0) {
+-		if(ipv6_addr_any(&np->saddr))
+-			ipv6_addr_copy(&np->saddr, &saddr);
++	ip6_dst_store(sk, dst,
++		      !ipv6_addr_cmp(&fl.fl6_dst, &np->daddr) ?
++		      &np->daddr : NULL);
+ 
+-		if(ipv6_addr_any(&np->rcv_saddr)) {
+-			ipv6_addr_copy(&np->rcv_saddr, &saddr);
+-			sk->rcv_saddr = LOOPBACK4_IPV6;
+-		}
+-		sk->state = TCP_ESTABLISHED;
+-	}
++	sk->state = TCP_ESTABLISHED;
++out:
+ 	fl6_sock_release(flowlabel);
+ 
+ 	return err;
+@@ -524,6 +524,11 @@
+ 
+ static inline int udpv6_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
+ {
++	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
++		kfree_skb(skb);
++		return -1;
++	}
++
+ 	if (skb->ip_summed != CHECKSUM_UNNECESSARY) {
+ 		if ((unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum))) {
+ 			UDP6_INC_STATS_BH(UdpInErrors);
+@@ -610,8 +615,9 @@
+ 	read_unlock(&udp_hash_lock);
+ }
+ 
+-int udpv6_rcv(struct sk_buff *skb)
++static int udpv6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
+ {
++	struct sk_buff *skb = *pskb;
+ 	struct sock *sk;
+   	struct udphdr *uh;
+ 	struct net_device *dev = skb->dev;
+@@ -678,6 +684,9 @@
+ 	sk = udp_v6_lookup(saddr, uh->source, daddr, uh->dest, dev->ifindex);
+ 
+ 	if (sk == NULL) {
++		if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
++			goto discard;
++
+ 		if (skb->ip_summed != CHECKSUM_UNNECESSARY &&
+ 		    (unsigned short)csum_fold(skb_checksum(skb, 0, skb->len, skb->csum)))
+ 			goto discard;
+@@ -704,103 +713,126 @@
+ 	kfree_skb(skb);
+ 	return(0);	
+ }
+-
+ /*
+- *	Sending
++ * Throw away all pending data and cancel the corking. Socket is locked.
+  */
+-
+-struct udpv6fakehdr 
++static void udp_v6_flush_pending_frames(struct sock *sk)
+ {
+-	struct udphdr	uh;
+-	struct iovec	*iov;
+-	__u32		wcheck;
+-	__u32		pl_len;
+-	struct in6_addr *daddr;
+-};
++	struct udp_opt *up = udp_sk(sk);
++
++	if (up->pending) {
++		up->len = 0;
++		up->pending = 0;
++		ip6_flush_pending_frames(sk);
++	}
++}
+ 
+ /*
+- *	with checksum
++ *	Sending
+  */
+ 
+-static int udpv6_getfrag(const void *data, struct in6_addr *addr,
+-			 char *buff, unsigned int offset, unsigned int len)
++static int udp_v6_push_pending_frames(struct sock *sk, struct udp_opt *up)
+ {
+-	struct udpv6fakehdr *udh = (struct udpv6fakehdr *) data;
+-	char *dst;
+-	int final = 0;
+-	int clen = len;
++	struct sk_buff *skb;
++	struct udphdr *uh;
++	struct ipv6_pinfo *np = inet6_sk(sk);
++	struct flowi *fl = &np->cork.fl;
++	int err = 0;
+ 
+-	dst = buff;
++	/* Grab the skbuff where UDP header space exists. */
++	if ((skb = skb_peek(&sk->write_queue)) == NULL)
++		goto out;
+ 
+-	if (offset) {
+-		offset -= sizeof(struct udphdr);
++	/*
++	 * Create a UDP header
++	 */
++	uh = skb->h.uh;
++	uh->source = fl->fl_ip_sport;
++	uh->dest = fl->fl_ip_dport;
++	uh->len = htons(up->len);
++	uh->check = 0;
++
++	if (sk->no_check == UDP_CSUM_NOXMIT) {
++		skb->ip_summed = CHECKSUM_NONE;
++		goto send;
++	}
++
++	if (skb_queue_len(&sk->write_queue) == 1) {
++		skb->csum = csum_partial((char *)uh,
++				sizeof(struct udphdr), skb->csum);
++		uh->check = csum_ipv6_magic(&fl->fl6_src,
++					    &fl->fl6_dst,
++					    up->len, fl->proto, skb->csum);
+ 	} else {
+-		dst += sizeof(struct udphdr);
+-		final = 1;
+-		clen -= sizeof(struct udphdr);
+-	}
++		u32 tmp_csum = 0;
+ 
+-	if (csum_partial_copy_fromiovecend(dst, udh->iov, offset,
+-					   clen, &udh->wcheck))
+-		return -EFAULT;
+-
+-	if (final) {
+-		struct in6_addr *daddr;
+-		
+-		udh->wcheck = csum_partial((char *)udh, sizeof(struct udphdr),
+-					   udh->wcheck);
+-
+-		if (udh->daddr) {
+-			daddr = udh->daddr;
+-		} else {
+-			/*
+-			 *	use packet destination address
+-			 *	this should improve cache locality
+-			 */
+-			daddr = addr + 1;
+-		}
+-		udh->uh.check = csum_ipv6_magic(addr, daddr,
+-						udh->pl_len, IPPROTO_UDP,
+-						udh->wcheck);
+-		if (udh->uh.check == 0)
+-			udh->uh.check = -1;
++		skb_queue_walk(&sk->write_queue, skb) {
++			tmp_csum = csum_add(tmp_csum, skb->csum);
++		}
++		tmp_csum = csum_partial((char *)uh,
++				sizeof(struct udphdr), tmp_csum);
++		tmp_csum = csum_ipv6_magic(&fl->fl6_src,
++					   &fl->fl6_dst,
++					   up->len, fl->proto, tmp_csum);
++		uh->check = tmp_csum;
+ 
+-		memcpy(buff, udh, sizeof(struct udphdr));
+ 	}
+-	return 0;
++	if (uh->check == 0)
++		uh->check = -1;
++
++send:
++	err = ip6_push_pending_frames(sk);
++out:
++	up->len = 0;
++	up->pending = 0;
++	return err;
+ }
+ 
+-static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int ulen)
++static int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, int len)
+ {
+ 	struct ipv6_txoptions opt_space;
+-	struct udpv6fakehdr udh;
++	struct udp_opt *up = udp_sk(sk);
+ 	struct ipv6_pinfo *np = &sk->net_pinfo.af_inet6;
+ 	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
++	struct in6_addr *daddr;
+ 	struct ipv6_txoptions *opt = NULL;
+ 	struct ip6_flowlabel *flowlabel = NULL;
+-	struct flowi fl;
++	struct flowi *fl = &np->cork.fl;
++	struct dst_entry *dst;
+ 	int addr_len = msg->msg_namelen;
+-	struct in6_addr *daddr;
+-	int len = ulen + sizeof(struct udphdr);
++	int ulen = len;
+ 	int addr_type;
+ 	int hlimit = -1;
+-	
++	int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
+ 	int err;
+ 	
+ 	/* Rough check on arithmetic overflow,
+ 	   better check is made in ip6_build_xmit
+ 	   */
+-	if (ulen < 0 || ulen > INT_MAX - sizeof(struct udphdr))
++	if (len < 0 || len > INT_MAX - sizeof(struct udphdr))
+ 		return -EMSGSIZE;
+ 	
+-	fl.fl6_flowlabel = 0;
+-	fl.oif = 0;
++	if (up->pending) {
++		/*
++		 * There are pending frames.
++		 * The socket lock must be held while it's corked.
++		 */
++		lock_sock(sk);
++		if (likely(up->pending)) {
++			dst = NULL;
++			goto do_append_data;
++		}
++		release_sock(sk);
++	}
++	ulen += sizeof(struct udphdr);
++
++	memset(fl, 0, sizeof(*fl));
+ 
+ 	if (sin6) {
+ 		if (sin6->sin6_family == AF_INET) {
+ 			if (__ipv6_only_sock(sk))
+ 				return -ENETUNREACH;
+-			return udp_sendmsg(sk, msg, ulen);
++			return udp_sendmsg(sk, msg, len);
+ 		}
+ 
+ 		if (addr_len < SIN6_LEN_RFC2133)
+@@ -812,13 +844,13 @@
+ 		if (sin6->sin6_port == 0)
+ 			return -EINVAL;
+ 
+-		udh.uh.dest = sin6->sin6_port;
++		up->dport = sin6->sin6_port;
+ 		daddr = &sin6->sin6_addr;
+ 
+ 		if (np->sndflow) {
+-			fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+-			if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+-				flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
++			fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
++			if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
++				flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+ 				if (flowlabel == NULL)
+ 					return -EINVAL;
+ 				daddr = &flowlabel->dst;
+@@ -833,14 +865,14 @@
+ 		if (addr_len >= sizeof(struct sockaddr_in6) &&
+ 		    sin6->sin6_scope_id &&
+ 		    ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
+-			fl.oif = sin6->sin6_scope_id;
++			fl->oif = sin6->sin6_scope_id;
+ 	} else {
+ 		if (sk->state != TCP_ESTABLISHED)
+ 			return -EDESTADDRREQ;
+ 
+-		udh.uh.dest = sk->dport;
++		up->dport = sk->dport;
+ 		daddr = &sk->net_pinfo.af_inet6.daddr;
+-		fl.fl6_flowlabel = np->flow_label;
++		fl->fl6_flowlabel = np->flow_label;
+ 	}
+ 
+ 	addr_type = ipv6_addr_type(daddr);
+@@ -853,30 +885,28 @@
+ 
+ 		sin.sin_family = AF_INET;
+ 		sin.sin_addr.s_addr = daddr->s6_addr32[3];
+-		sin.sin_port = udh.uh.dest;
++		sin.sin_port = up->dport;
+ 		msg->msg_name = (struct sockaddr *)(&sin);
+ 		msg->msg_namelen = sizeof(sin);
+ 		fl6_sock_release(flowlabel);
+ 
+-		return udp_sendmsg(sk, msg, ulen);
++		return udp_sendmsg(sk, msg, len);
+ 	}
+ 
+-	udh.daddr = NULL;
+-	if (!fl.oif)
+-		fl.oif = sk->bound_dev_if;
+-	fl.fl6_src = NULL;
++	if (!fl->oif)
++		fl->oif = sk->bound_dev_if;
+ 
+ 	if (msg->msg_controllen) {
+ 		opt = &opt_space;
+ 		memset(opt, 0, sizeof(struct ipv6_txoptions));
+ 
+-		err = datagram_send_ctl(msg, &fl, opt, &hlimit);
++		err = datagram_send_ctl(msg, fl, opt, &hlimit);
+ 		if (err < 0) {
+ 			fl6_sock_release(flowlabel);
+ 			return err;
+ 		}
+-		if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+-			flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
++		if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
++			flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+ 			if (flowlabel == NULL)
+ 				return -EINVAL;
+ 		}
+@@ -887,44 +917,188 @@
+ 		opt = np->opt;
+ 	if (flowlabel)
+ 		opt = fl6_merge_options(&opt_space, flowlabel, opt);
+-	if (opt && opt->srcrt)
+-		udh.daddr = daddr;
+ 
+-	udh.uh.source = sk->sport;
+-	udh.uh.len = len < 0x10000 ? htons(len) : 0;
+-	udh.uh.check = 0;
+-	udh.iov = msg->msg_iov;
+-	udh.wcheck = 0;
+-	udh.pl_len = len;
++	fl->proto = IPPROTO_UDP;
++	ipv6_addr_copy(&fl->fl6_dst, daddr);
++	if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
++		ipv6_addr_copy(&fl->fl6_src, &np->saddr);
++	fl->fl_ip_dport = up->dport;
++	fl->fl_ip_sport = sk->sport;
++	
++	/* merge ip6_build_xmit from ip6_output */
++	if (opt && opt->srcrt) {
++		struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
++		ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
++	}
+ 
+-	fl.proto = IPPROTO_UDP;
+-	fl.fl6_dst = daddr;
+-	if (fl.fl6_src == NULL && !ipv6_addr_any(&np->saddr))
+-		fl.fl6_src = &np->saddr;
+-	fl.uli_u.ports.dport = udh.uh.dest;
+-	fl.uli_u.ports.sport = udh.uh.source;
++	if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
++		fl->oif = np->mcast_oif;
++
++	err = ip6_dst_lookup(sk, &dst, fl);
++	if (err)
++		goto out;
+ 
+-	err = ip6_build_xmit(sk, udpv6_getfrag, &udh, &fl, len, opt, hlimit,
+-			     msg->msg_flags);
++	if (hlimit < 0) {
++		if (ipv6_addr_is_multicast(&fl->fl6_dst))
++			hlimit = np->mcast_hops;
++		else
++			hlimit = np->hop_limit;
++		if (hlimit < 0)
++			hlimit = dst_metric(dst, RTAX_HOPLIMIT);
++	}
++
++	if (msg->msg_flags&MSG_CONFIRM)
++		goto do_confirm;
++back_from_confirm:
++
++	lock_sock(sk);
++	if (unlikely(up->pending)) {
++		/* The socket is already corked while preparing it. */
++		/* ... which is an evident application bug. --ANK */
++		release_sock(sk);
+ 
++		NETDEBUG(if (net_ratelimit()) printk(KERN_DEBUG "udp cork app bug 2\n"));
++		err = -EINVAL;
++		goto out;
++	}
++
++	up->pending = 1;
++
++do_append_data:
++	up->len += ulen;
++	err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen, sizeof(struct udphdr),
++			      hlimit, opt, fl, (struct rt6_info*)dst,
++			      corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
++	if (err)
++		udp_v6_flush_pending_frames(sk);
++	else if (!corkreq)
++		err = udp_v6_push_pending_frames(sk, up);
++
++	if (dst)
++		ip6_dst_store(sk, dst,
++			      !ipv6_addr_cmp(&fl->fl6_dst, &np->daddr) ?
++			      &np->daddr : NULL);
++	if (err > 0)
++		err = np->recverr ? net_xmit_errno(err) : 0;
++	release_sock(sk);
++out:
+ 	fl6_sock_release(flowlabel);
++	if (!err) {
++		UDP6_INC_STATS_USER(UdpOutDatagrams);
++		return len;
++	}
++	return err;
++
++do_confirm:
++	dst_confirm(dst);
++	if (!(msg->msg_flags&MSG_PROBE) || len)
++		goto back_from_confirm;
++	err = 0;
++	goto out;
++}
++
++static int udpv6_destroy_sock(struct sock *sk)
++{
++	lock_sock(sk);
++	udp_v6_flush_pending_frames(sk);
++	release_sock(sk);
+ 
+-	if (err < 0)
+-		return err;
++	inet6_destroy_sock(sk);
+ 
+-	UDP6_INC_STATS_USER(UdpOutDatagrams);
+-	return ulen;
++	return 0;
+ }
+ 
+-static struct inet6_protocol udpv6_protocol = 
++/*
++ *	Socket option code for UDP
++ */
++static int udpv6_setsockopt(struct sock *sk, int level, int optname, 
++			  char *optval, int optlen)
+ {
+-	udpv6_rcv,		/* UDP handler		*/
+-	udpv6_err,		/* UDP error control	*/
+-	NULL,			/* next			*/
+-	IPPROTO_UDP,		/* protocol ID		*/
+-	0,			/* copy			*/
+-	NULL,			/* data			*/
+-	"UDPv6"			/* name			*/
++	struct udp_opt *up = udp_sk(sk);
++	int val;
++	int err = 0;
++
++	if (level != SOL_UDP)
++		return ipv6_setsockopt(sk, level, optname, optval, optlen);
++
++	if(optlen<sizeof(int))
++		return -EINVAL;
++
++	if (get_user(val, (int *)optval))
++		return -EFAULT;
++
++	switch(optname) {
++	case UDP_CORK:
++		if (val != 0) {
++			up->corkflag = 1;
++		} else {
++			up->corkflag = 0;
++			lock_sock(sk);
++			udp_v6_push_pending_frames(sk, up);
++			release_sock(sk);
++		}
++		break;
++		
++	case UDP_ENCAP:
++		switch (val) {
++		case 0:
++			up->encap_type = val;
++			break;
++		default:
++			err = -ENOPROTOOPT;
++			break;
++		}
++		break;
++
++	default:
++		err = -ENOPROTOOPT;
++		break;
++	};
++
++	return err;
++}
++
++static int udpv6_getsockopt(struct sock *sk, int level, int optname, 
++			  char *optval, int *optlen)
++{
++	struct udp_opt *up = udp_sk(sk);
++	int val, len;
++
++	if (level != SOL_UDP)
++		return ipv6_getsockopt(sk, level, optname, optval, optlen);
++
++	if(get_user(len,optlen))
++		return -EFAULT;
++
++	len = min_t(unsigned int, len, sizeof(int));
++	
++	if(len < 0)
++		return -EINVAL;
++
++	switch(optname) {
++	case UDP_CORK:
++		val = up->corkflag;
++		break;
++
++	case UDP_ENCAP:
++		val = up->encap_type;
++		break;
++
++	default:
++		return -ENOPROTOOPT;
++	};
++
++  	if(put_user(len, optlen))
++  		return -EFAULT;
++	if(copy_to_user(optval, &val,len))
++		return -EFAULT;
++  	return 0;
++}
++
++static struct inet6_protocol udpv6_protocol = {
++	.handler	=	udpv6_rcv,
++	.err_handler	=	udpv6_err,
++	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+ };
+ 
+ #define LINE_LEN 190
+@@ -1001,20 +1175,20 @@
+ }
+ 
+ struct proto udpv6_prot = {
+-	name:		"UDP",
+-	close:		udpv6_close,
+-	connect:	udpv6_connect,
+-	disconnect:	udp_disconnect,
+-	ioctl:		udp_ioctl,
+-	destroy:	inet6_destroy_sock,
+-	setsockopt:	ipv6_setsockopt,
+-	getsockopt:	ipv6_getsockopt,
+-	sendmsg:	udpv6_sendmsg,
+-	recvmsg:	udpv6_recvmsg,
+-	backlog_rcv:	udpv6_queue_rcv_skb,
+-	hash:		udp_v6_hash,
+-	unhash:		udp_v6_unhash,
+-	get_port:	udp_v6_get_port,
++	.name		=	"UDP",
++	.close		=	udpv6_close,
++	.connect	=	udpv6_connect,
++	.disconnect	=	udp_disconnect,
++	.ioctl		=	udp_ioctl,
++	.destroy	=	udpv6_destroy_sock,
++	.setsockopt	=	udpv6_setsockopt,
++	.getsockopt	=	udpv6_getsockopt,
++	.sendmsg	=	udpv6_sendmsg,
++	.recvmsg	=	udpv6_recvmsg,
++	.backlog_rcv	=	udpv6_queue_rcv_skb,
++	.hash		=	udp_v6_hash,
++	.unhash		=	udp_v6_unhash,
++	.get_port	=	udp_v6_get_port,
+ };
+ 
+ extern struct proto_ops inet6_dgram_ops;
+@@ -1032,6 +1206,7 @@
+ 
+ void __init udpv6_init(void)
+ {
+-	inet6_add_protocol(&udpv6_protocol);
++	if (inet6_add_protocol(&udpv6_protocol, IPPROTO_UDP) < 0)
++		printk(KERN_ERR "udpv6_init: Could not register protocol\n");
+ 	inet6_register_protosw(&udpv6_protosw);
+ }
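The rewritten sendmsg path gives UDPv6 the same corking machinery as 2.6:
ip6_append_data() accumulates queued fragments while up->pending is set, and
udp_v6_push_pending_frames() computes one UDP header and checksum over the lot
and transmits it. From user space the feature is driven by the UDP_CORK socket
option or MSG_MORE; a small demonstrative program (loopback address and port
are placeholders, error checking omitted):

    #include <netinet/in.h>
    #include <string.h>
    #include <sys/socket.h>
    #include <unistd.h>

    #ifndef UDP_CORK
    #define UDP_CORK 1          /* value from <linux/udp.h> */
    #endif

    int main(void)
    {
            int one = 1, zero = 0;
            int fd = socket(AF_INET6, SOCK_DGRAM, 0);
            struct sockaddr_in6 dst;

            memset(&dst, 0, sizeof(dst));
            dst.sin6_family = AF_INET6;
            dst.sin6_port = htons(9999);
            dst.sin6_addr = in6addr_loopback;
            connect(fd, (struct sockaddr *)&dst, sizeof(dst));

            /* Corked: the kernel queues via ip6_append_data(). */
            setsockopt(fd, IPPROTO_UDP, UDP_CORK, &one, sizeof(one));
            send(fd, "hello ", 6, 0);
            send(fd, "world", 5, 0);
            /* Uncorked: one datagram leaves as the pending frames are pushed. */
            setsockopt(fd, IPPROTO_UDP, UDP_CORK, &zero, sizeof(zero));

            close(fd);
            return 0;
    }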
+diff -Nru a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv6/xfrm6_input.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,148 @@
++/*
++ * xfrm6_input.c: based on net/ipv4/xfrm4_input.c
++ *
++ * Authors:
++ *	Mitsuru KANDA @USAGI
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ *	YOSHIFUJI Hideaki @USAGI
++ *		IPv6 support
++ */
++
++#include <linux/module.h>
++#include <linux/string.h>
++#include <net/inet_ecn.h>
++#include <net/ip.h>
++#include <net/ipv6.h>
++#include <net/xfrm.h>
++
++static inline void ipip6_ecn_decapsulate(struct sk_buff *skb)
++{
++	struct ipv6hdr *outer_iph = skb->nh.ipv6h;
++	struct ipv6hdr *inner_iph = skb->h.ipv6h;
++
++	if (INET_ECN_is_ce(ip6_get_dsfield(outer_iph)) &&
++	    INET_ECN_is_not_ce(ip6_get_dsfield(inner_iph)))
++		IP6_ECN_set_ce(inner_iph);
++}
++
++int xfrm6_rcv_spi(struct sk_buff **pskb, unsigned int *nhoffp, u32 spi)
++{
++	struct sk_buff *skb = *pskb;
++	int err;
++	u32 seq;
++	struct sec_decap_state xfrm_vec[XFRM_MAX_DEPTH];
++	struct xfrm_state *x;
++	int xfrm_nr = 0;
++	int decaps = 0;
++	int nexthdr;
++	unsigned int nhoff;
++
++	nhoff = *nhoffp;
++	nexthdr = skb->nh.raw[nhoff];
++
++	seq = 0;
++	if (!spi && (err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) != 0)
++		goto drop;
++	
++	do {
++		struct ipv6hdr *iph = skb->nh.ipv6h;
++
++		if (xfrm_nr == XFRM_MAX_DEPTH)
++			goto drop;
++
++		x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi, nexthdr, AF_INET6);
++		if (x == NULL)
++			goto drop;
++		spin_lock(&x->lock);
++		if (unlikely(x->km.state != XFRM_STATE_VALID))
++			goto drop_unlock;
++
++		if (x->props.replay_window && xfrm_replay_check(x, seq))
++			goto drop_unlock;
++
++		if (xfrm_state_check_expire(x))
++			goto drop_unlock;
++
++		nexthdr = x->type->input(x, &(xfrm_vec[xfrm_nr].decap), skb);
++		if (nexthdr <= 0)
++			goto drop_unlock;
++
++		skb->nh.raw[nhoff] = nexthdr;
++
++		if (x->props.replay_window)
++			xfrm_replay_advance(x, seq);
++
++		x->curlft.bytes += skb->len;
++		x->curlft.packets++;
++
++		spin_unlock(&x->lock);
++
++		xfrm_vec[xfrm_nr++].xvec = x;
++
++		if (x->props.mode) { /* XXX */
++			if (nexthdr != IPPROTO_IPV6)
++				goto drop;
++			if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
++				goto drop;
++			if (skb_cloned(skb) &&
++			    pskb_expand_head(skb, 0, 0, GFP_ATOMIC))
++				goto drop;
++			if (!(x->props.flags & XFRM_STATE_NOECN))
++				ipip6_ecn_decapsulate(skb);
++			skb->mac.raw = memmove(skb->data - skb->mac_len,
++					       skb->mac.raw, skb->mac_len);
++			skb->nh.raw = skb->data;
++			decaps = 1;
++			break;
++		}
++
++		if ((err = xfrm_parse_spi(skb, nexthdr, &spi, &seq)) < 0)
++			goto drop;
++	} while (!err);
++
++	/* Allocate new secpath or COW existing one. */
++	if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) {
++		struct sec_path *sp;
++		sp = secpath_dup(skb->sp);
++		if (!sp)
++			goto drop;
++		if (skb->sp)
++			secpath_put(skb->sp);
++		skb->sp = sp;
++	}
++
++	if (xfrm_nr + skb->sp->len > XFRM_MAX_DEPTH)
++		goto drop;
++
++	memcpy(skb->sp->x+skb->sp->len, xfrm_vec, xfrm_nr*sizeof(struct sec_decap_state));
++	skb->sp->len += xfrm_nr;
++	skb->ip_summed = CHECKSUM_NONE;
++
++	if (decaps) {
++		if (!(skb->dev->flags&IFF_LOOPBACK)) {
++			dst_release(skb->dst);
++			skb->dst = NULL;
++		}
++		netif_rx(skb);
++		return -1;
++	} else {
++		return 1;
++	}
++
++drop_unlock:
++	spin_unlock(&x->lock);
++	xfrm_state_put(x);
++drop:
++	while (--xfrm_nr >= 0)
++		xfrm_state_put(xfrm_vec[xfrm_nr].xvec);
++	kfree_skb(skb);
++	return -1;
++}
++
++EXPORT_SYMBOL(xfrm6_rcv_spi);
++
++int xfrm6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
++{
++	return xfrm6_rcv_spi(pskb, nhoffp, 0);
++}
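xfrm6_rcv_spi() above is the entire inbound pipeline in one loop: parse the
SPI, look up the SA with xfrm_state_lookup(), run replay and lifetime checks
under x->lock, let x->type->input() strip one header, and repeat until either
a non-IPsec nexthdr remains (transport mode) or a tunnel header is
decapsulated; every consumed SA is recorded in the skb's secpath for the later
xfrm6_policy_check(). The control flow reduced to a runnable toy, with the
header chain as a plain array (names follow the patch; everything else is a
stand-in):

    #include <stdio.h>

    enum { IPPROTO_TCP = 6, IPPROTO_ESP = 50, IPPROTO_AH = 51 };
    #define XFRM_MAX_DEPTH 6

    /* Toy packet: a chain of nexthdr values ending in the payload proto. */
    static const int chain[] = { IPPROTO_AH, IPPROTO_ESP, IPPROTO_TCP };

    int main(void)
    {
            int xfrm_nr = 0, i = 0;
            int nexthdr = chain[i];

            while (nexthdr == IPPROTO_AH || nexthdr == IPPROTO_ESP) {
                    if (xfrm_nr == XFRM_MAX_DEPTH)   /* depth cap, as above */
                            return 1;
                    /* Real code: state lookup, replay check, type->input(). */
                    printf("stripped header %d\n", nexthdr);
                    xfrm_nr++;                       /* goes into the secpath */
                    nexthdr = chain[++i];
            }
            printf("payload proto %d after %d transform(s)\n", nexthdr, xfrm_nr);
            return 0;
    }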
+diff -Nru a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/ipv6/xfrm6_output.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,138 @@
++/*
++ * xfrm6_output.c - Common IPsec encapsulation code for IPv6.
++ * Copyright (C) 2002 USAGI/WIDE Project
++ * Copyright (c) 2004 Herbert Xu <herbert at gondor.apana.org.au>
++ * 
++ * This program is free software; you can redistribute it and/or
++ * modify it under the terms of the GNU General Public License
++ * as published by the Free Software Foundation; either version
++ * 2 of the License, or (at your option) any later version.
++ */
++
++#include <linux/skbuff.h>
++#include <linux/spinlock.h>
++#include <linux/icmpv6.h>
++#include <net/inet_ecn.h>
++#include <net/ipv6.h>
++#include <net/xfrm.h>
++
++/* Add encapsulation header.
++ *
++ * In transport mode, the IP header and mutable extension headers will be moved
++ * forward to make space for the encapsulation header.
++ *
++ * In tunnel mode, the top IP header will be constructed per RFC 2401.
++ * The following fields in it shall be filled in by x->type->output:
++ *	payload_len
++ *
++ * On exit, skb->h will be set to the start of the encapsulation header to be
++ * filled in by x->type->output and skb->nh will be set to the nextheader field
++ * of the extension header directly preceding the encapsulation header, or in
++ * its absence, that of the top IP header.  The value of skb->data will always
++ * point to the top IP header.
++ */
++static void xfrm6_encap(struct sk_buff *skb)
++{
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x = dst->xfrm;
++	struct ipv6hdr *iph, *top_iph;
++
++	skb_push(skb, x->props.header_len);
++	iph = skb->nh.ipv6h;
++
++	if (!x->props.mode) {
++		u8 *prevhdr;
++		int hdr_len;
++
++		hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
++		skb->nh.raw = prevhdr - x->props.header_len;
++		skb->h.raw = skb->data + hdr_len;
++		memmove(skb->data, iph, hdr_len);
++		return;
++	}
++
++	skb->nh.raw = skb->data;
++	top_iph = skb->nh.ipv6h;
++	skb->nh.raw = &top_iph->nexthdr;
++	skb->h.ipv6h = top_iph + 1;
++
++	top_iph->version = 6;
++	top_iph->priority = iph->priority;
++	if (x->props.flags & XFRM_STATE_NOECN)
++		IP6_ECN_clear(top_iph);
++	top_iph->flow_lbl[0] = iph->flow_lbl[0];
++	top_iph->flow_lbl[1] = iph->flow_lbl[1];
++	top_iph->flow_lbl[2] = iph->flow_lbl[2];
++	top_iph->nexthdr = IPPROTO_IPV6; 
++	top_iph->hop_limit = dst_path_metric(dst, RTAX_HOPLIMIT);
++	ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
++	ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
++}
++
++static int xfrm6_tunnel_check_size(struct sk_buff *skb)
++{
++	int mtu, ret = 0;
++	struct dst_entry *dst = skb->dst;
++
++	mtu = dst_pmtu(dst) - dst->header_len - dst->trailer_len;
++	if (mtu < IPV6_MIN_MTU)
++		mtu = IPV6_MIN_MTU;
++
++	if (skb->len > mtu) {
++		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
++		ret = -EMSGSIZE;
++	}
++
++	return ret;
++}
++
++int xfrm6_output(struct sk_buff *skb)
++{
++	struct dst_entry *dst = skb->dst;
++	struct xfrm_state *x = dst->xfrm;
++	int err;
++	
++	if (skb->ip_summed == CHECKSUM_HW && skb_checksum_help(skb) == NULL) {
++		err = -EINVAL;
++		goto error_nolock;
++	}
++
++	spin_lock_bh(&x->lock);
++	err = xfrm_state_check(x, skb);
++	if (err)
++		goto error;
++
++	if (x->props.mode) {
++		err = xfrm6_tunnel_check_size(skb);
++		if (err)
++			goto error;
++	}
++
++	xfrm6_encap(skb);
++
++	err = x->type->output(skb);
++	if (err)
++		goto error;
++
++	x->curlft.bytes += skb->len;
++	x->curlft.packets++;
++
++	spin_unlock_bh(&x->lock);
++
++	skb->nh.raw = skb->data;
++	
++	if (!(skb->dst = dst_pop(dst))) {
++		err = -EHOSTUNREACH;
++		goto error_nolock;
++	}
++	err = NET_XMIT_BYPASS;
++
++out_exit:
++	return err;
++error:
++	spin_unlock_bh(&x->lock);
++error_nolock:
++	kfree_skb(skb);
++	goto out_exit;
++}
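+#
+# Note on xfrm6_encap() above: in transport mode the IPv6 header (and any
+# mutable extension headers) is slid forward so the encapsulation header
+# can sit between it and the payload.  A minimal user-space sketch of
+# that buffer manipulation (illustration only; buf, hlen and hdr_len are
+# hypothetical stand-ins for skb->data, x->props.header_len and the
+# ip6_find_1stfragopt() result):
+#
+#	unsigned char buf[1500];	/* headroom already reserved */
+#	size_t hlen = 8, hdr_len = 40;
+#	/* before: [gap of hlen bytes][IPv6 hdr][payload] */
+#	memmove(buf, buf + hlen, hdr_len);
+#	/* after:  [IPv6 hdr][encap header goes here][payload] */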
+diff -Nru a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c
+--- /dev/null	Wed Dec 31 16:00:00 1969
++++ b/net/ipv6/xfrm6_policy.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,307 @@
++/*
++ * xfrm6_policy.c: based on xfrm4_policy.c
++ *
++ * Authors:
++ *	Mitsuru KANDA @USAGI
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 		IPv6 support
++ * 	YOSHIFUJI Hideaki
++ * 		Split up af-specific portion
++ * 
++ */
++
++#include <linux/config.h>
++#include <net/xfrm.h>
++#include <net/ip.h>
++#include <net/ipv6.h>
++#include <net/ip6_route.h>
++
++static struct dst_ops xfrm6_dst_ops;
++static struct xfrm_policy_afinfo xfrm6_policy_afinfo;
++
++static struct xfrm_type_map xfrm6_type_map = { .lock = RW_LOCK_UNLOCKED };
++
++static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl)
++{
++	int err = 0;
++	*dst = (struct xfrm_dst*)ip6_route_output(NULL, fl);
++	if (!*dst)
++		err = -ENETUNREACH;
++	return err;
++}
++
++/* Check that the bundle accepts the flow and its components are
++ * still valid.
++ */
++
++static int __xfrm6_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl)
++{
++	do {
++		if (xdst->u.dst.ops != &xfrm6_dst_ops)
++			return 1;
++
++		if (!xfrm_selector_match(&xdst->u.dst.xfrm->sel, fl, AF_INET6))
++			return 0;
++		if (xdst->u.dst.xfrm->km.state != XFRM_STATE_VALID ||
++		    xdst->u.dst.path->obsolete > 0)
++			return 0;
++		xdst = (struct xfrm_dst*)xdst->u.dst.child;
++	} while (xdst);
++	return 0;
++}
++
++static struct dst_entry *
++__xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy)
++{
++	struct dst_entry *dst;
++	u32 ndisc_bit = 0;
++
++	if (fl->proto == IPPROTO_ICMPV6 &&
++	    (fl->fl_icmp_type == NDISC_NEIGHBOUR_ADVERTISEMENT ||
++	     fl->fl_icmp_type == NDISC_NEIGHBOUR_SOLICITATION  ||
++	     fl->fl_icmp_type == NDISC_ROUTER_SOLICITATION))
++		ndisc_bit = RTF_NDISC;
++
++	/* Still not clear if we should set fl->fl6_{src,dst}... */
++	read_lock_bh(&policy->lock);
++	for (dst = policy->bundles; dst; dst = dst->next) {
++		struct xfrm_dst *xdst = (struct xfrm_dst*)dst;
++		struct in6_addr fl_dst_prefix, fl_src_prefix;
++
++		if ((xdst->u.rt6.rt6i_flags & RTF_NDISC) != ndisc_bit)
++			continue;
++
++		ipv6_addr_prefix(&fl_dst_prefix,
++				 &fl->fl6_dst,
++				 xdst->u.rt6.rt6i_dst.plen);
++		ipv6_addr_prefix(&fl_src_prefix,
++				 &fl->fl6_src,
++				 xdst->u.rt6.rt6i_src.plen);
++		if (!ipv6_addr_cmp(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) &&
++		    !ipv6_addr_cmp(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) &&
++		    __xfrm6_bundle_ok(xdst, fl)) {
++			dst_clone(dst);
++			break;
++		}
++	}
++	read_unlock_bh(&policy->lock);
++	return dst;
++}
++
++/* Allocate chain of dst_entry's, attach known xfrm's, calculate
++ * all the metrics... Shortly, bundle a bundle.
++ */
++
++static int
++__xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
++		      struct flowi *fl, struct dst_entry **dst_p)
++{
++	struct dst_entry *dst, *dst_prev;
++	struct rt6_info *rt0 = (struct rt6_info*)(*dst_p);
++	struct rt6_info *rt  = rt0;
++	struct in6_addr *remote = &fl->fl6_dst;
++	struct in6_addr *local  = &fl->fl6_src;
++	int i;
++	int err = 0;
++	int header_len = 0;
++	int trailer_len = 0;
++
++	dst = dst_prev = NULL;
++
++	for (i = 0; i < nx; i++) {
++		struct dst_entry *dst1 = dst_alloc(&xfrm6_dst_ops);
++
++		if (unlikely(dst1 == NULL)) {
++			err = -ENOBUFS;
++			goto error;
++		}
++
++		if (!dst)
++			dst = dst1;
++		else {
++			dst_prev->child = dst1;
++			dst1->flags |= DST_NOHASH;
++			dst_clone(dst1);
++		}
++		dst_prev = dst1;
++		if (xfrm[i]->props.mode) {
++			remote = (struct in6_addr*)&xfrm[i]->id.daddr;
++			local  = (struct in6_addr*)&xfrm[i]->props.saddr;
++		}
++		header_len += xfrm[i]->props.header_len;
++		trailer_len += xfrm[i]->props.trailer_len;
++	}
++
++	if (ipv6_addr_cmp(remote, &fl->fl6_dst)) {
++		struct flowi fl_tunnel;
++
++		memset(&fl_tunnel, 0, sizeof(fl_tunnel));
++		ipv6_addr_copy(&fl_tunnel.fl6_dst, remote);
++		ipv6_addr_copy(&fl_tunnel.fl6_src, local);
++
++		err = xfrm_dst_lookup((struct xfrm_dst **) &rt,
++				      &fl_tunnel, AF_INET6);
++		if (err)
++			goto error;
++	} else {
++		dst_hold(&rt->u.dst);
++	}
++	dst_prev->child = &rt->u.dst;
++	i = 0;
++	for (dst_prev = dst; dst_prev != &rt->u.dst; dst_prev = dst_prev->child) {
++		struct xfrm_dst *x = (struct xfrm_dst*)dst_prev;
++
++		dst_prev->xfrm = xfrm[i++];
++		dst_prev->dev = rt->u.dst.dev;
++		if (rt->u.dst.dev)
++			dev_hold(rt->u.dst.dev);
++		dst_prev->obsolete	= -1;
++		dst_prev->flags	       |= DST_HOST;
++		dst_prev->lastuse	= jiffies;
++		dst_prev->header_len	= header_len;
++		dst_prev->trailer_len	= trailer_len;
++		memcpy(&dst_prev->metrics, &rt->u.dst.metrics, sizeof(dst_prev->metrics));
++		dst_prev->path		= &rt->u.dst;
++
++		/* Copy neighbour for reachability confirmation */
++		dst_prev->neighbour	= neigh_clone(rt->u.dst.neighbour);
++		dst_prev->input		= rt->u.dst.input;
++		dst_prev->output	= xfrm6_output;
++		/* Sheit... I remember I did this right. Apparently,
++		 * it was magically lost, so this code needs audit */
++		x->u.rt6.rt6i_flags    = rt0->rt6i_flags&(RTCF_BROADCAST|RTCF_MULTICAST|RTCF_LOCAL|RTF_NDISC);
++		x->u.rt6.rt6i_metric   = rt0->rt6i_metric;
++		x->u.rt6.rt6i_node     = rt0->rt6i_node;
++		x->u.rt6.rt6i_gateway  = rt0->rt6i_gateway;
++		x->u.rt6.rt6i_dst      = rt0->rt6i_dst;
++		x->u.rt6.rt6i_src      = rt0->rt6i_src;	
++		header_len -= x->u.dst.xfrm->props.header_len;
++		trailer_len -= x->u.dst.xfrm->props.trailer_len;
++	}
++	*dst_p = dst;
++	return 0;
++
++error:
++	if (dst)
++		dst_free(dst);
++	return err;
++}
++
++static inline void
++_decode_session6(struct sk_buff *skb, struct flowi *fl)
++{
++	u16 offset = sizeof(struct ipv6hdr);
++	struct ipv6hdr *hdr = skb->nh.ipv6h;
++	struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
++	u8 nexthdr = skb->nh.ipv6h->nexthdr;
++
++	memset(fl, 0, sizeof(struct flowi));
++	ipv6_addr_copy(&fl->fl6_dst, &hdr->daddr);
++	ipv6_addr_copy(&fl->fl6_src, &hdr->saddr);
++
++	while (pskb_may_pull(skb, skb->nh.raw + offset + 1 - skb->data)) {
++		switch (nexthdr) {
++		case NEXTHDR_ROUTING:
++		case NEXTHDR_HOP:
++		case NEXTHDR_DEST:
++			offset += ipv6_optlen(exthdr);
++			nexthdr = exthdr->nexthdr;
++			exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset);
++			break;
++
++		case IPPROTO_UDP:
++		case IPPROTO_TCP:
++		case IPPROTO_SCTP:
++			if (pskb_may_pull(skb, skb->nh.raw + offset + 4 - skb->data)) {
++				u16 *ports = (u16 *)exthdr;
++
++				fl->fl_ip_sport = ports[0];
++				fl->fl_ip_dport = ports[1];
++			}
++			fl->proto = nexthdr;
++			return;
++
++		case IPPROTO_ICMPV6:
++			if (pskb_may_pull(skb, skb->nh.raw + offset + 2 - skb->data)) {
++				u8 *icmp = (u8 *)exthdr;
++
++				fl->fl_icmp_type = icmp[0];
++				fl->fl_icmp_code = icmp[1];
++			}
++			fl->proto = nexthdr;
++			return;
++
++		/* XXX Why are these headers listed here? */
++		case IPPROTO_AH:
++		case IPPROTO_ESP:
++		case IPPROTO_COMP:
++		default:
++			fl->fl_ipsec_spi = 0;
++			fl->proto = nexthdr;
++			return;
++		};
++	}
++}
++
++static inline int xfrm6_garbage_collect(void)
++{
++	read_lock(&xfrm6_policy_afinfo.lock);
++	xfrm6_policy_afinfo.garbage_collect();
++	read_unlock(&xfrm6_policy_afinfo.lock);
++	return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2);
++}
++
++static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu)
++{
++	struct dst_entry *path = dst->path;
++
++	if (mtu >= IPV6_MIN_MTU && mtu < dst_pmtu(dst))
++		return;
++
++	path->ops->update_pmtu(path, mtu);
++}
++
++static struct dst_ops xfrm6_dst_ops = {
++	.family =		AF_INET6,
++	.protocol =		__constant_htons(ETH_P_IPV6),
++	.gc =			xfrm6_garbage_collect,
++	.update_pmtu =		xfrm6_update_pmtu,
++	.gc_thresh =		1024,
++	.entry_size =		sizeof(struct xfrm_dst),
++};
++
++static struct xfrm_policy_afinfo xfrm6_policy_afinfo = {
++	.family =		AF_INET6,
++	.lock = 		RW_LOCK_UNLOCKED,
++	.type_map = 		&xfrm6_type_map,
++	.dst_ops =		&xfrm6_dst_ops,
++	.dst_lookup =		xfrm6_dst_lookup,
++	.find_bundle =		__xfrm6_find_bundle,
++	.bundle_create =	__xfrm6_bundle_create,
++	.decode_session =	_decode_session6,
++};
++
++static void __init xfrm6_policy_init(void)
++{
++	xfrm_policy_register_afinfo(&xfrm6_policy_afinfo);
++}
++
++static void __exit xfrm6_policy_fini(void)
++{
++	xfrm_policy_unregister_afinfo(&xfrm6_policy_afinfo);
++}
++
++void __init xfrm6_init(void)
++{
++	xfrm6_policy_init();
++	xfrm6_state_init();
++}
++
++void __exit xfrm6_fini(void)
++{
++	//xfrm6_input_fini();
++	xfrm6_policy_fini();
++	xfrm6_state_fini();
++}
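+#
+# The bundle assembled by __xfrm6_bundle_create() above is a singly
+# linked chain of dst entries, one per xfrm state, terminated by the
+# plain IPv6 route.  __xfrm6_bundle_ok() validates it by walking that
+# chain; in simplified form (the real code also re-matches the flow
+# selector and checks path obsolescence):
+#
+#	struct dst_entry *d;
+#	for (d = bundle; d->ops == &xfrm6_dst_ops; d = d->child)
+#		if (d->xfrm->km.state != XFRM_STATE_VALID)
+#			return 0;	/* stale, caller rebuilds */
+#	return 1;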
+diff -Nru a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
+--- /dev/null	Wed Dec 31 16:00:00 1969
++++ b/net/ipv6/xfrm6_state.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,132 @@
++/*
++ * xfrm6_state.c: based on xfrm4_state.c
++ *
++ * Authors:
++ *	Mitsuru KANDA @USAGI
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 		IPv6 support
++ * 	YOSHIFUJI Hideaki @USAGI
++ * 		Split up af-specific portion
++ * 	
++ */
++
++#include <net/xfrm.h>
++#include <linux/pfkeyv2.h>
++#include <linux/ipsec.h>
++#include <net/ipv6.h>
++
++extern struct xfrm_state_afinfo xfrm6_state_afinfo;
++
++static void
++__xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl,
++		     struct xfrm_tmpl *tmpl,
++		     xfrm_address_t *daddr, xfrm_address_t *saddr)
++{
++	/* Initialize temporary selector matching only
++	 * to current session. */
++	ipv6_addr_copy((struct in6_addr *)&x->sel.daddr, &fl->fl6_dst);
++	ipv6_addr_copy((struct in6_addr *)&x->sel.saddr, &fl->fl6_src);
++	x->sel.dport = fl->fl_ip_dport;
++	x->sel.dport_mask = ~0;
++	x->sel.sport = fl->fl_ip_sport;
++	x->sel.sport_mask = ~0;
++	x->sel.prefixlen_d = 128;
++	x->sel.prefixlen_s = 128;
++	x->sel.proto = fl->proto;
++	x->sel.ifindex = fl->oif;
++	x->id = tmpl->id;
++	if (ipv6_addr_any((struct in6_addr*)&x->id.daddr))
++		memcpy(&x->id.daddr, daddr, sizeof(x->sel.daddr));
++	memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr));
++	if (ipv6_addr_any((struct in6_addr*)&x->props.saddr))
++		memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr));
++	x->props.mode = tmpl->mode;
++	x->props.reqid = tmpl->reqid;
++	x->props.family = AF_INET6;
++}
++
++static struct xfrm_state *
++__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto)
++{
++	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
++	struct xfrm_state *x;
++
++	list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) {
++		if (x->props.family == AF_INET6 &&
++		    spi == x->id.spi &&
++		    !ipv6_addr_cmp((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
++		    proto == x->id.proto) {
++			xfrm_state_hold(x);
++			return x;
++		}
++	}
++	return NULL;
++}
++
++static struct xfrm_state *
++__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, 
++		 xfrm_address_t *daddr, xfrm_address_t *saddr, 
++		 int create)
++{
++	struct xfrm_state *x, *x0;
++	unsigned h = __xfrm6_dst_hash(daddr);
++
++	x0 = NULL;
++
++	list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) {
++		if (x->props.family == AF_INET6 &&
++		    !ipv6_addr_cmp((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) &&
++		    mode == x->props.mode &&
++		    proto == x->id.proto &&
++		    !ipv6_addr_cmp((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) &&
++		    reqid == x->props.reqid &&
++		    x->km.state == XFRM_STATE_ACQ &&
++		    !x->id.spi) {
++			    x0 = x;
++			    break;
++		    }
++	}
++	if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) {
++		memcpy(x0->sel.daddr.a6, daddr, sizeof(struct in6_addr));
++		memcpy(x0->sel.saddr.a6, saddr, sizeof(struct in6_addr));
++		x0->sel.prefixlen_d = 128;
++		x0->sel.prefixlen_s = 128;
++		memcpy(x0->props.saddr.a6, saddr, sizeof(struct in6_addr));
++		x0->km.state = XFRM_STATE_ACQ;
++		memcpy(x0->id.daddr.a6, daddr, sizeof(struct in6_addr));
++		x0->id.proto = proto;
++		x0->props.family = AF_INET6;
++		x0->props.mode = mode;
++		x0->props.reqid = reqid;
++		x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
++		xfrm_state_hold(x0);
++		x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
++		add_timer(&x0->timer);
++		xfrm_state_hold(x0);
++		list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h);
++		wake_up(&km_waitq);
++	}
++	if (x0)
++		xfrm_state_hold(x0);
++	return x0;
++}
++
++static struct xfrm_state_afinfo xfrm6_state_afinfo = {
++	.family			= AF_INET6,
++	.lock			= RW_LOCK_UNLOCKED,
++	.init_tempsel		= __xfrm6_init_tempsel,
++	.state_lookup		= __xfrm6_state_lookup,
++	.find_acq		= __xfrm6_find_acq,
++};
++
++void __init xfrm6_state_init(void)
++{
++	xfrm_state_register_afinfo(&xfrm6_state_afinfo);
++}
++
++void __exit xfrm6_state_fini(void)
++{
++	xfrm_state_unregister_afinfo(&xfrm6_state_afinfo);
++}
++
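+#
+# __xfrm6_state_lookup() above is a plain chained-hash lookup: the
+# (daddr, spi, proto) triple selects a bucket and the chain is scanned
+# for an exact match.  Schematically (simplified; the real code returns
+# the hit only after taking a reference with xfrm_state_hold()):
+#
+#	unsigned h = __xfrm6_spi_hash(daddr, spi, proto);
+#	list_for_each_entry(x, state_byspi + h, byspi)
+#		if (x->id.spi == spi && x->id.proto == proto &&
+#		    !ipv6_addr_cmp(daddr, &x->id.daddr))
+#			return x;
+#	return NULL;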
+diff -Nru a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c
+--- /dev/null	Wed Dec 31 16:00:00 1969
++++ b/net/ipv6/xfrm6_tunnel.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,543 @@
++/*
++ * Copyright (C)2003,2004 USAGI/WIDE Project
++ *
++ * This program is free software; you can redistribute it and/or modify
++ * it under the terms of the GNU General Public License as published by
++ * the Free Software Foundation; either version 2 of the License, or
++ * (at your option) any later version.
++ * 
++ * This program is distributed in the hope that it will be useful,
++ * but WITHOUT ANY WARRANTY; without even the implied warranty of
++ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++ * GNU General Public License for more details.
++ * 
++ * You should have received a copy of the GNU General Public License
++ * along with this program; if not, write to the Free Software
++ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
++ *
++ * Authors	Mitsuru KANDA  <mk at linux-ipv6.org>
++ * 		YOSHIFUJI Hideaki <yoshfuji at linux-ipv6.org>
++ *
++ * Based on net/ipv4/xfrm4_tunnel.c
++ *
++ */
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/xfrm.h>
++#include <linux/list.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++#include <net/ipv6.h>
++#include <net/protocol.h>
++#include <linux/ipv6.h>
++#include <linux/icmpv6.h>
++
++#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
++# define X6TDEBUG	3
++#else
++# define X6TDEBUG	1
++#endif
++
++#define X6TPRINTK(fmt, args...)		printk(fmt, ## args)
++#define X6TNOPRINTK(fmt, args...)	do { ; } while(0)
++
++#if X6TDEBUG >= 1
++# define X6TPRINTK1	X6TPRINTK
++#else
++# define X6TPRINTK1	X6TNOPRINTK
++#endif
++
++#if X6TDEBUG >= 3
++# define X6TPRINTK3	X6TPRINTK
++#else
++# define X6TPRINTK3	X6TNOPRINTK
++#endif
++
++/*
++ * xfrm_tunnel_spi things are for allocating unique id ("spi") 
++ * per xfrm_address_t.
++ */
++struct xfrm6_tunnel_spi {
++	struct hlist_node list_byaddr;
++	struct hlist_node list_byspi;
++	xfrm_address_t addr;
++	u32 spi;
++	atomic_t refcnt;
++#ifdef XFRM6_TUNNEL_SPI_MAGIC
++	u32 magic;
++#endif
++};
++
++#ifdef CONFIG_IPV6_XFRM6_TUNNEL_DEBUG
++# define XFRM6_TUNNEL_SPI_MAGIC 0xdeadbeef
++#endif
++
++static rwlock_t xfrm6_tunnel_spi_lock = RW_LOCK_UNLOCKED;
++
++static u32 xfrm6_tunnel_spi;
++
++#define XFRM6_TUNNEL_SPI_MIN	1
++#define XFRM6_TUNNEL_SPI_MAX	0xffffffff
++
++static kmem_cache_t *xfrm6_tunnel_spi_kmem;
++
++#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256
++#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256
++
++static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE];
++static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE];
++
++#ifdef XFRM6_TUNNEL_SPI_MAGIC
++static int x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
++			     const char *name)
++{
++	if (unlikely(x6spi->magic != XFRM6_TUNNEL_SPI_MAGIC)) {
++		X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
++				      "at %p has corrupted magic %08x "
++				      "(should be %08x)\n",
++			   name, x6spi, x6spi->magic, XFRM6_TUNNEL_SPI_MAGIC);
++		return -1;
++	}
++	return 0;
++}
++#else
++static int inline x6spi_check_magic(const struct xfrm6_tunnel_spi *x6spi,
++				    const char *name)
++{
++	return 0;
++}
++#endif
++
++#define X6SPI_CHECK_MAGIC(x6spi) x6spi_check_magic((x6spi), __FUNCTION__)
++
++
++static unsigned inline xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr)
++{
++	unsigned h;
++
++	X6TPRINTK3(KERN_DEBUG "%s(addr=%p)\n", __FUNCTION__, addr);
++
++	h = addr->a6[0] ^ addr->a6[1] ^ addr->a6[2] ^ addr->a6[3];
++	h ^= h >> 16;
++	h ^= h >> 8;
++	h &= XFRM6_TUNNEL_SPI_BYADDR_HSIZE - 1;
++
++	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, h);
++
++	return h;
++}
++
++static unsigned inline xfrm6_tunnel_spi_hash_byspi(u32 spi)
++{
++	return spi % XFRM6_TUNNEL_SPI_BYSPI_HSIZE;
++}
++
++
++static int xfrm6_tunnel_spi_init(void)
++{
++	int i;
++
++	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
++
++	xfrm6_tunnel_spi = 0;
++	xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi",
++						  sizeof(struct xfrm6_tunnel_spi),
++						  0, SLAB_HWCACHE_ALIGN,
++						  NULL, NULL);
++	if (!xfrm6_tunnel_spi_kmem) {
++		X6TPRINTK1(KERN_ERR
++			   "%s(): failed to allocate xfrm6_tunnel_spi_kmem\n",
++		           __FUNCTION__);
++		return -ENOMEM;
++	}
++
++	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++)
++		INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]);
++	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++)
++		INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]);
++	return 0;
++}
++
++static void xfrm6_tunnel_spi_fini(void)
++{
++	int i;
++
++	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
++
++	for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) {
++		if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i]))
++			goto err;
++	}
++	for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) {
++		if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i]))
++			goto err;
++	}
++	kmem_cache_destroy(xfrm6_tunnel_spi_kmem);
++	xfrm6_tunnel_spi_kmem = NULL;
++	return;
++err:
++	X6TPRINTK1(KERN_ERR "%s(): table is not empty\n", __FUNCTION__);
++	return;
++}
++
++static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
++{
++	struct xfrm6_tunnel_spi *x6spi;
++	struct hlist_node *pos;
++
++	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
++
++	hlist_for_each_entry(x6spi, pos,
++			     &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
++			     list_byaddr) {
++		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
++			X6SPI_CHECK_MAGIC(x6spi);
++			X6TPRINTK3(KERN_DEBUG "%s() = %p(%u)\n", __FUNCTION__, x6spi, x6spi->spi);
++			return x6spi;
++		}
++	}
++
++	X6TPRINTK3(KERN_DEBUG "%s() = NULL(0)\n", __FUNCTION__);
++	return NULL;
++}
++
++u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr)
++{
++	struct xfrm6_tunnel_spi *x6spi;
++	u32 spi;
++
++	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
++
++	read_lock_bh(&xfrm6_tunnel_spi_lock);
++	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
++	spi = x6spi ? x6spi->spi : 0;
++	read_unlock_bh(&xfrm6_tunnel_spi_lock);
++	return spi;
++}
++
++EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup);
++
++static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
++{
++	u32 spi;
++	struct xfrm6_tunnel_spi *x6spi;
++	struct hlist_node *pos;
++	unsigned index;
++
++	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
++
++	if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN ||
++	    xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX)
++		xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN;
++	else
++		xfrm6_tunnel_spi++;
++
++	for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) {
++		index = xfrm6_tunnel_spi_hash_byspi(spi);
++		hlist_for_each_entry(x6spi, pos, 
++				     &xfrm6_tunnel_spi_byspi[index], 
++				     list_byspi) {
++			if (x6spi->spi == spi)
++				goto try_next_1;
++		}
++		xfrm6_tunnel_spi = spi;
++		goto alloc_spi;
++try_next_1:;
++	}
++	for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) {
++		index = xfrm6_tunnel_spi_hash_byspi(spi);
++		hlist_for_each_entry(x6spi, pos, 
++				     &xfrm6_tunnel_spi_byspi[index], 
++				     list_byspi) {
++			if (x6spi->spi == spi)
++				goto try_next_2;
++		}
++		xfrm6_tunnel_spi = spi;
++		goto alloc_spi;
++try_next_2:;
++	}
++	spi = 0;
++	goto out;
++alloc_spi:
++	X6TPRINTK3(KERN_DEBUG "%s(): allocate new spi for "
++			      "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n", 
++			      __FUNCTION__, 
++			      NIP6(*(struct in6_addr *)saddr));
++	x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, SLAB_ATOMIC);
++	if (!x6spi) {
++		X6TPRINTK1(KERN_ERR "%s(): kmem_cache_alloc() failed\n", 
++			   __FUNCTION__);
++		goto out;
++	}
++#ifdef XFRM6_TUNNEL_SPI_MAGIC
++	x6spi->magic = XFRM6_TUNNEL_SPI_MAGIC;
++#endif
++	memcpy(&x6spi->addr, saddr, sizeof(x6spi->addr));
++	x6spi->spi = spi;
++	atomic_set(&x6spi->refcnt, 1);
++
++	hlist_add_head(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]);
++
++	index = xfrm6_tunnel_spi_hash_byaddr(saddr);
++	hlist_add_head(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]);
++	X6SPI_CHECK_MAGIC(x6spi);
++out:
++	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
++	return spi;
++}
++
++u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr)
++{
++	struct xfrm6_tunnel_spi *x6spi;
++	u32 spi;
++
++	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
++
++	write_lock_bh(&xfrm6_tunnel_spi_lock);
++	x6spi = __xfrm6_tunnel_spi_lookup(saddr);
++	if (x6spi) {
++		atomic_inc(&x6spi->refcnt);
++		spi = x6spi->spi;
++	} else
++		spi = __xfrm6_tunnel_alloc_spi(saddr);
++	write_unlock_bh(&xfrm6_tunnel_spi_lock);
++
++	X6TPRINTK3(KERN_DEBUG "%s() = %u\n", __FUNCTION__, spi);
++
++	return spi;
++}
++
++EXPORT_SYMBOL(xfrm6_tunnel_alloc_spi);
++
++void xfrm6_tunnel_free_spi(xfrm_address_t *saddr)
++{
++	struct xfrm6_tunnel_spi *x6spi;
++	struct hlist_node *pos, *n;
++
++	X6TPRINTK3(KERN_DEBUG "%s(saddr=%p)\n", __FUNCTION__, saddr);
++
++	write_lock_bh(&xfrm6_tunnel_spi_lock);
++
++	hlist_for_each_entry_safe(x6spi, pos, n, 
++				  &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)],
++				  list_byaddr)
++	{
++		if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) {
++			X6TPRINTK3(KERN_DEBUG "%s(): x6spi object "
++					      "for %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
++					      "found at %p\n",
++				   __FUNCTION__, 
++				   NIP6(*(struct in6_addr *)saddr),
++				   x6spi);
++			X6SPI_CHECK_MAGIC(x6spi);
++			if (atomic_dec_and_test(&x6spi->refcnt)) {
++				hlist_del(&x6spi->list_byaddr);
++				hlist_del(&x6spi->list_byspi);
++				kmem_cache_free(xfrm6_tunnel_spi_kmem, x6spi);
++				break;
++			}
++		}
++	}
++	write_unlock_bh(&xfrm6_tunnel_spi_lock);
++}
++
++EXPORT_SYMBOL(xfrm6_tunnel_free_spi);
++
++static int xfrm6_tunnel_output(struct sk_buff *skb)
++{
++	struct ipv6hdr *top_iph;
++
++	top_iph = (struct ipv6hdr *)skb->data;
++	top_iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
++
++	return 0;
++}
++
++static int xfrm6_tunnel_input(struct xfrm_state *x, struct xfrm_decap_state *decap, struct sk_buff *skb)
++{
++	return 0;
++}
++
++static struct xfrm6_tunnel *xfrm6_tunnel_handler;
++static DECLARE_MUTEX(xfrm6_tunnel_sem);
++
++int xfrm6_tunnel_register(struct xfrm6_tunnel *handler)
++{
++	int ret;
++
++	down(&xfrm6_tunnel_sem);
++	ret = 0;
++	if (xfrm6_tunnel_handler != NULL)
++		ret = -EINVAL;
++	if (!ret)
++		xfrm6_tunnel_handler = handler;
++	up(&xfrm6_tunnel_sem);
++
++	return ret;
++}
++
++EXPORT_SYMBOL(xfrm6_tunnel_register);
++
++int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler)
++{
++	int ret;
++
++	down(&xfrm6_tunnel_sem);
++	ret = 0;
++	if (xfrm6_tunnel_handler != handler)
++		ret = -EINVAL;
++	if (!ret)
++		xfrm6_tunnel_handler = NULL;
++	up(&xfrm6_tunnel_sem);
++
++	synchronize_net();
++
++	return ret;
++}
++
++EXPORT_SYMBOL(xfrm6_tunnel_deregister);
++
++static int xfrm6_tunnel_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
++{
++	struct sk_buff *skb = *pskb;
++	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
++	struct ipv6hdr *iph = skb->nh.ipv6h;
++	u32 spi;
++
++	/* device-like ip6ip6_handler() */
++	if (handler && handler->handler(pskb, nhoffp) == 0)
++		return 0;
++
++	spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr);
++	return xfrm6_rcv_spi(pskb, nhoffp, spi);
++}
++
++static void xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
++			     int type, int code, int offset, __u32 info)
++{
++	struct xfrm6_tunnel *handler = xfrm6_tunnel_handler;
++
++	/* call here first for device-like ip6ip6 err handling */
++	if (handler) {
++		handler->err_handler(skb, opt, type, code, offset, info);
++		return;
++	}
++
++	/* xfrm6_tunnel native err handling */
++	switch (type) {
++	case ICMPV6_DEST_UNREACH: 
++		switch (code) {
++		case ICMPV6_NOROUTE: 
++		case ICMPV6_ADM_PROHIBITED:
++		case ICMPV6_NOT_NEIGHBOUR:
++		case ICMPV6_ADDR_UNREACH:
++		case ICMPV6_PORT_UNREACH:
++		default:
++			X6TPRINTK3(KERN_DEBUG
++				   "xfrm6_tunnel: Destination Unreach.\n");
++			break;
++		}
++		break;
++	case ICMPV6_PKT_TOOBIG:
++			X6TPRINTK3(KERN_DEBUG 
++				   "xfrm6_tunnel: Packet Too Big.\n");
++		break;
++	case ICMPV6_TIME_EXCEED:
++		switch (code) {
++		case ICMPV6_EXC_HOPLIMIT:
++			X6TPRINTK3(KERN_DEBUG
++				   "xfrm6_tunnel: Too small Hoplimit.\n");
++			break;
++		case ICMPV6_EXC_FRAGTIME:
++		default: 
++			break;
++		}
++		break;
++	case ICMPV6_PARAMPROB:
++		switch (code) {
++		case ICMPV6_HDR_FIELD: break;
++		case ICMPV6_UNK_NEXTHDR: break;
++		case ICMPV6_UNK_OPTION: break;
++		}
++		break;
++	default:
++		break;
++	}
++	return;
++}
++
++static int xfrm6_tunnel_init_state(struct xfrm_state *x, void *args)
++{
++	if (!x->props.mode)
++		return -EINVAL;
++
++	if (x->encap)
++		return -EINVAL;
++
++	x->props.header_len = sizeof(struct ipv6hdr);
++
++	return 0;
++}
++
++static void xfrm6_tunnel_destroy(struct xfrm_state *x)
++{
++	xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr);
++}
++
++static struct xfrm_type xfrm6_tunnel_type = {
++	.description	= "IP6IP6",
++	.owner          = THIS_MODULE,
++	.proto		= IPPROTO_IPV6,
++	.init_state	= xfrm6_tunnel_init_state,
++	.destructor	= xfrm6_tunnel_destroy,
++	.input		= xfrm6_tunnel_input,
++	.output		= xfrm6_tunnel_output,
++};
++
++static struct inet6_protocol xfrm6_tunnel_protocol = {
++	.handler	= xfrm6_tunnel_rcv,
++	.err_handler	= xfrm6_tunnel_err, 
++	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
++};
++
++static int __init xfrm6_tunnel_init(void)
++{
++	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
++
++	if (xfrm_register_type(&xfrm6_tunnel_type, AF_INET6) < 0) {
++		X6TPRINTK1(KERN_ERR
++			   "xfrm6_tunnel init: can't add xfrm type\n");
++		return -EAGAIN;
++	}
++	if (inet6_add_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0) {
++		X6TPRINTK1(KERN_ERR
++			   "xfrm6_tunnel init(): can't add protocol\n");
++		xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
++		return -EAGAIN;
++	}
++	if (xfrm6_tunnel_spi_init() < 0) {
++		X6TPRINTK1(KERN_ERR
++			   "xfrm6_tunnel init: failed to initialize spi\n");
++		inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6);
++		xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6);
++		return -EAGAIN;
++	}
++	return 0;
++}
++
++static void __exit xfrm6_tunnel_fini(void)
++{
++	X6TPRINTK3(KERN_DEBUG "%s()\n", __FUNCTION__);
++
++	xfrm6_tunnel_spi_fini();
++	if (inet6_del_protocol(&xfrm6_tunnel_protocol, IPPROTO_IPV6) < 0)
++		X6TPRINTK1(KERN_ERR 
++			   "xfrm6_tunnel close: can't remove protocol\n");
++	if (xfrm_unregister_type(&xfrm6_tunnel_type, AF_INET6) < 0)
++		X6TPRINTK1(KERN_ERR
++			   "xfrm6_tunnel close: can't remove xfrm type\n");
++}
++
++module_init(xfrm6_tunnel_init);
++module_exit(xfrm6_tunnel_fini);
++MODULE_LICENSE("GPL");
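+#
+# __xfrm6_tunnel_alloc_spi() above hands out SPIs with a two-pass
+# wraparound search: first from the last value allocated up to the
+# maximum, then from the minimum back up to the starting point.  The
+# same pattern in miniature (sketch only; in_use() stands in for the
+# by-SPI hash probe):
+#
+#	for (spi = last + 1; spi <= MAX; spi++)
+#		if (!in_use(spi)) return last = spi;
+#	for (spi = MIN; spi < last; spi++)
+#		if (!in_use(spi)) return last = spi;
+#	return 0;	/* SPI space exhausted */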
+diff -Nru a/net/key/Makefile b/net/key/Makefile
+--- /dev/null	Wed Dec 31 16:00:00 1969
++++ b/net/key/Makefile	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,9 @@
++#
++# Makefile for the key AF.
++#
++
++O_TARGET := key.o
++
++obj-$(CONFIG_NET_KEY) += af_key.o
++
++include $(TOPDIR)/Rules.make
+diff -Nru a/net/key/af_key.c b/net/key/af_key.c
+--- /dev/null	Wed Dec 31 16:00:00 1969
++++ b/net/key/af_key.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,2881 @@
++/*
++ * net/key/af_key.c	An implementation of PF_KEYv2 sockets.
++ *
++ *		This program is free software; you can redistribute it and/or
++ *		modify it under the terms of the GNU General Public License
++ *		as published by the Free Software Foundation; either version
++ *		2 of the License, or (at your option) any later version.
++ *
++ * Authors:	Maxim Giryaev	<gem at asplinux.ru>
++ *		David S. Miller	<davem at redhat.com>
++ *		Alexey Kuznetsov <kuznet at ms2.inr.ac.ru>
++ *		Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ *		Kazunori MIYAZAWA / USAGI Project <miyazawa at linux-ipv6.org>
++ *		Derek Atkins <derek at ihtfp.com>
++ */
++
++#include <linux/config.h>
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/socket.h>
++#include <linux/pfkeyv2.h>
++#include <linux/ipsec.h>
++#include <linux/skbuff.h>
++#include <linux/rtnetlink.h>
++#include <linux/in.h>
++#include <linux/in6.h>
++#include <linux/proc_fs.h>
++#include <linux/init.h>
++#include <net/xfrm.h>
++
++#include <net/sock.h>
++
++#define _X2KEY(x) ((x) == XFRM_INF ? 0 : (x))
++#define _KEY2X(x) ((x) == 0 ? XFRM_INF : (x))
++
++
++/* List of all pfkey sockets. */
++static struct sock * pfkey_table;
++static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait);
++static rwlock_t pfkey_table_lock = RW_LOCK_UNLOCKED;
++static atomic_t pfkey_table_users = ATOMIC_INIT(0);
++
++static atomic_t pfkey_socks_nr = ATOMIC_INIT(0);
++
++static void pfkey_sock_destruct(struct sock *sk)
++{
++	skb_queue_purge(&sk->receive_queue);
++
++	if (!sk->dead) {
++		printk("Attempt to release alive pfkey socket: %p\n", sk);
++		return;
++	}
++
++	BUG_TRAP(atomic_read(&sk->rmem_alloc)==0);
++	BUG_TRAP(atomic_read(&sk->wmem_alloc)==0);
++
++	kfree(pfkey_sk(sk));
++
++	atomic_dec(&pfkey_socks_nr);
++
++	MOD_DEC_USE_COUNT;
++}
++
++static void pfkey_table_grab(void)
++{
++	write_lock_bh(&pfkey_table_lock);
++
++	if (atomic_read(&pfkey_table_users)) {
++		DECLARE_WAITQUEUE(wait, current);
++
++		add_wait_queue_exclusive(&pfkey_table_wait, &wait);
++		for(;;) {
++			set_current_state(TASK_UNINTERRUPTIBLE);
++			if (atomic_read(&pfkey_table_users) == 0)
++				break;
++			write_unlock_bh(&pfkey_table_lock);
++			schedule();
++			write_lock_bh(&pfkey_table_lock);
++		}
++
++		__set_current_state(TASK_RUNNING);
++		remove_wait_queue(&pfkey_table_wait, &wait);
++	}
++}
++
++static __inline__ void pfkey_table_ungrab(void)
++{
++	write_unlock_bh(&pfkey_table_lock);
++	wake_up(&pfkey_table_wait);
++}
++
++static __inline__ void pfkey_lock_table(void)
++{
++	/* read_lock() synchronizes us to pfkey_table_grab */
++
++	read_lock(&pfkey_table_lock);
++	atomic_inc(&pfkey_table_users);
++	read_unlock(&pfkey_table_lock);
++}
++
++static __inline__ void pfkey_unlock_table(void)
++{
++	if (atomic_dec_and_test(&pfkey_table_users))
++		wake_up(&pfkey_table_wait);
++}
++
++
++static struct proto_ops pfkey_ops;
++
++static void pfkey_insert(struct sock *sk)
++{
++	pfkey_table_grab();
++	sk->next = pfkey_table;
++	pfkey_table = sk;
++	sock_hold(sk);
++	pfkey_table_ungrab();
++}
++
++static void pfkey_remove(struct sock *sk)
++{
++	struct sock **skp;
++
++	pfkey_table_grab();
++	for (skp = &pfkey_table; *skp; skp = &((*skp)->next)) {
++		if (*skp == sk) {
++			*skp = sk->next;
++			__sock_put(sk);
++			break;
++		}
++	}
++	pfkey_table_ungrab();
++}
++
++static int pfkey_create(struct socket *sock, int protocol)
++{
++	struct sock *sk;
++	struct pfkey_opt *pfk;
++	int err;
++
++	if (!capable(CAP_NET_ADMIN))
++		return -EPERM;
++	if (sock->type != SOCK_RAW)
++		return -ESOCKTNOSUPPORT;
++	if (protocol != PF_KEY_V2)
++		return -EPROTONOSUPPORT;
++
++	MOD_INC_USE_COUNT;
++
++	err = -ENOMEM;
++	sk = sk_alloc(PF_KEY, GFP_KERNEL, 1);
++	if (sk == NULL)
++		goto out;
++
++	sock->ops = &pfkey_ops;
++	sock_init_data(sock, sk);
++
++	err = -ENOMEM;
++	pfk = pfkey_sk(sk) = kmalloc(sizeof(*pfk), GFP_KERNEL);
++	if (!pfk) {
++		sk_free(sk);
++		goto out;
++	}
++	memset(pfk, 0, sizeof(*pfk));
++
++	sk->family = PF_KEY;
++	sk->destruct = pfkey_sock_destruct;
++
++	atomic_inc(&pfkey_socks_nr);
++
++	pfkey_insert(sk);
++
++	return 0;
++
++out:
++	MOD_DEC_USE_COUNT;
++	return err;
++}
++
++static int pfkey_release(struct socket *sock)
++{
++	struct sock *sk = sock->sk;
++
++	if (!sk)
++		return 0;
++
++	pfkey_remove(sk);
++
++	sock_orphan(sk);
++	sock->sk = NULL;
++	skb_queue_purge(&sk->write_queue);
++	sock_put(sk);
++
++	return 0;
++}
++
++static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
++			       int allocation, struct sock *sk)
++{
++	int err = -ENOBUFS;
++
++	sock_hold(sk);
++	if (*skb2 == NULL) {
++		if (atomic_read(&skb->users) != 1) {
++			*skb2 = skb_clone(skb, allocation);
++		} else {
++			*skb2 = skb;
++			atomic_inc(&skb->users);
++		}
++	}
++	if (*skb2 != NULL) {
++		if (atomic_read(&sk->rmem_alloc) <= sk->rcvbuf) {
++			skb_orphan(*skb2);
++			skb_set_owner_r(*skb2, sk);
++			skb_queue_tail(&sk->receive_queue, *skb2);
++			sk->data_ready(sk, (*skb2)->len);
++			*skb2 = NULL;
++			err = 0;
++		}
++	}
++	sock_put(sk);
++	return err;
++}
++
++/* Send SKB to all pfkey sockets matching selected criteria.  */
++#define BROADCAST_ALL		0
++#define BROADCAST_ONE		1
++#define BROADCAST_REGISTERED	2
++#define BROADCAST_PROMISC_ONLY	4
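++/* Flag semantics as implemented below: BROADCAST_ONE delivers only to
++ * one_sk, BROADCAST_REGISTERED only to sockets marked registered
++ * (SADB_REGISTER), and BROADCAST_PROMISC_ONLY suppresses normal
++ * delivery entirely -- yet promiscuous sockets always receive a copy,
++ * whatever flags the caller passed.
++ */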
++static int pfkey_broadcast(struct sk_buff *skb, int allocation,
++			   int broadcast_flags, struct sock *one_sk)
++{
++	struct sock *sk;
++	struct sk_buff *skb2 = NULL;
++	int err = -ESRCH;
++
++	/* XXX Do we need something like netlink_overrun?  I think
++	 * XXX PF_KEY socket apps will not mind current behavior.
++	 */
++	if (!skb)
++		return -ENOMEM;
++
++	pfkey_lock_table();
++	for (sk = pfkey_table; sk; sk = sk->next) {
++		struct pfkey_opt *pfk = pfkey_sk(sk);
++		int err2;
++
++		/* Yes, this means that if you are meant to receive this
++		 * pfkey message, a promiscuous socket will receive it
++		 * twice.
++		 */
++		if (pfk->promisc)
++			pfkey_broadcast_one(skb, &skb2, allocation, sk);
++
++		/* the exact target will be processed later */
++		if (sk == one_sk)
++			continue;
++		if (broadcast_flags != BROADCAST_ALL) {
++			if (broadcast_flags & BROADCAST_PROMISC_ONLY)
++				continue;
++			if ((broadcast_flags & BROADCAST_REGISTERED) &&
++			    !pfk->registered)
++				continue;
++			if (broadcast_flags & BROADCAST_ONE)
++				continue;
++		}
++
++		err2 = pfkey_broadcast_one(skb, &skb2, allocation, sk);
++
++		/* The error is cleared after successful sending to at
++		 * least one registered KM */
++		if ((broadcast_flags & BROADCAST_REGISTERED) && err)
++			err = err2;
++	}
++	pfkey_unlock_table();
++
++	if (one_sk != NULL)
++		err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
++
++	if (skb2)
++		kfree_skb(skb2);
++	kfree_skb(skb);
++	return err;
++}
++
++static inline void pfkey_hdr_dup(struct sadb_msg *new, struct sadb_msg *orig)
++{
++	*new = *orig;
++}
++
++static int pfkey_error(struct sadb_msg *orig, int err, struct sock *sk)
++{
++	struct sk_buff *skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
++	struct sadb_msg *hdr;
++
++	if (!skb)
++		return -ENOBUFS;
++
++	/* Woe be to the platform trying to support PFKEY yet
++	 * having normal errnos outside the 1-255 range, inclusive.
++	 */
++	err = -err;
++	if (err == ERESTARTSYS ||
++	    err == ERESTARTNOHAND ||
++	    err == ERESTARTNOINTR)
++		err = EINTR;
++	if (err >= 512)
++		err = EINVAL;
++	if (err <= 0 || err >= 256)
++		BUG();
++
++	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
++	pfkey_hdr_dup(hdr, orig);
++	hdr->sadb_msg_errno = (uint8_t) err;
++	hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
++			     sizeof(uint64_t));
++
++	pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk);
++
++	return 0;
++}
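++
++/* sadb_msg_errno is a single octet, so only errnos 1..255 can be
++ * reported back; the clamping above rewrites restart codes to EINTR
++ * and collapses anything >= 512 to EINVAL before the broadcast.
++ */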
++
++static u8 sadb_ext_min_len[] = {
++	[SADB_EXT_RESERVED]		= (u8) 0,
++	[SADB_EXT_SA]			= (u8) sizeof(struct sadb_sa),
++	[SADB_EXT_LIFETIME_CURRENT]	= (u8) sizeof(struct sadb_lifetime),
++	[SADB_EXT_LIFETIME_HARD]	= (u8) sizeof(struct sadb_lifetime),
++	[SADB_EXT_LIFETIME_SOFT]	= (u8) sizeof(struct sadb_lifetime),
++	[SADB_EXT_ADDRESS_SRC]		= (u8) sizeof(struct sadb_address),
++	[SADB_EXT_ADDRESS_DST]		= (u8) sizeof(struct sadb_address),
++	[SADB_EXT_ADDRESS_PROXY]	= (u8) sizeof(struct sadb_address),
++	[SADB_EXT_KEY_AUTH]		= (u8) sizeof(struct sadb_key),
++	[SADB_EXT_KEY_ENCRYPT]		= (u8) sizeof(struct sadb_key),
++	[SADB_EXT_IDENTITY_SRC]		= (u8) sizeof(struct sadb_ident),
++	[SADB_EXT_IDENTITY_DST]		= (u8) sizeof(struct sadb_ident),
++	[SADB_EXT_SENSITIVITY]		= (u8) sizeof(struct sadb_sens),
++	[SADB_EXT_PROPOSAL]		= (u8) sizeof(struct sadb_prop),
++	[SADB_EXT_SUPPORTED_AUTH]	= (u8) sizeof(struct sadb_supported),
++	[SADB_EXT_SUPPORTED_ENCRYPT]	= (u8) sizeof(struct sadb_supported),
++	[SADB_EXT_SPIRANGE]		= (u8) sizeof(struct sadb_spirange),
++	[SADB_X_EXT_KMPRIVATE]		= (u8) sizeof(struct sadb_x_kmprivate),
++	[SADB_X_EXT_POLICY]		= (u8) sizeof(struct sadb_x_policy),
++	[SADB_X_EXT_SA2]		= (u8) sizeof(struct sadb_x_sa2),
++	[SADB_X_EXT_NAT_T_TYPE]		= (u8) sizeof(struct sadb_x_nat_t_type),
++	[SADB_X_EXT_NAT_T_SPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
++	[SADB_X_EXT_NAT_T_DPORT]	= (u8) sizeof(struct sadb_x_nat_t_port),
++	[SADB_X_EXT_NAT_T_OA]		= (u8) sizeof(struct sadb_address),
++};
++
++/* Verify sadb_address_{len,prefixlen} against sa_family.  */
++static int verify_address_len(void *p)
++{
++	struct sadb_address *sp = p;
++	struct sockaddr *addr = (struct sockaddr *)(sp + 1);
++	struct sockaddr_in *sin;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	struct sockaddr_in6 *sin6;
++#endif
++	int len;
++
++	switch (addr->sa_family) {
++	case AF_INET:
++		len  = sizeof(*sp) + sizeof(*sin) + (sizeof(uint64_t) - 1);
++		len /= sizeof(uint64_t);
++		if (sp->sadb_address_len != len ||
++		    sp->sadb_address_prefixlen > 32)
++			return -EINVAL;
++		break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		len  = sizeof(*sp) + sizeof(*sin6) + (sizeof(uint64_t) - 1);
++		len /= sizeof(uint64_t);
++		if (sp->sadb_address_len != len ||
++		    sp->sadb_address_prefixlen > 128)
++			return -EINVAL;
++		break;
++#endif
++	default:
++		/* The user is employing the kernel to keep track of
++		 * security associations for another protocol, such as
++		 * OSPF/RSVP/RIPV2/MIP.  It is the user's job to verify
++		 * the lengths.
++		 *
++		 * XXX Actually, association/policy database is not yet
++		 * XXX able to cope with arbitrary sockaddr families.
++		 * XXX When it can, remove this -EINVAL.  -DaveM
++		 */
++		return -EINVAL;
++	};
++
++	return 0;
++}
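++
++/* For AF_INET the expected length works out to
++ * (sizeof(struct sadb_address) + sizeof(struct sockaddr_in) + 7) / 8,
++ * i.e. (8 + 16 + 7) / 8 = 3 eight-byte units, 24 bytes on the wire.
++ */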
++
++static int present_and_same_family(struct sadb_address *src,
++				   struct sadb_address *dst)
++{
++	struct sockaddr *s_addr, *d_addr;
++
++	if (!src || !dst)
++		return 0;
++
++	s_addr = (struct sockaddr *)(src + 1);
++	d_addr = (struct sockaddr *)(dst + 1);
++	if (s_addr->sa_family != d_addr->sa_family)
++		return 0;
++	if (s_addr->sa_family != AF_INET
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	    && s_addr->sa_family != AF_INET6
++#endif
++		)
++		return 0;
++
++	return 1;
++}
++
++static int parse_exthdrs(struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	char *p = (char *) hdr;
++	int len = skb->len;
++
++	len -= sizeof(*hdr);
++	p += sizeof(*hdr);
++	while (len > 0) {
++		struct sadb_ext *ehdr = (struct sadb_ext *) p;
++		uint16_t ext_type;
++		int ext_len;
++
++		ext_len  = ehdr->sadb_ext_len;
++		ext_len *= sizeof(uint64_t);
++		ext_type = ehdr->sadb_ext_type;
++		if (ext_len < sizeof(uint64_t) ||
++		    ext_len > len ||
++		    ext_type == SADB_EXT_RESERVED)
++			return -EINVAL;
++
++		if (ext_type <= SADB_EXT_MAX) {
++			int min = (int) sadb_ext_min_len[ext_type];
++			if (ext_len < min)
++				return -EINVAL;
++			if (ext_hdrs[ext_type-1] != NULL)
++				return -EINVAL;
++			if (ext_type == SADB_EXT_ADDRESS_SRC ||
++			    ext_type == SADB_EXT_ADDRESS_DST ||
++			    ext_type == SADB_EXT_ADDRESS_PROXY ||
++			    ext_type == SADB_X_EXT_NAT_T_OA) {
++				if (verify_address_len(p))
++					return -EINVAL;
++			}				
++			ext_hdrs[ext_type-1] = p;
++		}
++		p   += ext_len;
++		len -= ext_len;
++	}
++
++	return 0;
++}
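++
++/* All sadb_ext_len values are in 8-byte units; a 16-byte extension is
++ * therefore transmitted with a length field of 2, and parse_exthdrs()
++ * above rejects anything shorter than one unit or longer than the
++ * space left in the message.
++ */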
++
++static uint16_t
++pfkey_satype2proto(uint8_t satype)
++{
++	switch (satype) {
++	case SADB_SATYPE_UNSPEC:
++		return IPSEC_PROTO_ANY;
++	case SADB_SATYPE_AH:
++		return IPPROTO_AH;
++	case SADB_SATYPE_ESP:
++		return IPPROTO_ESP;
++	case SADB_X_SATYPE_IPCOMP:
++		return IPPROTO_COMP;
++	default:
++		return 0;
++	}
++	/* NOTREACHED */
++}
++
++static uint8_t
++pfkey_proto2satype(uint16_t proto)
++{
++	switch (proto) {
++	case IPPROTO_AH:
++		return SADB_SATYPE_AH;
++	case IPPROTO_ESP:
++		return SADB_SATYPE_ESP;
++	case IPPROTO_COMP:
++		return SADB_X_SATYPE_IPCOMP;
++	default:
++		return 0;
++	}
++	/* NOTREACHED */
++}
++
++/* BTW, this scheme means that there is no way with PFKEY2 sockets to
++ * say specifically 'just raw sockets' as we encode them as 255.
++ */
++
++static uint8_t pfkey_proto_to_xfrm(uint8_t proto)
++{
++	return (proto == IPSEC_PROTO_ANY ? 0 : proto);
++}
++
++static uint8_t pfkey_proto_from_xfrm(uint8_t proto)
++{
++	return (proto ? proto : IPSEC_PROTO_ANY);
++}
++
++static int pfkey_sadb_addr2xfrm_addr(struct sadb_address *addr,
++				     xfrm_address_t *xaddr)
++{
++	switch (((struct sockaddr*)(addr + 1))->sa_family) {
++	case AF_INET:
++		xaddr->a4 = 
++			((struct sockaddr_in *)(addr + 1))->sin_addr.s_addr;
++		return AF_INET;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		memcpy(xaddr->a6, 
++		       &((struct sockaddr_in6 *)(addr + 1))->sin6_addr,
++		       sizeof(struct in6_addr));
++		return AF_INET6;
++#endif
++	default:
++		return 0;
++	}
++	/* NOTREACHED */
++}
++
++static struct  xfrm_state *pfkey_xfrm_state_lookup(struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct sadb_sa *sa;
++	struct sadb_address *addr;
++	uint16_t proto;
++	unsigned short family;
++	xfrm_address_t *xaddr;
++
++	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
++	if (sa == NULL)
++		return NULL;
++
++	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
++	if (proto == 0)
++		return NULL;
++
++	/* sadb_address_len should be checked by caller */
++	addr = (struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1];
++	if (addr == NULL)
++		return NULL;
++
++	family = ((struct sockaddr *)(addr + 1))->sa_family;
++	switch (family) {
++	case AF_INET:
++		xaddr = (xfrm_address_t *)&((struct sockaddr_in *)(addr + 1))->sin_addr;
++		break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		xaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(addr + 1))->sin6_addr;
++		break;
++#endif
++	default:
++		xaddr = NULL;
++	}
++
++	if (!xaddr)
++		return NULL;
++
++	return xfrm_state_lookup(xaddr, sa->sadb_sa_spi, proto, family);
++}
++
++#define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1)))
++static int
++pfkey_sockaddr_size(sa_family_t family)
++{
++	switch (family) {
++	case AF_INET:
++		return PFKEY_ALIGN8(sizeof(struct sockaddr_in));
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		return PFKEY_ALIGN8(sizeof(struct sockaddr_in6));
++#endif
++	default:
++		return 0;
++	}
++	/* NOTREACHED */
++}
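++
++/* PFKEY_ALIGN8 rounds a size up to the next multiple of 8 to honour
++ * the 64-bit-unit convention: sizeof(struct sockaddr_in) == 16 stays
++ * 16, while sizeof(struct sockaddr_in6) == 28 rounds up to 32.
++ */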
++
++static struct sk_buff * pfkey_xfrm_state2msg(struct xfrm_state *x, int add_keys, int hsc)
++{
++	struct sk_buff *skb;
++	struct sadb_msg *hdr;
++	struct sadb_sa *sa;
++	struct sadb_lifetime *lifetime;
++	struct sadb_address *addr;
++	struct sadb_key *key;
++	struct sadb_x_sa2 *sa2;
++	struct sockaddr_in *sin;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	struct sockaddr_in6 *sin6;
++#endif
++	int size;
++	int auth_key_size = 0;
++	int encrypt_key_size = 0;
++	int sockaddr_size;
++	struct xfrm_encap_tmpl *natt = NULL;
++
++	/* address family check */
++	sockaddr_size = pfkey_sockaddr_size(x->props.family);
++	if (!sockaddr_size)
++		return ERR_PTR(-EINVAL);
++
++	/* base, SA, (lifetime (HSC),) address(SD), (address(P),)
++	   key(AE), (identity(SD),) (sensitivity) */
++	size = sizeof(struct sadb_msg) + sizeof(struct sadb_sa) +
++		sizeof(struct sadb_lifetime) +
++		((hsc & 1) ? sizeof(struct sadb_lifetime) : 0) +
++		((hsc & 2) ? sizeof(struct sadb_lifetime) : 0) +
++		sizeof(struct sadb_address)*2 +
++		sockaddr_size*2 +
++		sizeof(struct sadb_x_sa2);
++	/* identity & sensitivity */
++
++	if ((x->props.family == AF_INET &&
++	     x->sel.saddr.a4 != x->props.saddr.a4)
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	    || (x->props.family == AF_INET6 &&
++		memcmp (x->sel.saddr.a6, x->props.saddr.a6, sizeof (struct in6_addr)))
++#endif
++		)
++		size += sizeof(struct sadb_address) + sockaddr_size;
++
++	if (add_keys) {
++		if (x->aalg && x->aalg->alg_key_len) {
++			auth_key_size = 
++				PFKEY_ALIGN8((x->aalg->alg_key_len + 7) / 8); 
++			size += sizeof(struct sadb_key) + auth_key_size;
++		}
++		if (x->ealg && x->ealg->alg_key_len) {
++			encrypt_key_size = 
++				PFKEY_ALIGN8((x->ealg->alg_key_len+7) / 8); 
++			size += sizeof(struct sadb_key) + encrypt_key_size;
++		}
++	}
++	if (x->encap)
++		natt = x->encap;
++
++	if (natt && natt->encap_type) {
++		size += sizeof(struct sadb_x_nat_t_type);
++		size += sizeof(struct sadb_x_nat_t_port);
++		size += sizeof(struct sadb_x_nat_t_port);
++	}
++
++	skb =  alloc_skb(size + 16, GFP_ATOMIC);
++	if (skb == NULL)
++		return ERR_PTR(-ENOBUFS);
++
++	/* call should fill header later */
++	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
++	memset(hdr, 0, size);	/* XXX do we need this ? */
++	hdr->sadb_msg_len = size / sizeof(uint64_t);
++
++	/* sa */
++	sa = (struct sadb_sa *)  skb_put(skb, sizeof(struct sadb_sa));
++	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
++	sa->sadb_sa_exttype = SADB_EXT_SA;
++	sa->sadb_sa_spi = x->id.spi;
++	sa->sadb_sa_replay = x->props.replay_window;
++	sa->sadb_sa_state = SADB_SASTATE_DYING;
++	if (x->km.state == XFRM_STATE_VALID && !x->km.dying)
++		sa->sadb_sa_state = SADB_SASTATE_MATURE;
++	else if (x->km.state == XFRM_STATE_ACQ)
++		sa->sadb_sa_state = SADB_SASTATE_LARVAL;
++	else if (x->km.state == XFRM_STATE_EXPIRED)
++		sa->sadb_sa_state = SADB_SASTATE_DEAD;
++	sa->sadb_sa_auth = 0;
++	if (x->aalg) {
++		struct xfrm_algo_desc *a = xfrm_aalg_get_byname(x->aalg->alg_name);
++		sa->sadb_sa_auth = a ? a->desc.sadb_alg_id : 0;
++	}
++	sa->sadb_sa_encrypt = 0;
++	BUG_ON(x->ealg && x->calg);
++	if (x->ealg) {
++		struct xfrm_algo_desc *a = xfrm_ealg_get_byname(x->ealg->alg_name);
++		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
++	}
++	/* KAME compatible: sadb_sa_encrypt is overloaded with calg id */
++	if (x->calg) {
++		struct xfrm_algo_desc *a = xfrm_calg_get_byname(x->calg->alg_name);
++		sa->sadb_sa_encrypt = a ? a->desc.sadb_alg_id : 0;
++	}
++
++	sa->sadb_sa_flags = 0;
++	if (x->props.flags & XFRM_STATE_NOECN)
++		sa->sadb_sa_flags |= SADB_SAFLAGS_NOECN;
++
++	/* hard time */
++	if (hsc & 2) {
++		lifetime = (struct sadb_lifetime *)  skb_put(skb, 
++							     sizeof(struct sadb_lifetime));
++		lifetime->sadb_lifetime_len =
++			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
++		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
++		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.hard_packet_limit);
++		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.hard_byte_limit);
++		lifetime->sadb_lifetime_addtime = x->lft.hard_add_expires_seconds;
++		lifetime->sadb_lifetime_usetime = x->lft.hard_use_expires_seconds;
++	}
++	/* soft time */
++	if (hsc & 1) {
++		lifetime = (struct sadb_lifetime *)  skb_put(skb, 
++							     sizeof(struct sadb_lifetime));
++		lifetime->sadb_lifetime_len =
++			sizeof(struct sadb_lifetime)/sizeof(uint64_t);
++		lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
++		lifetime->sadb_lifetime_allocations =  _X2KEY(x->lft.soft_packet_limit);
++		lifetime->sadb_lifetime_bytes = _X2KEY(x->lft.soft_byte_limit);
++		lifetime->sadb_lifetime_addtime = x->lft.soft_add_expires_seconds;
++		lifetime->sadb_lifetime_usetime = x->lft.soft_use_expires_seconds;
++	}
++	/* current time */
++	lifetime = (struct sadb_lifetime *)  skb_put(skb,
++						     sizeof(struct sadb_lifetime));
++	lifetime->sadb_lifetime_len =
++		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
++	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
++	lifetime->sadb_lifetime_allocations = x->curlft.packets;
++	lifetime->sadb_lifetime_bytes = x->curlft.bytes;
++	lifetime->sadb_lifetime_addtime = x->curlft.add_time;
++	lifetime->sadb_lifetime_usetime = x->curlft.use_time;
++	/* src address */
++	addr = (struct sadb_address*) skb_put(skb, 
++					      sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len = 
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
++	/* "if the ports are non-zero, then the sadb_address_proto field, 
++	   normally zero, MUST be filled in with the transport 
++	   protocol's number." - RFC2367 */
++	addr->sadb_address_proto = 0; 
++	addr->sadb_address_reserved = 0;
++	if (x->props.family == AF_INET) {
++		addr->sadb_address_prefixlen = 32;
++
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = x->props.saddr.a4;
++		sin->sin_port = 0;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (x->props.family == AF_INET6) {
++ 		addr->sadb_address_prefixlen = 128;
++
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = 0;
++		sin6->sin6_flowinfo = 0;
++ 		memcpy(&sin6->sin6_addr, x->props.saddr.a6,
++		       sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++ 	}
++#endif
++	else
++		BUG();
++
++	/* dst address */
++	addr = (struct sadb_address*) skb_put(skb, 
++					      sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len = 
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
++	addr->sadb_address_proto = 0; 
++	addr->sadb_address_prefixlen = 32; /* XXX */ 
++	addr->sadb_address_reserved = 0;
++	if (x->props.family == AF_INET) {
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = x->id.daddr.a4;
++		sin->sin_port = 0;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++
++		if (x->sel.saddr.a4 != x->props.saddr.a4) {
++			addr = (struct sadb_address*) skb_put(skb, 
++				sizeof(struct sadb_address)+sockaddr_size);
++			addr->sadb_address_len = 
++				(sizeof(struct sadb_address)+sockaddr_size)/
++				sizeof(uint64_t);
++			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
++			addr->sadb_address_proto =
++				pfkey_proto_from_xfrm(x->sel.proto);
++			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
++			addr->sadb_address_reserved = 0;
++
++			sin = (struct sockaddr_in *) (addr + 1);
++			sin->sin_family = AF_INET;
++			sin->sin_addr.s_addr = x->sel.saddr.a4;
++			sin->sin_port = x->sel.sport;
++			memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++		}
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (x->props.family == AF_INET6) {
++		addr->sadb_address_prefixlen = 128;
++
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = 0;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr, x->id.daddr.a6, sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++
++		if (memcmp (x->sel.saddr.a6, x->props.saddr.a6,
++			    sizeof(struct in6_addr))) {
++			addr = (struct sadb_address *) skb_put(skb, 
++				sizeof(struct sadb_address)+sockaddr_size);
++			addr->sadb_address_len = 
++				(sizeof(struct sadb_address)+sockaddr_size)/
++				sizeof(uint64_t);
++			addr->sadb_address_exttype = SADB_EXT_ADDRESS_PROXY;
++			addr->sadb_address_proto =
++				pfkey_proto_from_xfrm(x->sel.proto);
++			addr->sadb_address_prefixlen = x->sel.prefixlen_s;
++			addr->sadb_address_reserved = 0;
++
++			sin6 = (struct sockaddr_in6 *) (addr + 1);
++			sin6->sin6_family = AF_INET6;
++			sin6->sin6_port = x->sel.sport;
++			sin6->sin6_flowinfo = 0;
++			memcpy(&sin6->sin6_addr, x->sel.saddr.a6,
++			       sizeof(struct in6_addr));
++			sin6->sin6_scope_id = 0;
++		}
++	}
++#endif
++	else
++		BUG();
++
++	/* auth key */
++	if (add_keys && auth_key_size) {
++		key = (struct sadb_key *) skb_put(skb, 
++						  sizeof(struct sadb_key)+auth_key_size);
++		key->sadb_key_len = (sizeof(struct sadb_key) + auth_key_size) /
++			sizeof(uint64_t);
++		key->sadb_key_exttype = SADB_EXT_KEY_AUTH;
++		key->sadb_key_bits = x->aalg->alg_key_len;
++		key->sadb_key_reserved = 0;
++		memcpy(key + 1, x->aalg->alg_key, (x->aalg->alg_key_len+7)/8);
++	}
++	/* encrypt key */
++	if (add_keys && encrypt_key_size) {
++		key = (struct sadb_key *) skb_put(skb, 
++						  sizeof(struct sadb_key)+encrypt_key_size);
++		key->sadb_key_len = (sizeof(struct sadb_key) + 
++				     encrypt_key_size) / sizeof(uint64_t);
++		key->sadb_key_exttype = SADB_EXT_KEY_ENCRYPT;
++		key->sadb_key_bits = x->ealg->alg_key_len;
++		key->sadb_key_reserved = 0;
++		memcpy(key + 1, x->ealg->alg_key, 
++		       (x->ealg->alg_key_len+7)/8);
++	}
++
++	/* sa */
++	sa2 = (struct sadb_x_sa2 *)  skb_put(skb, sizeof(struct sadb_x_sa2));
++	sa2->sadb_x_sa2_len = sizeof(struct sadb_x_sa2)/sizeof(uint64_t);
++	sa2->sadb_x_sa2_exttype = SADB_X_EXT_SA2;
++	sa2->sadb_x_sa2_mode = x->props.mode + 1;
++	sa2->sadb_x_sa2_reserved1 = 0;
++	sa2->sadb_x_sa2_reserved2 = 0;
++	sa2->sadb_x_sa2_sequence = 0;
++	sa2->sadb_x_sa2_reqid = x->props.reqid;
++
++	if (natt && natt->encap_type) {
++		struct sadb_x_nat_t_type *n_type;
++		struct sadb_x_nat_t_port *n_port;
++
++		/* type */
++		n_type = (struct sadb_x_nat_t_type*) skb_put(skb, sizeof(*n_type));
++		n_type->sadb_x_nat_t_type_len = sizeof(*n_type)/sizeof(uint64_t);
++		n_type->sadb_x_nat_t_type_exttype = SADB_X_EXT_NAT_T_TYPE;
++		n_type->sadb_x_nat_t_type_type = natt->encap_type;
++		n_type->sadb_x_nat_t_type_reserved[0] = 0;
++		n_type->sadb_x_nat_t_type_reserved[1] = 0;
++		n_type->sadb_x_nat_t_type_reserved[2] = 0;
++
++		/* source port */
++		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
++		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
++		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
++		n_port->sadb_x_nat_t_port_port = natt->encap_sport;
++		n_port->sadb_x_nat_t_port_reserved = 0;
++
++		/* dest port */
++		n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
++		n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
++		n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
++		n_port->sadb_x_nat_t_port_port = natt->encap_dport;
++		n_port->sadb_x_nat_t_port_reserved = 0;
++	}
++
++	return skb;
++}
++
++static struct xfrm_state * pfkey_msg2xfrm_state(struct sadb_msg *hdr, 
++						void **ext_hdrs)
++{
++	struct xfrm_state *x; 
++	struct sadb_lifetime *lifetime;
++	struct sadb_sa *sa;
++	struct sadb_key *key;
++	uint16_t proto;
++	int err;
++	
++
++	sa = (struct sadb_sa *) ext_hdrs[SADB_EXT_SA-1];
++	if (!sa ||
++	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
++				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
++		return ERR_PTR(-EINVAL);
++	if (hdr->sadb_msg_satype == SADB_SATYPE_ESP &&
++	    !ext_hdrs[SADB_EXT_KEY_ENCRYPT-1])
++		return ERR_PTR(-EINVAL);
++	if (hdr->sadb_msg_satype == SADB_SATYPE_AH &&
++	    !ext_hdrs[SADB_EXT_KEY_AUTH-1])
++		return ERR_PTR(-EINVAL);
++	if (!!ext_hdrs[SADB_EXT_LIFETIME_HARD-1] !=
++	    !!ext_hdrs[SADB_EXT_LIFETIME_SOFT-1])
++		return ERR_PTR(-EINVAL);
++
++	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
++	if (proto == 0)
++		return ERR_PTR(-EINVAL);
++
++	/* default error is no buffer space */
++	err = -ENOBUFS;
++
++	/* RFC2367:
++
++   Only SADB_SASTATE_MATURE SAs may be submitted in an SADB_ADD message.
++   SADB_SASTATE_LARVAL SAs are created by SADB_GETSPI and it is not
++   sensible to add a new SA in the DYING or SADB_SASTATE_DEAD state.
++   Therefore, the sadb_sa_state field of all submitted SAs MUST be
++   SADB_SASTATE_MATURE and the kernel MUST return an error if this is
++   not true.
++
++           However, KAME setkey always uses SADB_SASTATE_LARVAL.
++	   Hence, we have to _ignore_ sadb_sa_state, which is also reasonable.
++	 */
++	if (sa->sadb_sa_auth > SADB_AALG_MAX ||
++	    (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP &&
++	     sa->sadb_sa_encrypt > SADB_X_CALG_MAX) ||
++	    sa->sadb_sa_encrypt > SADB_EALG_MAX)
++		return ERR_PTR(-EINVAL);
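++	/* Reject key extensions whose key material is empty or does not
++	 * fit within the extension length (counted in 64-bit words). */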
++	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
++	if (key != NULL &&
++	    sa->sadb_sa_auth != SADB_X_AALG_NULL &&
++	    ((key->sadb_key_bits+7) / 8 == 0 ||
++	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
++		return ERR_PTR(-EINVAL);
++	key = ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
++	if (key != NULL &&
++	    sa->sadb_sa_encrypt != SADB_EALG_NULL &&
++	    ((key->sadb_key_bits+7) / 8 == 0 ||
++	     (key->sadb_key_bits+7) / 8 > key->sadb_key_len * sizeof(uint64_t)))
++		return ERR_PTR(-EINVAL);
++
++	x = xfrm_state_alloc();
++	if (x == NULL)
++		return ERR_PTR(-ENOBUFS);
++
++	x->id.proto = proto;
++	x->id.spi = sa->sadb_sa_spi;
++	x->props.replay_window = sa->sadb_sa_replay;
++	if (sa->sadb_sa_flags & SADB_SAFLAGS_NOECN)
++		x->props.flags |= XFRM_STATE_NOECN;
++
++	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_HARD-1];
++	if (lifetime != NULL) {
++		x->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
++		x->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
++		x->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
++		x->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
++	}
++	lifetime = (struct sadb_lifetime*) ext_hdrs[SADB_EXT_LIFETIME_SOFT-1];
++	if (lifetime != NULL) {
++		x->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
++		x->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
++		x->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
++		x->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
++	}
++	key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_AUTH-1];
++	if (sa->sadb_sa_auth) {
++		int keysize = 0;
++		struct xfrm_algo_desc *a = xfrm_aalg_get_byid(sa->sadb_sa_auth);
++		if (!a) {
++			err = -ENOSYS;
++			goto out;
++		}
++		if (key)
++			keysize = (key->sadb_key_bits + 7) / 8;
++		x->aalg = kmalloc(sizeof(*x->aalg) + keysize, GFP_KERNEL);
++		if (!x->aalg)
++			goto out;
++		strcpy(x->aalg->alg_name, a->name);
++		x->aalg->alg_key_len = 0;
++		if (key) {
++			x->aalg->alg_key_len = key->sadb_key_bits;
++			memcpy(x->aalg->alg_key, key+1, keysize);
++		}
++		x->props.aalgo = sa->sadb_sa_auth;
++		/* x->algo.flags = sa->sadb_sa_flags; */
++	}
++	if (sa->sadb_sa_encrypt) {
++		if (hdr->sadb_msg_satype == SADB_X_SATYPE_IPCOMP) {
++			struct xfrm_algo_desc *a = xfrm_calg_get_byid(sa->sadb_sa_encrypt);
++			if (!a) {
++				err = -ENOSYS;
++				goto out;
++			}
++			x->calg = kmalloc(sizeof(*x->calg), GFP_KERNEL);
++			if (!x->calg)
++				goto out;
++			strcpy(x->calg->alg_name, a->name);
++			x->props.calgo = sa->sadb_sa_encrypt;
++		} else {
++			int keysize = 0;
++			struct xfrm_algo_desc *a = xfrm_ealg_get_byid(sa->sadb_sa_encrypt);
++			if (!a) {
++				err = -ENOSYS;
++				goto out;
++			}
++			key = (struct sadb_key*) ext_hdrs[SADB_EXT_KEY_ENCRYPT-1];
++			if (key)
++				keysize = (key->sadb_key_bits + 7) / 8;
++			x->ealg = kmalloc(sizeof(*x->ealg) + keysize, GFP_KERNEL);
++			if (!x->ealg)
++				goto out;
++			strcpy(x->ealg->alg_name, a->name);
++			x->ealg->alg_key_len = 0;
++			if (key) {
++				x->ealg->alg_key_len = key->sadb_key_bits;
++				memcpy(x->ealg->alg_key, key+1, keysize);
++			}
++			x->props.ealgo = sa->sadb_sa_encrypt;
++		}
++	}
++	/* x->algo.flags = sa->sadb_sa_flags; */
++
++	x->props.family = pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_SRC-1], 
++						    &x->props.saddr);
++	if (!x->props.family) {
++		err = -EAFNOSUPPORT;
++		goto out;
++	}
++	pfkey_sadb_addr2xfrm_addr((struct sadb_address *) ext_hdrs[SADB_EXT_ADDRESS_DST-1], 
++				  &x->id.daddr);
++
++	if (ext_hdrs[SADB_X_EXT_SA2-1]) {
++		struct sadb_x_sa2 *sa2 = (void*)ext_hdrs[SADB_X_EXT_SA2-1];
++		x->props.mode = sa2->sadb_x_sa2_mode;
++		if (x->props.mode)
++			x->props.mode--;
++		x->props.reqid = sa2->sadb_x_sa2_reqid;
++	}
++
++	if (ext_hdrs[SADB_EXT_ADDRESS_PROXY-1]) {
++		struct sadb_address *addr = ext_hdrs[SADB_EXT_ADDRESS_PROXY-1];
++
++		/* Nobody uses this, but we try. */
++		x->sel.family = pfkey_sadb_addr2xfrm_addr(addr, &x->sel.saddr);
++		x->sel.prefixlen_s = addr->sadb_address_prefixlen;
++	}
++
++	if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
++		struct sadb_x_nat_t_type* n_type;
++		struct xfrm_encap_tmpl *natt;
++
++		x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
++		if (!x->encap)
++			goto out;
++
++		natt = x->encap;
++		n_type = ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1];
++		natt->encap_type = n_type->sadb_x_nat_t_type_type;
++
++		if (ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1]) {
++			struct sadb_x_nat_t_port* n_port =
++				ext_hdrs[SADB_X_EXT_NAT_T_SPORT-1];
++			natt->encap_sport = n_port->sadb_x_nat_t_port_port;
++		}
++		if (ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1]) {
++			struct sadb_x_nat_t_port* n_port =
++				ext_hdrs[SADB_X_EXT_NAT_T_DPORT-1];
++			natt->encap_dport = n_port->sadb_x_nat_t_port_port;
++		}
++	}
++
++	x->type = xfrm_get_type(proto, x->props.family);
++	if (x->type == NULL) {
++		err = -ENOPROTOOPT;
++		goto out;
++	}
++	if (x->type->init_state(x, NULL)) {
++		err = -EINVAL;
++		goto out;
++	}
++	x->km.seq = hdr->sadb_msg_seq;
++	x->km.state = XFRM_STATE_VALID;
++	return x;
++
++out:
++	x->km.state = XFRM_STATE_DEAD;
++	xfrm_state_put(x);
++	return ERR_PTR(err);
++}
++
++static int pfkey_reserved(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	return -EOPNOTSUPP;
++}
++
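++/* SADB_GETSPI: locate (by acquire sequence number) or create a larval
++ * state for the given src/dst pair, allocate an SPI from the requested
++ * range (default 0x100-0x0fffffff), and unicast the result back to the
++ * requesting socket. */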
++static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct sk_buff *resp_skb;
++	struct sadb_x_sa2 *sa2;
++	struct sadb_address *saddr, *daddr;
++	struct sadb_msg *out_hdr;
++	struct xfrm_state *x = NULL;
++	u8 mode;
++	u32 reqid;
++	u8 proto;
++	unsigned short family;
++	xfrm_address_t *xsaddr = NULL, *xdaddr = NULL;
++
++	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
++				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
++		return -EINVAL;
++
++	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
++	if (proto == 0)
++		return -EINVAL;
++
++	if ((sa2 = ext_hdrs[SADB_X_EXT_SA2-1]) != NULL) {
++		mode = sa2->sadb_x_sa2_mode - 1;
++		reqid = sa2->sadb_x_sa2_reqid;
++	} else {
++		mode = 0;
++		reqid = 0;
++	}
++
++	saddr = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
++	daddr = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
++
++	family = ((struct sockaddr *)(saddr + 1))->sa_family;
++	switch (family) {
++	case AF_INET:
++		xdaddr = (xfrm_address_t *)&((struct sockaddr_in *)(daddr + 1))->sin_addr.s_addr;
++		xsaddr = (xfrm_address_t *)&((struct sockaddr_in *)(saddr + 1))->sin_addr.s_addr;
++		break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		xdaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(daddr + 1))->sin6_addr;
++		xsaddr = (xfrm_address_t *)&((struct sockaddr_in6 *)(saddr + 1))->sin6_addr;
++		break;
++#endif
++	}
++
++	if (hdr->sadb_msg_seq) {
++		x = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
++		if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) {
++			xfrm_state_put(x);
++			x = NULL;
++		}
++	}
++
++	if (!x)
++		x = xfrm_find_acq(mode, reqid, proto, xdaddr, xsaddr, 1, family);
++
++	if (x == NULL)
++		return -ENOENT;
++
++	resp_skb = ERR_PTR(-ENOENT);
++
++	spin_lock_bh(&x->lock);
++	if (x->km.state != XFRM_STATE_DEAD) {
++		struct sadb_spirange *range = ext_hdrs[SADB_EXT_SPIRANGE-1];
++		u32 min_spi, max_spi;
++
++		if (range != NULL) {
++			min_spi = range->sadb_spirange_min;
++			max_spi = range->sadb_spirange_max;
++		} else {
++			min_spi = htonl(0x100);
++			max_spi = htonl(0x0fffffff);
++		}
++		xfrm_alloc_spi(x, min_spi, max_spi);
++		if (x->id.spi)
++			resp_skb = pfkey_xfrm_state2msg(x, 0, 3);
++	}
++	spin_unlock_bh(&x->lock);
++
++	if (IS_ERR(resp_skb)) {
++		xfrm_state_put(x);
++		return  PTR_ERR(resp_skb);
++	}
++
++	out_hdr = (struct sadb_msg *) resp_skb->data;
++	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = SADB_GETSPI;
++	out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_reserved = 0;
++	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
++	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
++
++	xfrm_state_put(x);
++
++	pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk);
++
++	return 0;
++}
++
++static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct xfrm_state *x;
++
++	if (hdr->sadb_msg_len != sizeof(struct sadb_msg)/8)
++		return -EOPNOTSUPP;
++
++	if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0)
++		return 0;
++
++	x = xfrm_find_acq_byseq(hdr->sadb_msg_seq);
++	if (x == NULL)
++		return 0;
++
++	spin_lock_bh(&x->lock);
++	if (x->km.state == XFRM_STATE_ACQ) {
++		x->km.state = XFRM_STATE_ERROR;
++		wake_up(&km_waitq);
++	}
++	spin_unlock_bh(&x->lock);
++	xfrm_state_put(x);
++	return 0;
++}
++
++
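++/* Shared handler for SADB_ADD and SADB_UPDATE: convert the message to
++ * an xfrm_state, insert or update it in the SAD, and broadcast the
++ * resulting state (without key material) to all PF_KEY sockets. */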
++static int pfkey_add(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++	struct xfrm_state *x;
++	int err;
++
++	xfrm_probe_algs();
++	
++	x = pfkey_msg2xfrm_state(hdr, ext_hdrs);
++	if (IS_ERR(x))
++		return PTR_ERR(x);
++
++	if (hdr->sadb_msg_type == SADB_ADD)
++		err = xfrm_state_add(x);
++	else
++		err = xfrm_state_update(x);
++
++	if (err < 0) {
++		x->km.state = XFRM_STATE_DEAD;
++		xfrm_state_put(x);
++		return err;
++	}
++
++	out_skb = pfkey_xfrm_state2msg(x, 0, 3);
++	if (IS_ERR(out_skb))
++		return  PTR_ERR(out_skb); /* XXX Should we return 0 here? */
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
++	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_reserved = 0;
++	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
++	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
++
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
++
++	return 0;
++}
++
++static int pfkey_delete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct xfrm_state *x;
++
++	if (!ext_hdrs[SADB_EXT_SA-1] ||
++	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
++				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
++		return -EINVAL;
++
++	x = pfkey_xfrm_state_lookup(hdr, ext_hdrs);
++	if (x == NULL)
++		return -ESRCH;
++
++	if (xfrm_state_kern(x)) {
++		xfrm_state_put(x);
++		return -EPERM;
++	}
++	
++	xfrm_state_delete(x);
++	xfrm_state_put(x);
++
++	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, 
++			BROADCAST_ALL, sk);
++
++	return 0;
++}
++
++static int pfkey_get(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	__u8 proto;
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++	struct xfrm_state *x;
++
++	if (!ext_hdrs[SADB_EXT_SA-1] ||
++	    !present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
++				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]))
++		return -EINVAL;
++
++	x = pfkey_xfrm_state_lookup(hdr, ext_hdrs);
++	if (x == NULL)
++		return -ESRCH;
++
++	out_skb = pfkey_xfrm_state2msg(x, 1, 3);
++	proto = x->id.proto;
++	xfrm_state_put(x);
++	if (IS_ERR(out_skb))
++		return  PTR_ERR(out_skb);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = SADB_DUMP;
++	out_hdr->sadb_msg_satype = pfkey_proto2satype(proto);
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_reserved = 0;
++	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
++	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk);
++
++	return 0;
++}
++
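++/* Build the SADB_REGISTER reply: one sadb_supported extension each for
++ * the currently available authentication and encryption algorithms. */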
++static struct sk_buff *compose_sadb_supported(struct sadb_msg *orig, int allocation)
++{
++	struct sk_buff *skb;
++	struct sadb_msg *hdr;
++	int len, auth_len, enc_len, i;
++
++	auth_len = xfrm_count_auth_supported();
++	if (auth_len) {
++		auth_len *= sizeof(struct sadb_alg);
++		auth_len += sizeof(struct sadb_supported);
++	}
++	
++	enc_len = xfrm_count_enc_supported();
++	if (enc_len) {
++		enc_len *= sizeof(struct sadb_alg);
++		enc_len += sizeof(struct sadb_supported);
++	}
++	
++	len = enc_len + auth_len + sizeof(struct sadb_msg);
++
++	skb = alloc_skb(len + 16, allocation);
++	if (!skb)
++		goto out_put_algs;
++
++	hdr = (struct sadb_msg *) skb_put(skb, sizeof(*hdr));
++	pfkey_hdr_dup(hdr, orig);
++	hdr->sadb_msg_errno = 0;
++	hdr->sadb_msg_len = len / sizeof(uint64_t);
++
++	if (auth_len) {
++		struct sadb_supported *sp;
++		struct sadb_alg *ap;
++
++		sp = (struct sadb_supported *) skb_put(skb, auth_len);
++		ap = (struct sadb_alg *) (sp + 1);
++
++		sp->sadb_supported_len = auth_len / sizeof(uint64_t);
++		sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_AUTH;
++
++		for (i = 0; ; i++) {
++			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
++			if (!aalg)
++				break;
++			if (aalg->available)
++				*ap++ = aalg->desc;
++		}
++	}
++
++	if (enc_len) {
++		struct sadb_supported *sp;
++		struct sadb_alg *ap;
++
++		sp = (struct sadb_supported *) skb_put(skb, enc_len);
++		ap = (struct sadb_alg *) (sp + 1);
++
++		sp->sadb_supported_len = enc_len / sizeof(uint64_t);
++		sp->sadb_supported_exttype = SADB_EXT_SUPPORTED_ENCRYPT;
++
++		for (i = 0; ; i++) {
++			struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
++			if (!ealg)
++				break;
++			if (ealg->available)
++				*ap++ = ealg->desc;
++		}
++	}
++
++out_put_algs:
++	return skb;
++}
++
++static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct pfkey_opt *pfk = pfkey_sk(sk);
++	struct sk_buff *supp_skb;
++
++	if (hdr->sadb_msg_satype > SADB_SATYPE_MAX)
++		return -EINVAL;
++
++	if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC) {
++		if (pfk->registered&(1<<hdr->sadb_msg_satype))
++			return -EEXIST;
++		pfk->registered |= (1<<hdr->sadb_msg_satype);
++	}
++
++	xfrm_probe_algs();
++	
++	supp_skb = compose_sadb_supported(hdr, GFP_KERNEL);
++	if (!supp_skb) {
++		if (hdr->sadb_msg_satype != SADB_SATYPE_UNSPEC)
++			pfk->registered &= ~(1<<hdr->sadb_msg_satype);
++
++		return -ENOBUFS;
++	}
++
++	pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk);
++
++	return 0;
++}
++
++static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	unsigned proto;
++	struct sk_buff *skb_out;
++	struct sadb_msg *hdr_out;
++
++	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
++	if (proto == 0)
++		return -EINVAL;
++
++	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
++	if (!skb_out)
++		return -ENOBUFS;
++
++	xfrm_state_flush(proto);
++
++	hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
++	pfkey_hdr_dup(hdr_out, hdr);
++	hdr_out->sadb_msg_errno = (uint8_t) 0;
++	hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
++
++	pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
++
++	return 0;
++}
++
++struct pfkey_dump_data
++{
++	struct sk_buff *skb;
++	struct sadb_msg *hdr;
++	struct sock *sk;
++};
++
++static int dump_sa(struct xfrm_state *x, int count, void *ptr)
++{
++	struct pfkey_dump_data *data = ptr;
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++
++	out_skb = pfkey_xfrm_state2msg(x, 1, 3);
++	if (IS_ERR(out_skb))
++		return PTR_ERR(out_skb);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = data->hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = SADB_DUMP;
++	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_reserved = 0;
++	out_hdr->sadb_msg_seq = count;
++	out_hdr->sadb_msg_pid = data->hdr->sadb_msg_pid;
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, data->sk);
++	return 0;
++}
++
++static int pfkey_dump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	u8 proto;
++	struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
++
++	proto = pfkey_satype2proto(hdr->sadb_msg_satype);
++	if (proto == 0)
++		return -EINVAL;
++
++	return xfrm_state_walk(proto, dump_sa, &data);
++}
++
++static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct pfkey_opt *pfk = pfkey_sk(sk);
++	int satype = hdr->sadb_msg_satype;
++
++	if (hdr->sadb_msg_len == (sizeof(*hdr) / sizeof(uint64_t))) {
++		/* XXX we mangle packet... */
++		hdr->sadb_msg_errno = 0;
++		if (satype != 0 && satype != 1)
++			return -EINVAL;
++		pfk->promisc = satype;
++	}
++	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_ALL, NULL);
++	return 0;
++}
++
++static int check_reqid(struct xfrm_policy *xp, int dir, int count, void *ptr)
++{
++	int i;
++	u32 reqid = *(u32*)ptr;
++
++	for (i=0; i<xp->xfrm_nr; i++) {
++		if (xp->xfrm_vec[i].reqid == reqid)
++			return -EEXIST;
++	}
++	return 0;
++}
++
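++/* Allocate a reqid above IPSEC_MANUAL_REQID_MAX that no installed
++ * policy is using yet; returns 0 if the space is exhausted. */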
++static u32 gen_reqid(void)
++{
++	u32 start;
++	static u32 reqid = IPSEC_MANUAL_REQID_MAX;
++
++	start = reqid;
++	do {
++		++reqid;
++		if (reqid == 0)
++			reqid = IPSEC_MANUAL_REQID_MAX+1;
++		if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST)
++			return reqid;
++	} while (reqid != start);
++	return 0;
++}
++
++static int
++parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq)
++{
++	struct xfrm_tmpl *t = xp->xfrm_vec + xp->xfrm_nr;
++	struct sockaddr_in *sin;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	struct sockaddr_in6 *sin6;
++#endif
++
++	if (xp->xfrm_nr >= XFRM_MAX_DEPTH)
++		return -ELOOP;
++
++	if (rq->sadb_x_ipsecrequest_mode == 0)
++		return -EINVAL;
++
++	t->id.proto = rq->sadb_x_ipsecrequest_proto; /* XXX check proto */
++	t->mode = rq->sadb_x_ipsecrequest_mode-1;
++	if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_USE)
++		t->optional = 1;
++	else if (rq->sadb_x_ipsecrequest_level == IPSEC_LEVEL_UNIQUE) {
++		t->reqid = rq->sadb_x_ipsecrequest_reqid;
++		if (t->reqid > IPSEC_MANUAL_REQID_MAX)
++			t->reqid = 0;
++		if (!t->reqid && !(t->reqid = gen_reqid()))
++			return -ENOBUFS;
++	}
++
++	/* addresses present only in tunnel mode */
++	if (t->mode) {
++		switch (xp->family) {
++		case AF_INET:
++			sin = (void*)(rq+1);
++			if (sin->sin_family != AF_INET)
++				return -EINVAL;
++			t->saddr.a4 = sin->sin_addr.s_addr;
++			sin++;
++			if (sin->sin_family != AF_INET)
++				return -EINVAL;
++			t->id.daddr.a4 = sin->sin_addr.s_addr;
++			break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++		case AF_INET6:
++			sin6 = (void *)(rq+1);
++			if (sin6->sin6_family != AF_INET6)
++				return -EINVAL;
++			memcpy(t->saddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
++			sin6++;
++			if (sin6->sin6_family != AF_INET6)
++				return -EINVAL;
++			memcpy(t->id.daddr.a6, &sin6->sin6_addr, sizeof(struct in6_addr));
++			break;
++#endif
++		default:
++			return -EINVAL;
++		}
++	}
++	/* No way to set this via KAME pfkey */
++	t->aalgos = t->ealgos = t->calgos = ~0;
++	xp->xfrm_nr++;
++	return 0;
++}
++
++static int
++parse_ipsecrequests(struct xfrm_policy *xp, struct sadb_x_policy *pol)
++{
++	int err;
++	int len = pol->sadb_x_policy_len*8 - sizeof(struct sadb_x_policy);
++	struct sadb_x_ipsecrequest *rq = (void*)(pol+1);
++
++	while (len >= sizeof(struct sadb_x_ipsecrequest)) {
++		if ((err = parse_ipsecrequest(xp, rq)) < 0)
++			return err;
++		len -= rq->sadb_x_ipsecrequest_len;
++		rq = (void*)((u8*)rq + rq->sadb_x_ipsecrequest_len);
++	}
++	return 0;
++}
++
++static int pfkey_xfrm_policy2msg_size(struct xfrm_policy *xp)
++{
++	int sockaddr_size = pfkey_sockaddr_size(xp->family);
++	int socklen = (xp->family == AF_INET ?
++		       sizeof(struct sockaddr_in) :
++		       sizeof(struct sockaddr_in6));
++
++	return sizeof(struct sadb_msg) +
++		(sizeof(struct sadb_lifetime) * 3) +
++		(sizeof(struct sadb_address) * 2) + 
++		(sockaddr_size * 2) +
++		sizeof(struct sadb_x_policy) +
++		(xp->xfrm_nr * (sizeof(struct sadb_x_ipsecrequest) +
++				(socklen * 2)));
++}
++
++static struct sk_buff * pfkey_xfrm_policy2msg_prep(struct xfrm_policy *xp)
++{
++	struct sk_buff *skb;
++	int size;
++
++	size = pfkey_xfrm_policy2msg_size(xp);
++
++	skb =  alloc_skb(size + 16, GFP_ATOMIC);
++	if (skb == NULL)
++		return ERR_PTR(-ENOBUFS);
++
++	return skb;
++}
++
++static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, int dir)
++{
++	struct sadb_msg *hdr;
++	struct sadb_address *addr;
++	struct sadb_lifetime *lifetime;
++	struct sadb_x_policy *pol;
++	struct sockaddr_in   *sin;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	struct sockaddr_in6  *sin6;
++#endif
++	int i;
++	int size;
++	int sockaddr_size = pfkey_sockaddr_size(xp->family);
++	int socklen = (xp->family == AF_INET ?
++		       sizeof(struct sockaddr_in) :
++		       sizeof(struct sockaddr_in6));
++
++	size = pfkey_xfrm_policy2msg_size(xp);
++
++	/* caller should fill in the header later */
++	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
++	memset(hdr, 0, size);	/* XXX do we need this ? */
++
++	/* src address */
++	addr = (struct sadb_address*) skb_put(skb, 
++					      sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len = 
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
++	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
++	addr->sadb_address_prefixlen = xp->selector.prefixlen_s;
++	addr->sadb_address_reserved = 0;
++	/* src address */
++	if (xp->family == AF_INET) {
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = xp->selector.saddr.a4;
++		sin->sin_port = xp->selector.sport;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (xp->family == AF_INET6) {
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = xp->selector.sport;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr, xp->selector.saddr.a6,
++		       sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++	}
++#endif
++	else
++		BUG();
++
++	/* dst address */
++	addr = (struct sadb_address*) skb_put(skb, 
++					      sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len =
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
++	addr->sadb_address_proto = pfkey_proto_from_xfrm(xp->selector.proto);
++	addr->sadb_address_prefixlen = xp->selector.prefixlen_d; 
++	addr->sadb_address_reserved = 0;
++	if (xp->family == AF_INET) {
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = xp->selector.daddr.a4;
++		sin->sin_port = xp->selector.dport;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (xp->family == AF_INET6) {
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = xp->selector.dport;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr, xp->selector.daddr.a6,
++		       sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++	}
++#endif
++	else
++		BUG();
++
++	/* hard time */
++	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
++						     sizeof(struct sadb_lifetime));
++	lifetime->sadb_lifetime_len =
++		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
++	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_HARD;
++	lifetime->sadb_lifetime_allocations =  _X2KEY(xp->lft.hard_packet_limit);
++	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.hard_byte_limit);
++	lifetime->sadb_lifetime_addtime = xp->lft.hard_add_expires_seconds;
++	lifetime->sadb_lifetime_usetime = xp->lft.hard_use_expires_seconds;
++	/* soft time */
++	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
++						     sizeof(struct sadb_lifetime));
++	lifetime->sadb_lifetime_len =
++		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
++	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_SOFT;
++	lifetime->sadb_lifetime_allocations =  _X2KEY(xp->lft.soft_packet_limit);
++	lifetime->sadb_lifetime_bytes = _X2KEY(xp->lft.soft_byte_limit);
++	lifetime->sadb_lifetime_addtime = xp->lft.soft_add_expires_seconds;
++	lifetime->sadb_lifetime_usetime = xp->lft.soft_use_expires_seconds;
++	/* current time */
++	lifetime = (struct sadb_lifetime *)  skb_put(skb, 
++						     sizeof(struct sadb_lifetime));
++	lifetime->sadb_lifetime_len =
++		sizeof(struct sadb_lifetime)/sizeof(uint64_t);
++	lifetime->sadb_lifetime_exttype = SADB_EXT_LIFETIME_CURRENT;
++	lifetime->sadb_lifetime_allocations = xp->curlft.packets;
++	lifetime->sadb_lifetime_bytes = xp->curlft.bytes;
++	lifetime->sadb_lifetime_addtime = xp->curlft.add_time;
++	lifetime->sadb_lifetime_usetime = xp->curlft.use_time;
++
++	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
++	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
++	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
++	pol->sadb_x_policy_type = IPSEC_POLICY_DISCARD;
++	if (xp->action == XFRM_POLICY_ALLOW) {
++		if (xp->xfrm_nr)
++			pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
++		else
++			pol->sadb_x_policy_type = IPSEC_POLICY_NONE;
++	}
++	pol->sadb_x_policy_dir = dir+1;
++	pol->sadb_x_policy_id = xp->index;
++	pol->sadb_x_policy_priority = xp->priority;
++
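++	/* One sadb_x_ipsecrequest per template.  The worst-case size
++	 * computed above assumed tunnel endpoints for every template,
++	 * so shrink it back for transport-mode entries before it is
++	 * used for sadb_msg_len below. */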
++	for (i=0; i<xp->xfrm_nr; i++) {
++		struct sadb_x_ipsecrequest *rq;
++		struct xfrm_tmpl *t = xp->xfrm_vec + i;
++		int req_size;
++
++		req_size = sizeof(struct sadb_x_ipsecrequest);
++		if (t->mode)
++			req_size += 2*socklen;
++		else
++			size -= 2*socklen;
++		rq = (void*)skb_put(skb, req_size);
++		pol->sadb_x_policy_len += req_size/8;
++		memset(rq, 0, sizeof(*rq));
++		rq->sadb_x_ipsecrequest_len = req_size;
++		rq->sadb_x_ipsecrequest_proto = t->id.proto;
++		rq->sadb_x_ipsecrequest_mode = t->mode+1;
++		rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_REQUIRE;
++		if (t->reqid)
++			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_UNIQUE;
++		if (t->optional)
++			rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE;
++		rq->sadb_x_ipsecrequest_reqid = t->reqid;
++		if (t->mode) {
++			switch (xp->family) {
++			case AF_INET:
++				sin = (void*)(rq+1);
++				sin->sin_family = AF_INET;
++				sin->sin_addr.s_addr = t->saddr.a4;
++				sin->sin_port = 0;
++				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++				sin++;
++				sin->sin_family = AF_INET;
++				sin->sin_addr.s_addr = t->id.daddr.a4;
++				sin->sin_port = 0;
++				memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++				break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++			case AF_INET6:
++				sin6 = (void*)(rq+1);
++				sin6->sin6_family = AF_INET6;
++				sin6->sin6_port = 0;
++				sin6->sin6_flowinfo = 0;
++				memcpy(&sin6->sin6_addr, t->saddr.a6,
++				       sizeof(struct in6_addr));
++				sin6->sin6_scope_id = 0;
++
++				sin6++;
++				sin6->sin6_family = AF_INET6;
++				sin6->sin6_port = 0;
++				sin6->sin6_flowinfo = 0;
++				memcpy(&sin6->sin6_addr, t->id.daddr.a6,
++				       sizeof(struct in6_addr));
++				sin6->sin6_scope_id = 0;
++				break;
++#endif
++			default:
++				break;
++			}
++		}
++	}
++	hdr->sadb_msg_len = size / sizeof(uint64_t);
++	hdr->sadb_msg_reserved = atomic_read(&xp->refcnt);
++}
++
++static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	int err;
++	struct sadb_lifetime *lifetime;
++	struct sadb_address *sa;
++	struct sadb_x_policy *pol;
++	struct xfrm_policy *xp;
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++
++	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
++				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
++	    !ext_hdrs[SADB_X_EXT_POLICY-1])
++		return -EINVAL;
++
++	pol = ext_hdrs[SADB_X_EXT_POLICY-1];
++	if (pol->sadb_x_policy_type > IPSEC_POLICY_IPSEC)
++		return -EINVAL;
++	if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
++		return -EINVAL;
++
++	xp = xfrm_policy_alloc(GFP_KERNEL);
++	if (xp == NULL)
++		return -ENOBUFS;
++
++	xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ?
++		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
++	xp->priority = pol->sadb_x_policy_priority;
++
++	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
++	xp->family = pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.saddr);
++	if (!xp->family) {
++		err = -EINVAL;
++		goto out;
++	}
++	xp->selector.family = xp->family;
++	xp->selector.prefixlen_s = sa->sadb_address_prefixlen;
++	xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
++	xp->selector.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
++	if (xp->selector.sport)
++		xp->selector.sport_mask = ~0;
++
++	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
++	pfkey_sadb_addr2xfrm_addr(sa, &xp->selector.daddr);
++	xp->selector.prefixlen_d = sa->sadb_address_prefixlen;
++
++	/* Amusing, we set this twice.  KAME apps appear to set same value
++	 * in both addresses.
++	 */
++	xp->selector.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
++
++	xp->selector.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
++	if (xp->selector.dport)
++		xp->selector.dport_mask = ~0;
++
++	xp->lft.soft_byte_limit = XFRM_INF;
++	xp->lft.hard_byte_limit = XFRM_INF;
++	xp->lft.soft_packet_limit = XFRM_INF;
++	xp->lft.hard_packet_limit = XFRM_INF;
++	if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_HARD-1]) != NULL) {
++		xp->lft.hard_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
++		xp->lft.hard_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
++		xp->lft.hard_add_expires_seconds = lifetime->sadb_lifetime_addtime;
++		xp->lft.hard_use_expires_seconds = lifetime->sadb_lifetime_usetime;
++	}
++	if ((lifetime = ext_hdrs[SADB_EXT_LIFETIME_SOFT-1]) != NULL) {
++		xp->lft.soft_packet_limit = _KEY2X(lifetime->sadb_lifetime_allocations);
++		xp->lft.soft_byte_limit = _KEY2X(lifetime->sadb_lifetime_bytes);
++		xp->lft.soft_add_expires_seconds = lifetime->sadb_lifetime_addtime;
++		xp->lft.soft_use_expires_seconds = lifetime->sadb_lifetime_usetime;
++	}
++	xp->xfrm_nr = 0;
++	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
++	    (err = parse_ipsecrequests(xp, pol)) < 0)
++		goto out;
++
++	out_skb = pfkey_xfrm_policy2msg_prep(xp);
++	if (IS_ERR(out_skb)) {
++		err =  PTR_ERR(out_skb);
++		goto out;
++	}
++
++	err = xfrm_policy_insert(pol->sadb_x_policy_dir-1, xp,
++				 hdr->sadb_msg_type != SADB_X_SPDUPDATE);
++	if (err) {
++		kfree_skb(out_skb);
++		goto out;
++	}
++
++	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
++
++	xfrm_pol_put(xp);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
++	out_hdr->sadb_msg_satype = 0;
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
++	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
++	return 0;
++
++out:
++	kfree(xp);
++	return err;
++}
++
++static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	int err;
++	struct sadb_address *sa;
++	struct sadb_x_policy *pol;
++	struct xfrm_policy *xp;
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++	struct xfrm_selector sel;
++
++	if (!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
++				     ext_hdrs[SADB_EXT_ADDRESS_DST-1]) ||
++	    !ext_hdrs[SADB_X_EXT_POLICY-1])
++		return -EINVAL;
++
++	pol = ext_hdrs[SADB_X_EXT_POLICY-1];
++	if (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir >= IPSEC_DIR_MAX)
++		return -EINVAL;
++
++	memset(&sel, 0, sizeof(sel));
++
++	sa = ext_hdrs[SADB_EXT_ADDRESS_SRC-1];
++	sel.family = pfkey_sadb_addr2xfrm_addr(sa, &sel.saddr);
++	sel.prefixlen_s = sa->sadb_address_prefixlen;
++	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
++	sel.sport = ((struct sockaddr_in *)(sa+1))->sin_port;
++	if (sel.sport)
++		sel.sport_mask = ~0;
++
++	sa = ext_hdrs[SADB_EXT_ADDRESS_DST-1];
++	pfkey_sadb_addr2xfrm_addr(sa, &sel.daddr);
++	sel.prefixlen_d = sa->sadb_address_prefixlen;
++	sel.proto = pfkey_proto_to_xfrm(sa->sadb_address_proto);
++	sel.dport = ((struct sockaddr_in *)(sa+1))->sin_port;
++	if (sel.dport)
++		sel.dport_mask = ~0;
++
++	xp = xfrm_policy_bysel(pol->sadb_x_policy_dir-1, &sel, 1);
++	if (xp == NULL)
++		return -ENOENT;
++
++	err = 0;
++
++	out_skb = pfkey_xfrm_policy2msg_prep(xp);
++	if (IS_ERR(out_skb)) {
++		err =  PTR_ERR(out_skb);
++		goto out;
++	}
++	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = SADB_X_SPDDELETE;
++	out_hdr->sadb_msg_satype = 0;
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
++	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
++	err = 0;
++
++out:
++	xfrm_pol_put(xp);
++	return err;
++}
++
++static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	int err;
++	struct sadb_x_policy *pol;
++	struct xfrm_policy *xp;
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++
++	if ((pol = ext_hdrs[SADB_X_EXT_POLICY-1]) == NULL)
++		return -EINVAL;
++
++	xp = xfrm_policy_byid(0, pol->sadb_x_policy_id,
++			      hdr->sadb_msg_type == SADB_X_SPDDELETE2);
++	if (xp == NULL)
++		return -ENOENT;
++
++	err = 0;
++
++	out_skb = pfkey_xfrm_policy2msg_prep(xp);
++	if (IS_ERR(out_skb)) {
++		err =  PTR_ERR(out_skb);
++		goto out;
++	}
++	pfkey_xfrm_policy2msg(out_skb, xp, pol->sadb_x_policy_dir-1);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = hdr->sadb_msg_type;
++	out_hdr->sadb_msg_satype = 0;
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
++	out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, sk);
++	err = 0;
++
++out:
++	xfrm_pol_put(xp);
++	return err;
++}
++
++static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
++{
++	struct pfkey_dump_data *data = ptr;
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++
++	out_skb = pfkey_xfrm_policy2msg_prep(xp);
++	if (IS_ERR(out_skb))
++		return PTR_ERR(out_skb);
++
++	pfkey_xfrm_policy2msg(out_skb, xp, dir);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = data->hdr->sadb_msg_version;
++	out_hdr->sadb_msg_type = SADB_X_SPDDUMP;
++	out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_seq = count;
++	out_hdr->sadb_msg_pid = data->hdr->sadb_msg_pid;
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, data->sk);
++	return 0;
++}
++
++static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk };
++
++	return xfrm_policy_walk(dump_sp, &data);
++}
++
++static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr, void **ext_hdrs)
++{
++	struct sk_buff *skb_out;
++	struct sadb_msg *hdr_out;
++
++	skb_out = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_KERNEL);
++	if (!skb_out)
++		return -ENOBUFS;
++
++	xfrm_policy_flush();
++
++	hdr_out = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg));
++	pfkey_hdr_dup(hdr_out, hdr);
++	hdr_out->sadb_msg_errno = (uint8_t) 0;
++	hdr_out->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
++	pfkey_broadcast(skb_out, GFP_KERNEL, BROADCAST_ALL, NULL);
++
++	return 0;
++}
++
++typedef int (*pfkey_handler)(struct sock *sk, struct sk_buff *skb,
++			     struct sadb_msg *hdr, void **ext_hdrs);
++static pfkey_handler pfkey_funcs[SADB_MAX + 1] = {
++	[SADB_RESERVED]		= pfkey_reserved,
++	[SADB_GETSPI]		= pfkey_getspi,
++	[SADB_UPDATE]		= pfkey_add,
++	[SADB_ADD]		= pfkey_add,
++	[SADB_DELETE]		= pfkey_delete,
++	[SADB_GET]		= pfkey_get,
++	[SADB_ACQUIRE]		= pfkey_acquire,
++	[SADB_REGISTER]		= pfkey_register,
++	[SADB_EXPIRE]		= NULL,
++	[SADB_FLUSH]		= pfkey_flush,
++	[SADB_DUMP]		= pfkey_dump,
++	[SADB_X_PROMISC]	= pfkey_promisc,
++	[SADB_X_PCHANGE]	= NULL,
++	[SADB_X_SPDUPDATE]	= pfkey_spdadd,
++	[SADB_X_SPDADD]		= pfkey_spdadd,
++	[SADB_X_SPDDELETE]	= pfkey_spddelete,
++	[SADB_X_SPDGET]		= pfkey_spdget,
++	[SADB_X_SPDACQUIRE]	= NULL,
++	[SADB_X_SPDDUMP]	= pfkey_spddump,
++	[SADB_X_SPDFLUSH]	= pfkey_spdflush,
++	[SADB_X_SPDSETIDX]	= pfkey_spdadd,
++	[SADB_X_SPDDELETE2]	= pfkey_spdget,
++};
++
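++/* Feed a copy of every inbound message to promiscuous listeners, then
++ * parse its extension headers and dispatch on sadb_msg_type. */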
++static int pfkey_process(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hdr)
++{
++	void *ext_hdrs[SADB_EXT_MAX];
++	int err;
++
++	pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
++			BROADCAST_PROMISC_ONLY, NULL);
++
++	memset(ext_hdrs, 0, sizeof(ext_hdrs));
++	err = parse_exthdrs(skb, hdr, ext_hdrs);
++	if (!err) {
++		err = -EOPNOTSUPP;
++		if (pfkey_funcs[hdr->sadb_msg_type])
++			err = pfkey_funcs[hdr->sadb_msg_type](sk, skb, hdr, ext_hdrs);
++	}
++	return err;
++}
++
++static struct sadb_msg *pfkey_get_base_msg(struct sk_buff *skb, int *errp)
++{
++	struct sadb_msg *hdr = NULL;
++
++	if (skb->len < sizeof(*hdr)) {
++		*errp = -EMSGSIZE;
++	} else {
++		hdr = (struct sadb_msg *) skb->data;
++		if (hdr->sadb_msg_version != PF_KEY_V2 ||
++		    hdr->sadb_msg_reserved != 0 ||
++		    (hdr->sadb_msg_type <= SADB_RESERVED ||
++		     hdr->sadb_msg_type > SADB_MAX)) {
++			hdr = NULL;
++			*errp = -EINVAL;
++		} else if (hdr->sadb_msg_len != (skb->len /
++						 sizeof(uint64_t)) ||
++			   hdr->sadb_msg_len < (sizeof(struct sadb_msg) /
++						sizeof(uint64_t))) {
++			hdr = NULL;
++			*errp = -EMSGSIZE;
++		} else {
++			*errp = 0;
++		}
++	}
++	return hdr;
++}
++
++static inline int aalg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
++{
++	return t->aalgos & (1 << d->desc.sadb_alg_id);
++}
++
++static inline int ealg_tmpl_set(struct xfrm_tmpl *t, struct xfrm_algo_desc *d)
++{
++	return t->ealgos & (1 << d->desc.sadb_alg_id);
++}
++
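++/* Size the SADB_EXT_PROPOSAL payload for an ACQUIRE: one sadb_comb per
++ * available auth algorithm (AH), or per auth/encrypt pair (ESP). */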
++static int count_ah_combs(struct xfrm_tmpl *t)
++{
++	int i, sz = 0;
++
++	for (i = 0; ; i++) {
++		struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
++		if (!aalg)
++			break;
++		if (aalg_tmpl_set(t, aalg) && aalg->available)
++			sz += sizeof(struct sadb_comb);
++	}
++	return sz + sizeof(struct sadb_prop);
++}
++
++static int count_esp_combs(struct xfrm_tmpl *t)
++{
++	int i, k, sz = 0;
++
++	for (i = 0; ; i++) {
++		struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
++		if (!ealg)
++			break;
++			
++		if (!(ealg_tmpl_set(t, ealg) && ealg->available))
++			continue;
++			
++		for (k = 1; ; k++) {
++			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
++			if (!aalg)
++				break;
++				
++			if (aalg_tmpl_set(t, aalg) && aalg->available)
++				sz += sizeof(struct sadb_comb);
++		}
++	}
++	return sz + sizeof(struct sadb_prop);
++}
++
++static void dump_ah_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
++{
++	struct sadb_prop *p;
++	int i;
++
++	p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop));
++	p->sadb_prop_len = sizeof(struct sadb_prop)/8;
++	p->sadb_prop_exttype = SADB_EXT_PROPOSAL;
++	p->sadb_prop_replay = 32;
++	memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
++
++	for (i = 0; ; i++) {
++		struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(i);
++		if (!aalg)
++			break;
++
++		if (aalg_tmpl_set(t, aalg) && aalg->available) {
++			struct sadb_comb *c;
++			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
++			memset(c, 0, sizeof(*c));
++			p->sadb_prop_len += sizeof(struct sadb_comb)/8;
++			c->sadb_comb_auth = aalg->desc.sadb_alg_id;
++			c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits;
++			c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits;
++			c->sadb_comb_hard_addtime = 24*60*60;
++			c->sadb_comb_soft_addtime = 20*60*60;
++			c->sadb_comb_hard_usetime = 8*60*60;
++			c->sadb_comb_soft_usetime = 7*60*60;
++		}
++	}
++}
++
++static void dump_esp_combs(struct sk_buff *skb, struct xfrm_tmpl *t)
++{
++	struct sadb_prop *p;
++	int i, k;
++
++	p = (struct sadb_prop*)skb_put(skb, sizeof(struct sadb_prop));
++	p->sadb_prop_len = sizeof(struct sadb_prop)/8;
++	p->sadb_prop_exttype = SADB_EXT_PROPOSAL;
++	p->sadb_prop_replay = 32;
++	memset(p->sadb_prop_reserved, 0, sizeof(p->sadb_prop_reserved));
++
++	for (i=0; ; i++) {
++		struct xfrm_algo_desc *ealg = xfrm_ealg_get_byidx(i);
++		if (!ealg)
++			break;
++	
++		if (!(ealg_tmpl_set(t, ealg) && ealg->available))
++			continue;
++			
++		for (k = 1; ; k++) {
++			struct sadb_comb *c;
++			struct xfrm_algo_desc *aalg = xfrm_aalg_get_byidx(k);
++			if (!aalg)
++				break;
++			if (!(aalg_tmpl_set(t, aalg) && aalg->available))
++				continue;
++			c = (struct sadb_comb*)skb_put(skb, sizeof(struct sadb_comb));
++			memset(c, 0, sizeof(*c));
++			p->sadb_prop_len += sizeof(struct sadb_comb)/8;
++			c->sadb_comb_auth = aalg->desc.sadb_alg_id;
++			c->sadb_comb_auth_minbits = aalg->desc.sadb_alg_minbits;
++			c->sadb_comb_auth_maxbits = aalg->desc.sadb_alg_maxbits;
++			c->sadb_comb_encrypt = ealg->desc.sadb_alg_id;
++			c->sadb_comb_encrypt_minbits = ealg->desc.sadb_alg_minbits;
++			c->sadb_comb_encrypt_maxbits = ealg->desc.sadb_alg_maxbits;
++			c->sadb_comb_hard_addtime = 24*60*60;
++			c->sadb_comb_soft_addtime = 20*60*60;
++			c->sadb_comb_hard_usetime = 8*60*60;
++			c->sadb_comb_soft_usetime = 7*60*60;
++		}
++	}
++}
++
++static int pfkey_send_notify(struct xfrm_state *x, int hard)
++{
++	struct sk_buff *out_skb;
++	struct sadb_msg *out_hdr;
++	int hsc = (hard ? 2 : 1);
++
++	out_skb = pfkey_xfrm_state2msg(x, 0, hsc);
++	if (IS_ERR(out_skb))
++		return PTR_ERR(out_skb);
++
++	out_hdr = (struct sadb_msg *) out_skb->data;
++	out_hdr->sadb_msg_version = PF_KEY_V2;
++	out_hdr->sadb_msg_type = SADB_EXPIRE;
++	out_hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
++	out_hdr->sadb_msg_errno = 0;
++	out_hdr->sadb_msg_reserved = 0;
++	out_hdr->sadb_msg_seq = 0;
++	out_hdr->sadb_msg_pid = 0;
++
++	pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
++	return 0;
++}
++
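++/* Hand out a non-zero acquire sequence number; the second increment
++ * in the "?:" expression only runs if the counter wrapped to zero. */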
++static u32 get_acqseq(void)
++{
++	u32 res;
++	static u32 acqseq;
++	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
++
++	spin_lock_bh(&acqseq_lock);
++	res = (++acqseq ? : ++acqseq);
++	spin_unlock_bh(&acqseq_lock);
++	return res;
++}
++
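++/* Kernel-initiated SADB_ACQUIRE: ask registered key managers to
++ * negotiate an SA for template t of policy xp, advertising the
++ * src/dst addresses, the policy, and the acceptable algorithm
++ * combinations. */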
++static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *xp, int dir)
++{
++	struct sk_buff *skb;
++	struct sadb_msg *hdr;
++	struct sadb_address *addr;
++	struct sadb_x_policy *pol;
++	struct sockaddr_in *sin;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	struct sockaddr_in6 *sin6;
++#endif
++	int sockaddr_size;
++	int size;
++	
++	sockaddr_size = pfkey_sockaddr_size(x->props.family);
++	if (!sockaddr_size)
++		return -EINVAL;
++
++	size = sizeof(struct sadb_msg) +
++		(sizeof(struct sadb_address) * 2) +
++		(sockaddr_size * 2) +
++		sizeof(struct sadb_x_policy);
++	
++	if (x->id.proto == IPPROTO_AH)
++		size += count_ah_combs(t);
++	else if (x->id.proto == IPPROTO_ESP)
++		size += count_esp_combs(t);
++
++	skb =  alloc_skb(size + 16, GFP_ATOMIC);
++	if (skb == NULL)
++		return -ENOMEM;
++	
++	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
++	hdr->sadb_msg_version = PF_KEY_V2;
++	hdr->sadb_msg_type = SADB_ACQUIRE;
++	hdr->sadb_msg_satype = pfkey_proto2satype(x->id.proto);
++	hdr->sadb_msg_len = size / sizeof(uint64_t);
++	hdr->sadb_msg_errno = 0;
++	hdr->sadb_msg_reserved = 0;
++	hdr->sadb_msg_seq = x->km.seq = get_acqseq();
++	hdr->sadb_msg_pid = 0;
++
++	/* src address */
++	addr = (struct sadb_address*) skb_put(skb, 
++					      sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len = 
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
++	addr->sadb_address_proto = 0;
++	addr->sadb_address_reserved = 0;
++	if (x->props.family == AF_INET) {
++		addr->sadb_address_prefixlen = 32;
++
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = x->props.saddr.a4;
++		sin->sin_port = 0;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (x->props.family == AF_INET6) {
++		addr->sadb_address_prefixlen = 128;
++
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = 0;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr,
++		       x->props.saddr.a6, sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++	}
++#endif
++	else
++		BUG();
++	
++	/* dst address */
++	addr = (struct sadb_address*) skb_put(skb, 
++					      sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len =
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
++	addr->sadb_address_proto = 0;
++	addr->sadb_address_reserved = 0;
++	if (x->props.family == AF_INET) {
++		addr->sadb_address_prefixlen = 32; 
++
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = x->id.daddr.a4;
++		sin->sin_port = 0;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (x->props.family == AF_INET6) {
++		addr->sadb_address_prefixlen = 128; 
++
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = 0;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr,
++		       x->id.daddr.a6, sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++	}
++#endif
++	else
++		BUG();
++
++	pol = (struct sadb_x_policy *)  skb_put(skb, sizeof(struct sadb_x_policy));
++	pol->sadb_x_policy_len = sizeof(struct sadb_x_policy)/sizeof(uint64_t);
++	pol->sadb_x_policy_exttype = SADB_X_EXT_POLICY;
++	pol->sadb_x_policy_type = IPSEC_POLICY_IPSEC;
++	pol->sadb_x_policy_dir = dir+1;
++	pol->sadb_x_policy_id = xp->index;
++
++	/* Set sadb_comb's. */
++	if (x->id.proto == IPPROTO_AH)
++		dump_ah_combs(skb, t);
++	else if (x->id.proto == IPPROTO_ESP)
++		dump_esp_combs(skb, t);
++
++	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
++}
++
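++/* Parse a sadb_x_policy blob passed in via the IP_IPSEC_POLICY /
++ * IPV6_IPSEC_POLICY socket options into a per-socket policy.  On
++ * failure NULL is returned and *dir carries the negative errno;
++ * on success *dir holds the policy direction. */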
++static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt,
++                                                u8 *data, int len, int *dir)
++{
++	struct xfrm_policy *xp;
++	struct sadb_x_policy *pol = (struct sadb_x_policy*)data;
++
++	switch (family) {
++	case AF_INET:
++		if (opt != IP_IPSEC_POLICY) {
++			*dir = -EOPNOTSUPP;
++			return NULL;
++		}
++		break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		if (opt != IPV6_IPSEC_POLICY) {
++			*dir = -EOPNOTSUPP;
++			return NULL;
++		}
++		break;
++#endif
++	default:
++		*dir = -EINVAL;
++		return NULL;
++	}
++
++	*dir = -EINVAL;
++
++	if (len < sizeof(struct sadb_x_policy) ||
++	    pol->sadb_x_policy_len*8 > len ||
++	    pol->sadb_x_policy_type > IPSEC_POLICY_BYPASS ||
++	    (!pol->sadb_x_policy_dir || pol->sadb_x_policy_dir > IPSEC_DIR_OUTBOUND))
++		return NULL;
++
++	xp = xfrm_policy_alloc(GFP_ATOMIC);
++	if (xp == NULL) {
++		*dir = -ENOBUFS;
++		return NULL;
++	}
++
++	xp->action = (pol->sadb_x_policy_type == IPSEC_POLICY_DISCARD ?
++		      XFRM_POLICY_BLOCK : XFRM_POLICY_ALLOW);
++
++	xp->lft.soft_byte_limit = XFRM_INF;
++	xp->lft.hard_byte_limit = XFRM_INF;
++	xp->lft.soft_packet_limit = XFRM_INF;
++	xp->lft.hard_packet_limit = XFRM_INF;
++	xp->family = family;
++
++	xp->xfrm_nr = 0;
++	if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC &&
++	    (*dir = parse_ipsecrequests(xp, pol)) < 0)
++		goto out;
++
++	*dir = pol->sadb_x_policy_dir-1;
++	return xp;
++
++out:
++	kfree(xp);
++	return NULL;
++}
++
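++/* Notify registered key managers that the NAT mapping of a NAT-T
++ * encapsulated ESP state changed, via SADB_X_NAT_T_NEW_MAPPING. */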
++static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
++{
++	struct sk_buff *skb;
++	struct sadb_msg *hdr;
++	struct sadb_sa *sa;
++	struct sadb_address *addr;
++	struct sadb_x_nat_t_port *n_port;
++	struct sockaddr_in *sin;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	struct sockaddr_in6 *sin6;
++#endif
++	int sockaddr_size;
++	int size;
++	__u8 satype = (x->id.proto == IPPROTO_ESP ? SADB_SATYPE_ESP : 0);
++	struct xfrm_encap_tmpl *natt = NULL;
++
++	sockaddr_size = pfkey_sockaddr_size(x->props.family);
++	if (!sockaddr_size)
++		return -EINVAL;
++
++	if (!satype)
++		return -EINVAL;
++
++	if (!x->encap)
++		return -EINVAL;
++
++	natt = x->encap;
++
++	/* Build an SADB_X_NAT_T_NEW_MAPPING message:
++	 *
++	 * HDR | SA | ADDRESS_SRC (old addr) | NAT_T_SPORT (old port) |
++	 * ADDRESS_DST (new addr) | NAT_T_DPORT (new port)
++	 */
++	
++	size = sizeof(struct sadb_msg) +
++		sizeof(struct sadb_sa) +
++		(sizeof(struct sadb_address) * 2) +
++		(sockaddr_size * 2) +
++		(sizeof(struct sadb_x_nat_t_port) * 2);
++	
++	skb =  alloc_skb(size + 16, GFP_ATOMIC);
++	if (skb == NULL)
++		return -ENOMEM;
++	
++	hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg));
++	hdr->sadb_msg_version = PF_KEY_V2;
++	hdr->sadb_msg_type = SADB_X_NAT_T_NEW_MAPPING;
++	hdr->sadb_msg_satype = satype;
++	hdr->sadb_msg_len = size / sizeof(uint64_t);
++	hdr->sadb_msg_errno = 0;
++	hdr->sadb_msg_reserved = 0;
++	hdr->sadb_msg_seq = x->km.seq = get_acqseq();
++	hdr->sadb_msg_pid = 0;
++
++	/* SA */
++	sa = (struct sadb_sa *) skb_put(skb, sizeof(struct sadb_sa));
++	sa->sadb_sa_len = sizeof(struct sadb_sa)/sizeof(uint64_t);
++	sa->sadb_sa_exttype = SADB_EXT_SA;
++	sa->sadb_sa_spi = x->id.spi;
++	sa->sadb_sa_replay = 0;
++	sa->sadb_sa_state = 0;
++	sa->sadb_sa_auth = 0;
++	sa->sadb_sa_encrypt = 0;
++	sa->sadb_sa_flags = 0;
++
++	/* ADDRESS_SRC (old addr) */
++	addr = (struct sadb_address*)
++		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len = 
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_SRC;
++	addr->sadb_address_proto = 0;
++	addr->sadb_address_reserved = 0;
++	if (x->props.family == AF_INET) {
++		addr->sadb_address_prefixlen = 32;
++
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = x->props.saddr.a4;
++		sin->sin_port = 0;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (x->props.family == AF_INET6) {
++		addr->sadb_address_prefixlen = 128;
++
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = 0;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr,
++		       x->props.saddr.a6, sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++	}
++#endif
++	else
++		BUG();
++
++	/* NAT_T_SPORT (old port) */
++	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
++	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
++	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_SPORT;
++	n_port->sadb_x_nat_t_port_port = natt->encap_sport;
++	n_port->sadb_x_nat_t_port_reserved = 0;
++
++	/* ADDRESS_DST (new addr) */
++	addr = (struct sadb_address*)
++		skb_put(skb, sizeof(struct sadb_address)+sockaddr_size);
++	addr->sadb_address_len = 
++		(sizeof(struct sadb_address)+sockaddr_size)/
++			sizeof(uint64_t);
++	addr->sadb_address_exttype = SADB_EXT_ADDRESS_DST;
++	addr->sadb_address_proto = 0;
++	addr->sadb_address_reserved = 0;
++	if (x->props.family == AF_INET) {
++		addr->sadb_address_prefixlen = 32;
++
++		sin = (struct sockaddr_in *) (addr + 1);
++		sin->sin_family = AF_INET;
++		sin->sin_addr.s_addr = ipaddr->a4;
++		sin->sin_port = 0;
++		memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
++	}
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	else if (x->props.family == AF_INET6) {
++		addr->sadb_address_prefixlen = 128;
++
++		sin6 = (struct sockaddr_in6 *) (addr + 1);
++		sin6->sin6_family = AF_INET6;
++		sin6->sin6_port = 0;
++		sin6->sin6_flowinfo = 0;
++		memcpy(&sin6->sin6_addr, &ipaddr->a6, sizeof(struct in6_addr));
++		sin6->sin6_scope_id = 0;
++	}
++#endif
++	else
++		BUG();
++
++	/* NAT_T_DPORT (new port) */
++	n_port = (struct sadb_x_nat_t_port*) skb_put(skb, sizeof (*n_port));
++	n_port->sadb_x_nat_t_port_len = sizeof(*n_port)/sizeof(uint64_t);
++	n_port->sadb_x_nat_t_port_exttype = SADB_X_EXT_NAT_T_DPORT;
++	n_port->sadb_x_nat_t_port_port = sport;
++	n_port->sadb_x_nat_t_port_reserved = 0;
++
++	return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL);
++}
++
++static int pfkey_sendmsg(struct socket *sock, struct msghdr *msg, int len,
++			 struct scm_cookie *scm)
++{
++	struct sock *sk = sock->sk;
++	struct sk_buff *skb = NULL;
++	struct sadb_msg *hdr = NULL;
++	int err;
++
++	err = -EOPNOTSUPP;
++	if (msg->msg_flags & MSG_OOB)
++		goto out;
++
++	err = -EMSGSIZE;
++	if ((unsigned)len > sk->sndbuf-32)
++		goto out;
++
++	err = -ENOBUFS;
++	skb = alloc_skb(len, GFP_KERNEL);
++	if (skb == NULL)
++		goto out;
++
++	err = -EFAULT;
++	if (memcpy_fromiovec(skb_put(skb,len), msg->msg_iov, len))
++		goto out;
++
++	hdr = pfkey_get_base_msg(skb, &err);
++	if (!hdr)
++		goto out;
++
++	down(&xfrm_cfg_sem);
++	err = pfkey_process(sk, skb, hdr);
++	up(&xfrm_cfg_sem);
++
++out:
++	if (err && hdr && pfkey_error(hdr, err, sk) == 0)
++		err = 0;
++	if (skb)
++		kfree_skb(skb);
++
++	return err ? : len;
++}
++
++static int pfkey_recvmsg(struct socket *sock, struct msghdr *msg, int len,
++			 int flags, struct scm_cookie *scm)
++{
++	struct sock *sk = sock->sk;
++	struct sk_buff *skb;
++	int copied, err;
++
++	err = -EINVAL;
++	if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC))
++		goto out;
++
++	msg->msg_namelen = 0;
++	skb = skb_recv_datagram(sk, flags, flags & MSG_DONTWAIT, &err);
++	if (skb == NULL)
++		goto out;
++
++	copied = skb->len;
++	if (copied > len) {
++		msg->msg_flags |= MSG_TRUNC;
++		copied = len;
++	}
++
++	skb->h.raw = skb->data;
++	err = skb_copy_datagram_iovec(skb, 0, msg->msg_iov, copied);
++	if (err)
++		goto out_free;
++
++	sock_recv_timestamp(msg, sk, skb);
++
++	err = (flags & MSG_TRUNC) ? skb->len : copied;
++
++out_free:
++	skb_free_datagram(sk, skb);
++out:
++	return err;
++}
++
++static struct proto_ops pfkey_ops = {
++	.family		=	PF_KEY,
++
++	/* Operations that make no sense on pfkey sockets. */
++	.bind		=	sock_no_bind,
++	.connect	=	sock_no_connect,
++	.socketpair	=	sock_no_socketpair,
++	.accept		=	sock_no_accept,
++	.getname	=	sock_no_getname,
++	.ioctl		=	sock_no_ioctl,
++	.listen		=	sock_no_listen,
++	.shutdown	=	sock_no_shutdown,
++	.setsockopt	=	sock_no_setsockopt,
++	.getsockopt	=	sock_no_getsockopt,
++	.mmap		=	sock_no_mmap,
++	.sendpage	=	sock_no_sendpage,
++
++	/* Now the operations that really occur. */
++	.release	=	pfkey_release,
++	.poll		=	datagram_poll,
++	.sendmsg	=	pfkey_sendmsg,
++	.recvmsg	=	pfkey_recvmsg,
++};
++
++static struct net_proto_family pfkey_family_ops = {
++	.family	=	PF_KEY,
++	.create	=	pfkey_create,
++};
++
++#ifdef CONFIG_PROC_FS
++static int pfkey_read_proc(char *buffer, char **start, off_t offset,
++			   int length, int *eof, void *data)
++{
++	off_t pos = 0;
++	off_t begin = 0;
++	int len = 0;
++	struct sock *s;
++
++	len += sprintf(buffer,"sk       RefCnt Rmem   Wmem   User   Inode\n");
++
++	read_lock(&pfkey_table_lock);
++
++	for (s = pfkey_table; s; s = s->next) {
++		len += sprintf(buffer+len,"%p %-6d %-6u %-6u %-6u %-6lu",
++			       s,
++			       atomic_read(&s->refcnt),
++			       atomic_read(&s->rmem_alloc),
++			       atomic_read(&s->wmem_alloc),
++			       sock_i_uid(s),
++			       sock_i_ino(s)
++			       );
++
++		buffer[len++] = '\n';
++		
++		pos = begin + len;
++		if (pos < offset) {
++			len = 0;
++			begin = pos;
++		}
++		if(pos > offset + length)
++			goto done;
++	}
++	*eof = 1;
++
++done:
++	read_unlock(&pfkey_table_lock);
++
++	*start = buffer + (offset - begin);
++	len -= (offset - begin);
++
++	if (len > length)
++		len = length;
++	if (len < 0)
++		len = 0;
++
++	return len;
++}
++#endif
++
++static struct xfrm_mgr pfkeyv2_mgr =
++{
++	.id		= "pfkeyv2",
++	.notify		= pfkey_send_notify,
++	.acquire	= pfkey_send_acquire,
++	.compile_policy	= pfkey_compile_policy,
++	.new_mapping	= pfkey_send_new_mapping,
++};
++
++static void __exit ipsec_pfkey_exit(void)
++{
++	xfrm_unregister_km(&pfkeyv2_mgr);
++	remove_proc_entry("net/pfkey", 0);
++	sock_unregister(PF_KEY);
++}
++
++static int __init ipsec_pfkey_init(void)
++{
++	sock_register(&pfkey_family_ops);
++#ifdef CONFIG_PROC_FS
++	create_proc_read_entry("net/pfkey", 0, 0, pfkey_read_proc, NULL);
++#endif
++	xfrm_register_km(&pfkeyv2_mgr);
++	return 0;
++}
++
++module_init(ipsec_pfkey_init);
++module_exit(ipsec_pfkey_exit);
++MODULE_LICENSE("GPL");
+diff -Nru a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
+--- a/net/netlink/af_netlink.c	2005-02-13 21:25:09 +11:00
++++ b/net/netlink/af_netlink.c	2005-02-13 21:25:09 +11:00
+@@ -658,6 +658,7 @@
+ 	u32 pid;
+ 	u32 group;
+ 	int failure;
++	int delivered;
+ 	int allocation;
+ 	struct sk_buff *skb, *skb2;
+ };
+@@ -694,16 +695,18 @@
+ 		p->failure = 1;
+ 	} else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+ 		netlink_overrun(sk);
+-	} else
++	} else {
++		p->delivered = 1;
+ 		p->skb2 = NULL;
++	}
+ 	sock_put(sk);
+ 
+ out:
+ 	return 0;
+ }
+ 
+-void netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
+-		       u32 group, int allocation)
++int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid,
++		      u32 group, int allocation)
+ {
+ 	struct netlink_broadcast_data info;
+ 	struct sock *sk;
+@@ -712,6 +715,7 @@
+ 	info.pid = pid;
+ 	info.group = group;
+ 	info.failure = 0;
++	info.delivered = 0;
+ 	info.allocation = allocation;
+ 	info.skb = skb;
+ 	info.skb2 = NULL;
+@@ -728,6 +732,12 @@
+ 	if (info.skb2)
+ 		kfree_skb(info.skb2);
+ 	kfree_skb(skb);
++
++	if (info.delivered)
++		return 0;
++	if (info.failure)
++		return -ENOBUFS;
++	return -ESRCH;
+ }
+ 
+ struct netlink_set_err_data {
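netlink_broadcast() growing a return value is what lets callers distinguish "delivered to at least one listener" (0) from "a receiver overran" (-ENOBUFS) and "nobody bound to this group" (-ESRCH). A kernel caller broadcasting an event skb would use it roughly like this (a sketch; nlsk and group are whatever the caller already holds):

	int err = netlink_broadcast(nlsk, skb, 0, group, GFP_ATOMIC);
	if (err == -ESRCH)	/* no listeners: usually harmless for events */
		err = 0;
	return err;		/* -ENOBUFS is worth propagating */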
+diff -Nru a/net/netsyms.c b/net/netsyms.c
+--- a/net/netsyms.c	2005-02-13 21:25:09 +11:00
++++ b/net/netsyms.c	2005-02-13 21:25:09 +11:00
+@@ -57,6 +57,12 @@
+ #include <linux/inet.h>
+ #include <linux/mroute.h>
+ #include <linux/igmp.h>
++#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
++#include <net/ah.h>
++#endif
++#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
++#include <net/esp.h>
++#endif
+ 
+ extern struct net_proto_family inet_family_ops;
+ 
+@@ -191,6 +197,7 @@
+ #endif
+ #ifdef CONFIG_SYSCTL
+ EXPORT_SYMBOL(neigh_sysctl_register);
++EXPORT_SYMBOL(neigh_sysctl_unregister);
+ #endif
+ EXPORT_SYMBOL(pneigh_lookup);
+ EXPORT_SYMBOL(pneigh_enqueue);
+@@ -287,6 +294,7 @@
+ EXPORT_SYMBOL(inetdev_by_index);
+ EXPORT_SYMBOL(in_dev_finish_destroy);
+ EXPORT_SYMBOL(ip_defrag);
++EXPORT_SYMBOL(inet_peer_idlock);
+ EXPORT_SYMBOL(ipfrag_flush);
+ 
+ /* Route manipulation */
+@@ -303,6 +311,14 @@
+ EXPORT_SYMBOL(dlci_ioctl_hook);
+ #endif
+ 
++#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
++EXPORT_SYMBOL_GPL(skb_cow_data);
++EXPORT_SYMBOL_GPL(pskb_put);
++EXPORT_SYMBOL_GPL(skb_to_sgvec);
++#endif
++
++EXPORT_SYMBOL(flow_cache_lookup);
++EXPORT_SYMBOL(flow_cache_genid);
+ 
+ #if defined (CONFIG_IPV6_MODULE) || defined (CONFIG_KHTTPD) || defined (CONFIG_KHTTPD_MODULE) || defined (CONFIG_IP_SCTP_MODULE)
+ /* inet functions common to v4 and v6 */
+@@ -417,8 +433,9 @@
+ EXPORT_SYMBOL(secure_ipv6_id);
+ #endif
+ 
+-#endif
++EXPORT_SYMBOL(ip_generic_getfrag);
+ 
++#endif
+ EXPORT_SYMBOL(tcp_read_sock);
+ 
+ #ifdef CONFIG_IP_SCTP_MODULE
+@@ -495,6 +512,7 @@
+ EXPORT_SYMBOL(loopback_dev);
+ EXPORT_SYMBOL(register_netdevice);
+ EXPORT_SYMBOL(unregister_netdevice);
++EXPORT_SYMBOL(synchronize_net);
+ EXPORT_SYMBOL(netdev_state_change);
+ EXPORT_SYMBOL(dev_new_index);
+ EXPORT_SYMBOL(dev_get_by_flags);
+diff -Nru a/net/sched/cls_route.c b/net/sched/cls_route.c
+--- a/net/sched/cls_route.c	2005-02-13 21:25:09 +11:00
++++ b/net/sched/cls_route.c	2005-02-13 21:25:09 +11:00
+@@ -154,7 +154,7 @@
+ 	if (head == NULL)
+ 		goto old_method;
+ 
+-	iif = ((struct rtable*)dst)->key.iif;
++	iif = ((struct rtable*)dst)->fl.iif;
+ 
+ 	h = route4_fastmap_hash(id, iif);
+ 	if (id == head->fastmap[h].id &&
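This hunk and the SCTP hunks below are fallout from one interface change: the old struct rt_key that keyed IPv4 route lookups is folded into the generic struct flowi, which the xfrm code also uses for policy matching. The mapping is mechanical, as a sketch:

	/* struct rt_key (2.4)  ->  struct flowi (this patch) */
	/* key.dst              ->  fl.fl4_dst               */
	/* key.src              ->  fl.fl4_src               */
	/* key.tos              ->  fl.fl4_tos               */
	/* key.oif              ->  fl.oif                   */
	/* key.iif              ->  fl.iif                   */

ip_route_output_key() keeps its name but now takes the struct flowi, and callers can additionally set fl.proto so that per-protocol IPsec selectors match (as the sctp/protocol.c hunk below does).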
+diff -Nru a/net/sctp/input.c b/net/sctp/input.c
+--- a/net/sctp/input.c	2005-02-13 21:25:09 +11:00
++++ b/net/sctp/input.c	2005-02-13 21:25:09 +11:00
+@@ -58,6 +58,7 @@
+ #include <net/snmp.h>
+ #include <net/sock.h>
+ #include <linux/ipsec.h>
++#include <net/xfrm.h>
+ #include <net/sctp/sctp.h>
+ #include <net/sctp/sm.h>
+ 
+@@ -183,7 +184,7 @@
+ 	rcvr = asoc ? &asoc->base : &ep->base;
+ 	sk = rcvr->sk;
+ 
+-	if (!ipsec_sk_policy(sk, skb))
++	if (!xfrm_policy_check(sk, XFRM_POLICY_IN, skb, family))
+ 		goto discard_release;
+ 
+ 	ret = sk_filter(sk, skb, 1);
+diff -Nru a/net/sctp/ipv6.c b/net/sctp/ipv6.c
+--- a/net/sctp/ipv6.c	2005-02-13 21:25:09 +11:00
++++ b/net/sctp/ipv6.c	2005-02-13 21:25:09 +11:00
+@@ -83,17 +83,6 @@
+ 	.notifier_call = sctp_inetaddr_event,
+ };
+ 
+-/* FIXME: This macro needs to be moved to a common header file. */
+-#define NIP6(addr) \
+-	ntohs((addr)->s6_addr16[0]), \
+-	ntohs((addr)->s6_addr16[1]), \
+-	ntohs((addr)->s6_addr16[2]), \
+-	ntohs((addr)->s6_addr16[3]), \
+-	ntohs((addr)->s6_addr16[4]), \
+-	ntohs((addr)->s6_addr16[5]), \
+-	ntohs((addr)->s6_addr16[6]), \
+-	ntohs((addr)->s6_addr16[7])
+-
+ /* ICMP error handler. */
+ SCTP_STATIC void sctp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+ 			     int type, int code, int offset, __u32 info)
+@@ -174,12 +163,12 @@
+ 	/* Fill in the dest address from the route entry passed with the skb
+ 	 * and the source address from the transport.
+ 	 */
+-	fl.fl6_dst = &transport->ipaddr.v6.sin6_addr;
+-	fl.fl6_src = &transport->saddr.v6.sin6_addr;
++	ipv6_addr_copy(&fl.fl6_dst, &transport->ipaddr.v6.sin6_addr);
++	ipv6_addr_copy(&fl.fl6_src, &transport->saddr.v6.sin6_addr);
+ 
+ 	fl.fl6_flowlabel = np->flow_label;
+ 	IP6_ECN_flow_xmit(sk, fl.fl6_flowlabel);
+-	if (ipv6_addr_type(fl.fl6_src) & IPV6_ADDR_LINKLOCAL)
++	if (ipv6_addr_type(&fl.fl6_src) & IPV6_ADDR_LINKLOCAL)
+ 		fl.oif = transport->saddr.v6.sin6_scope_id;
+ 	else
+ 		fl.oif = sk->bound_dev_if;
+@@ -188,7 +177,7 @@
+ 
+ 	if (np->opt && np->opt->srcrt) {
+ 		struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt;
+-		fl.fl6_dst = rt0->addr;
++		ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
+ 	}
+ 
+ 	SCTP_DEBUG_PRINTK("%s: skb:%p, len:%d, "
+@@ -213,7 +202,7 @@
+ 	struct flowi fl;
+ 
+ 	memset(&fl, 0, sizeof(fl));
+-	fl.fl6_dst = &daddr->v6.sin6_addr;
++	ipv6_addr_copy(&fl.fl6_dst, &daddr->v6.sin6_addr);
+ 	if (ipv6_addr_type(&daddr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL)
+ 		fl.oif = daddr->v6.sin6_scope_id;
+ 	
+@@ -222,7 +211,7 @@
+ 			  __FUNCTION__, NIP6(fl.fl6_dst));
+ 
+ 	if (saddr) {
+-		fl.fl6_src = &saddr->v6.sin6_addr;
++		ipv6_addr_copy(&fl.fl6_src, &saddr->v6.sin6_addr);
+ 		SCTP_DEBUG_PRINTK(
+ 			"SRC=%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x - ",
+ 			NIP6(fl.fl6_src));
+@@ -235,7 +224,7 @@
+ 		SCTP_DEBUG_PRINTK(
+ 			"rt6_dst:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x "
+ 			"rt6_src:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+-			NIP6(&rt->rt6i_dst.addr), NIP6(&rt->rt6i_src.addr));
++			NIP6(rt->rt6i_dst.addr), NIP6(rt->rt6i_src.addr));
+ 	} else {
+ 		SCTP_DEBUG_PRINTK("NO ROUTE\n");
+ 	}
+@@ -284,13 +273,13 @@
+ 
+ 	SCTP_DEBUG_PRINTK("%s: asoc:%p dst:%p "
+ 			  "daddr:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+-			  __FUNCTION__, asoc, dst, NIP6(&daddr->v6.sin6_addr));
++			  __FUNCTION__, asoc, dst, NIP6(daddr->v6.sin6_addr));
+ 
+ 	if (!asoc) {
+ 		ipv6_get_saddr(dst, &daddr->v6.sin6_addr,&saddr->v6.sin6_addr);
+ 		SCTP_DEBUG_PRINTK("saddr from ipv6_get_saddr: "
+ 				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+-				  NIP6(&saddr->v6.sin6_addr));
++				  NIP6(saddr->v6.sin6_addr));
+ 		return;
+ 	}
+ 
+@@ -319,12 +308,12 @@
+ 		memcpy(saddr, baddr, sizeof(union sctp_addr));
+ 		SCTP_DEBUG_PRINTK("saddr: "
+ 				  "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+-				  NIP6(&saddr->v6.sin6_addr));
++				  NIP6(saddr->v6.sin6_addr));
+ 	} else {
+ 		printk(KERN_ERR "%s: asoc:%p Could not find a valid source "
+ 		       "address for the "
+ 		       "dest:%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x\n",
+-		       __FUNCTION__, asoc, NIP6(&daddr->v6.sin6_addr));
++		       __FUNCTION__, asoc, NIP6(daddr->v6.sin6_addr));
+ 	}
+ 
+ 	sctp_read_unlock(addr_lock);
+@@ -640,7 +629,7 @@
+ 	/* Init the ipv4 part of the socket since we can have sockets
+ 	 * using v6 API for ipv4.
+ 	 */
+-	newinet->ttl = sysctl_ip_default_ttl;
++	newinet->uc_ttl = -1;
+ 	newinet->mc_loop = 1;
+ 	newinet->mc_ttl = 1;
+ 	newinet->mc_index = 0;
+@@ -689,7 +678,7 @@
+ static void sctp_v6_seq_dump_addr(struct seq_file *seq, union sctp_addr *addr)
+ {
+ 	seq_printf(seq, "%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x ",
+-		   NIP6(&addr->v6.sin6_addr));
++		   NIP6(addr->v6.sin6_addr));
+ }
+ 
+ /* Initialize a PF_INET6 socket msg_name. */
+@@ -923,14 +912,15 @@
+ 	.flags         = SCTP_PROTOSW_FLAG,
+ };
+ 
++static int sctp6_rcv(struct sk_buff **pskb, unsigned int *nhoffp)
++{
++	return sctp_rcv(*pskb) ? -1 : 0;
++}
++
+ static struct inet6_protocol sctpv6_protocol = {
+-	.handler      = sctp_rcv,
++	.handler      = sctp6_rcv,
+ 	.err_handler  = sctp_v6_err,
+-	.next         = NULL,
+-	.protocol     = IPPROTO_SCTP,
+-	.copy         = 0,
+-	.data         = NULL,
+-	.name         = "SCTPv6",
++	.flags        = INET6_PROTO_NOPOLICY | INET6_PROTO_FINAL,
+ };
+ 
+ static struct sctp_af sctp_ipv6_specific = {
+@@ -978,7 +968,8 @@
+ int sctp_v6_init(void)
+ {
+ 	/* Register inet6 protocol. */
+-	inet6_add_protocol(&sctpv6_protocol);
++	if (inet6_add_protocol(&sctpv6_protocol, IPPROTO_SCTP) < 0)
++		return -EAGAIN;
+ 
+ 	/* Add SCTPv6(UDP and TCP style) to inetsw6 linked list. */
+ 	inet6_register_protosw(&sctpv6_seqpacket_protosw);
+@@ -1000,7 +991,7 @@
+ void sctp_v6_exit(void)
+ {
+ 	list_del(&sctp_ipv6_specific.list);
+-	inet6_del_protocol(&sctpv6_protocol);
++	inet6_del_protocol(&sctpv6_protocol, IPPROTO_SCTP);
+ 	inet6_unregister_protosw(&sctpv6_seqpacket_protosw);
+ 	inet6_unregister_protosw(&sctpv6_stream_protosw);
+ 	unregister_inet6addr_notifier(&sctp_inet6addr_notifier);
+diff -Nru a/net/sctp/protocol.c b/net/sctp/protocol.c
+--- a/net/sctp/protocol.c	2005-02-13 21:25:09 +11:00
++++ b/net/sctp/protocol.c	2005-02-13 21:25:09 +11:00
+@@ -433,7 +433,7 @@
+ 					 union sctp_addr *saddr)
+ {
+ 	struct rtable *rt;
+-	struct rt_key key;
++	struct flowi fl;
+ 	struct sctp_bind_addr *bp;
+ 	rwlock_t *addr_lock;
+ 	struct sctp_sockaddr_entry *laddr;
+@@ -441,21 +441,21 @@
+ 	struct dst_entry *dst = NULL;
+ 	union sctp_addr dst_saddr;
+ 
+-	memset(&key, 0x0, sizeof(struct rt_key));
+-	key.dst = daddr->v4.sin_addr.s_addr;
+-
++	memset(&fl, 0x0, sizeof(struct flowi));
++	fl.fl4_dst = daddr->v4.sin_addr.s_addr;
++	fl.proto = IPPROTO_SCTP;
+ 	if (asoc) {
+-		key.tos = RT_CONN_FLAGS(asoc->base.sk);
+-		key.oif = asoc->base.sk->bound_dev_if;
++		fl.fl4_tos = RT_CONN_FLAGS(asoc->base.sk);
++		fl.oif = asoc->base.sk->bound_dev_if;
+ 	}
+ 	if (saddr)
+-		key.src = saddr->v4.sin_addr.s_addr;
++		fl.fl4_src = saddr->v4.sin_addr.s_addr;
+ 
+ 	SCTP_DEBUG_PRINTK("%s: DST:%u.%u.%u.%u, SRC:%u.%u.%u.%u - ",
+-			  __FUNCTION__, NIPQUAD(key.dst),
+-			  NIPQUAD(key.src));
++			  __FUNCTION__, NIPQUAD(fl.fl4_dst),
++			  NIPQUAD(fl.fl4_src));
+ 
+-	if (!ip_route_output_key(&rt, &key)) {
++	if (!ip_route_output_key(&rt, &fl)) {
+ 		dst = &rt->u.dst;
+ 	}
+ 
+@@ -497,8 +497,8 @@
+ 		laddr = list_entry(pos, struct sctp_sockaddr_entry, list);
+ 
+ 		if (AF_INET == laddr->a.sa.sa_family) {
+-			key.src = laddr->a.v4.sin_addr.s_addr;
+-			if (!ip_route_output_key(&rt, &key)) {
++			fl.fl4_src = laddr->a.v4.sin_addr.s_addr;
++			if (!ip_route_output_key(&rt, &fl)) {
+ 				dst = &rt->u.dst;
+ 				goto out_unlock;
+ 			}
+@@ -587,7 +587,7 @@
+ 	newinet->pmtudisc = inet->pmtudisc;
+       	newinet->id = 0;
+ 
+-	newinet->ttl = sysctl_ip_default_ttl;
++	newinet->uc_ttl = -1;
+ 	newinet->mc_loop = 1;
+ 	newinet->mc_ttl = 1;
+ 	newinet->mc_index = 0;
+@@ -656,7 +656,7 @@
+ 		return err;
+ 	}
+ 	sctp_ctl_socket->sk->allocation = GFP_ATOMIC;
+-	inet_sk(sctp_ctl_socket->sk)->ttl = MAXTTL;
++	inet_sk(sctp_ctl_socket->sk)->uc_ttl = -1;
+ 
+ 	return 0;
+ }
+@@ -872,8 +872,7 @@
+ static struct inet_protocol sctp_protocol = {
+ 	.handler     = sctp_rcv,
+ 	.err_handler = sctp_v4_err,
+-	.protocol    = IPPROTO_SCTP,
+-	.name        = "SCTP"
++	.no_policy   = 1,
+ };
+ 
+ /* IPv4 address related functions.  */
+@@ -960,7 +959,8 @@
+ 		return -EINVAL;
+ 
+ 	/* Add SCTP to inet_protos hash table.  */
+-	inet_add_protocol(&sctp_protocol);
++	if (inet_add_protocol(&sctp_protocol, IPPROTO_SCTP) < 0)
++		return -EAGAIN;
+ 
+ 	/* Add SCTP(TCP and UDP style) to inetsw linked list.  */
+ 	inet_register_protosw(&sctp_seqpacket_protosw);
+@@ -1154,7 +1154,7 @@
+ err_init_mibs:
+ 	kmem_cache_destroy(sctp_chunk_cachep);
+ err_chunk_cachep:
+-	inet_del_protocol(&sctp_protocol);
++	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+ 	inet_unregister_protosw(&sctp_seqpacket_protosw);
+ 	inet_unregister_protosw(&sctp_stream_protosw);
+ 	return status;
+@@ -1194,7 +1194,7 @@
+ 	sctp_proc_exit();
+ 	cleanup_sctp_mibs();
+ 
+-	inet_del_protocol(&sctp_protocol);
++	inet_del_protocol(&sctp_protocol, IPPROTO_SCTP);
+ 	inet_unregister_protosw(&sctp_seqpacket_protosw);
+ 	inet_unregister_protosw(&sctp_stream_protosw);
+ }
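The registration hunks above reflect the other 2.6 API change in this file: inet_add_protocol()/inet_del_protocol() now take the protocol number as a separate argument and can fail, and the .no_policy flag tells the generic xfrm input hook not to apply IPsec policy for this protocol (it checks for itself). New-style registration, as a sketch with placeholder names (IPPROTO_FOO, foo_rcv, foo_err):

	static struct inet_protocol foo_protocol = {
		.handler     = foo_rcv,
		.err_handler = foo_err,
		.no_policy   = 1,	/* does its own xfrm_policy_check() */
	};

	static int __init foo_init(void)
	{
		if (inet_add_protocol(&foo_protocol, IPPROTO_FOO) < 0)
			return -EAGAIN;
		return 0;
	}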
+diff -Nru a/net/xfrm/Config.in b/net/xfrm/Config.in
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/Config.in	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,4 @@
++#
++# XFRM configuration
++#
++tristate '  IP: IPsec user configuration interface' CONFIG_XFRM_USER
+diff -Nru a/net/xfrm/Makefile b/net/xfrm/Makefile
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/Makefile	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,13 @@
++#
++# Makefile for the XFRM subsystem.
++#
++
++O_TARGET := xfrm.o
++
++export-objs = xfrm_export.o
++
++obj-$(CONFIG_XFRM) := xfrm_policy.o xfrm_state.o xfrm_input.o xfrm_algo.o \
++	xfrm_export.o
++obj-$(CONFIG_XFRM_USER) += xfrm_user.o
++
++include $(TOPDIR)/Rules.make
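Together with the Config.in fragment above this wires the subsystem into the 2.4 build. A configuration that exercises the backport would look roughly like the following (a sketch; only options visible in this patch are listed, and building CONFIG_XFRM as a module is assumed not to be intended given the export-objs handling):

	CONFIG_XFRM=y
	CONFIG_XFRM_USER=m
	CONFIG_INET_AH=m
	CONFIG_INET_ESP=m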
+diff -Nru a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_algo.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,729 @@
++/* 
++ * xfrm algorithm interface
++ *
++ * Copyright (c) 2002 James Morris <jmorris at intercode.com.au>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the Free
++ * Software Foundation; either version 2 of the License, or (at your option) 
++ * any later version.
++ */
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/pfkeyv2.h>
++#include <net/xfrm.h>
++#if defined(CONFIG_INET_AH) || defined(CONFIG_INET_AH_MODULE) || defined(CONFIG_INET6_AH) || defined(CONFIG_INET6_AH_MODULE)
++#include <net/ah.h>
++#endif
++#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
++#include <net/esp.h>
++#endif
++#include <asm/scatterlist.h>
++
++/*
++ * Algorithms supported by IPsec.  These entries contain properties which
++ * are used in key negotiation and xfrm processing, and are used to verify
++ * that instantiated crypto transforms have correct parameters for IPsec
++ * purposes.
++ */
++static struct xfrm_algo_desc aalg_list[] = {
++{
++	.name = "digest_null",
++	
++	.uinfo = {
++		.auth = {
++			.icv_truncbits = 0,
++			.icv_fullbits = 0,
++		}
++	},
++	
++	.desc = {
++		.sadb_alg_id = SADB_X_AALG_NULL,
++		.sadb_alg_ivlen = 0,
++		.sadb_alg_minbits = 0,
++		.sadb_alg_maxbits = 0
++	}
++},
++{
++	.name = "md5",
++
++	.uinfo = {
++		.auth = {
++			.icv_truncbits = 96,
++			.icv_fullbits = 128,
++		}
++	},
++	
++	.desc = {
++		.sadb_alg_id = SADB_AALG_MD5HMAC,
++		.sadb_alg_ivlen = 0,
++		.sadb_alg_minbits = 128,
++		.sadb_alg_maxbits = 128
++	}
++},
++{
++	.name = "sha1",
++
++	.uinfo = {
++		.auth = {
++			.icv_truncbits = 96,
++			.icv_fullbits = 160,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_AALG_SHA1HMAC,
++		.sadb_alg_ivlen = 0,
++		.sadb_alg_minbits = 160,
++		.sadb_alg_maxbits = 160
++	}
++},
++{
++	.name = "sha256",
++
++	.uinfo = {
++		.auth = {
++			.icv_truncbits = 96,
++			.icv_fullbits = 256,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_X_AALG_SHA2_256HMAC,
++		.sadb_alg_ivlen = 0,
++		.sadb_alg_minbits = 256,
++		.sadb_alg_maxbits = 256
++	}
++},
++{
++	.name = "ripemd160",
++
++	.uinfo = {
++		.auth = {
++			.icv_truncbits = 96,
++			.icv_fullbits = 160,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_X_AALG_RIPEMD160HMAC,
++		.sadb_alg_ivlen = 0,
++		.sadb_alg_minbits = 160,
++		.sadb_alg_maxbits = 160
++	}
++},
++};
++
++static struct xfrm_algo_desc ealg_list[] = {
++{
++	.name = "cipher_null",
++	
++	.uinfo = {
++		.encr = {
++			.blockbits = 8,
++			.defkeybits = 0,
++		}
++	},
++	
++	.desc = {
++		.sadb_alg_id =	SADB_EALG_NULL,
++		.sadb_alg_ivlen = 0,
++		.sadb_alg_minbits = 0,
++		.sadb_alg_maxbits = 0
++	}
++},
++{
++	.name = "des",
++
++	.uinfo = {
++		.encr = {
++			.blockbits = 64,
++			.defkeybits = 64,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_EALG_DESCBC,
++		.sadb_alg_ivlen = 8,
++		.sadb_alg_minbits = 64,
++		.sadb_alg_maxbits = 64
++	}
++},
++{
++	.name = "des3_ede",
++
++	.uinfo = {
++		.encr = {
++			.blockbits = 64,
++			.defkeybits = 192,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_EALG_3DESCBC,
++		.sadb_alg_ivlen = 8,
++		.sadb_alg_minbits = 192,
++		.sadb_alg_maxbits = 192
++	}
++},
++{
++	.name = "cast128",
++
++	.uinfo = {
++		.encr = {
++			.blockbits = 64,
++			.defkeybits = 128,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_X_EALG_CASTCBC,
++		.sadb_alg_ivlen = 8,
++		.sadb_alg_minbits = 40,
++		.sadb_alg_maxbits = 128
++	}
++},
++{
++	.name = "blowfish",
++
++	.uinfo = {
++		.encr = {
++			.blockbits = 64,
++			.defkeybits = 128,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_X_EALG_BLOWFISHCBC,
++		.sadb_alg_ivlen = 8,
++		.sadb_alg_minbits = 40,
++		.sadb_alg_maxbits = 448
++	}
++},
++{
++	.name = "aes",
++
++	.uinfo = {
++		.encr = {
++			.blockbits = 128,
++			.defkeybits = 128,
++		}
++	},
++
++	.desc = {
++		.sadb_alg_id = SADB_X_EALG_AESCBC,
++		.sadb_alg_ivlen = 8,
++		.sadb_alg_minbits = 128,
++		.sadb_alg_maxbits = 256
++	}
++},
++{
++        .name = "serpent",
++
++        .uinfo = {
++                .encr = {
++                        .blockbits = 128,
++                        .defkeybits = 128,
++                }
++        },
++
++        .desc = {
++                .sadb_alg_id = SADB_X_EALG_SERPENTCBC,
++                .sadb_alg_ivlen = 8,
++                .sadb_alg_minbits = 128,
++                .sadb_alg_maxbits = 256,
++        }
++},
++{
++        .name = "twofish",
++                 
++        .uinfo = {
++                .encr = {
++                        .blockbits = 128,
++                        .defkeybits = 128,
++                }
++        },
++
++        .desc = {
++                .sadb_alg_id = SADB_X_EALG_TWOFISHCBC,
++                .sadb_alg_ivlen = 8,
++                .sadb_alg_minbits = 128,
++                .sadb_alg_maxbits = 256
++        }
++},
++};
++
++static struct xfrm_algo_desc calg_list[] = {
++{
++	.name = "deflate",
++	.uinfo = {
++		.comp = {
++			.threshold = 90,
++		}
++	},
++	.desc = { .sadb_alg_id = SADB_X_CALG_DEFLATE }
++},
++{
++	.name = "lzs",
++	.uinfo = {
++		.comp = {
++			.threshold = 90,
++		}
++	},
++	.desc = { .sadb_alg_id = SADB_X_CALG_LZS }
++},
++{
++	.name = "lzjh",
++	.uinfo = {
++		.comp = {
++			.threshold = 50,
++		}
++	},
++	.desc = { .sadb_alg_id = SADB_X_CALG_LZJH }
++},
++};
++
++static inline int aalg_entries(void)
++{
++	return sizeof(aalg_list) / sizeof(aalg_list[0]);
++}
++
++static inline int ealg_entries(void)
++{
++	return sizeof(ealg_list) / sizeof(ealg_list[0]);
++}
++
++static inline int calg_entries(void)
++{
++	return sizeof(calg_list) / sizeof(calg_list[0]);
++}
++
++/* Todo: generic iterators */
++struct xfrm_algo_desc *xfrm_aalg_get_byid(int alg_id)
++{
++	int i;
++
++	for (i = 0; i < aalg_entries(); i++) {
++		if (aalg_list[i].desc.sadb_alg_id == alg_id) {
++			if (aalg_list[i].available)
++				return &aalg_list[i];
++			else
++				break;
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_algo_desc *xfrm_ealg_get_byid(int alg_id)
++{
++	int i;
++
++	for (i = 0; i < ealg_entries(); i++) {
++		if (ealg_list[i].desc.sadb_alg_id == alg_id) {
++			if (ealg_list[i].available)
++				return &ealg_list[i];
++			else
++				break;
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id)
++{
++	int i;
++
++	for (i = 0; i < calg_entries(); i++) {
++		if (calg_list[i].desc.sadb_alg_id == alg_id) {
++			if (calg_list[i].available)
++				return &calg_list[i];
++			else
++				break;
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name)
++{
++	int i;
++
++	if (!name)
++		return NULL;
++
++	for (i=0; i < aalg_entries(); i++) {
++		if (strcmp(name, aalg_list[i].name) == 0) {
++			if (aalg_list[i].available)
++				return &aalg_list[i];
++			else
++				break;
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name)
++{
++	int i;
++
++	if (!name)
++		return NULL;
++
++	for (i=0; i < ealg_entries(); i++) {
++		if (strcmp(name, ealg_list[i].name) == 0) {
++			if (ealg_list[i].available)
++				return &ealg_list[i];
++			else
++				break;
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_algo_desc *xfrm_calg_get_byname(char *name)
++{
++	int i;
++
++	if (!name)
++		return NULL;
++
++	for (i=0; i < calg_entries(); i++) {
++		if (strcmp(name, calg_list[i].name) == 0) {
++			if (calg_list[i].available)
++				return &calg_list[i];
++			else
++				break;
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_algo_desc *xfrm_aalg_get_byidx(unsigned int idx)
++{
++	if (idx >= aalg_entries())
++		return NULL;
++
++	return &aalg_list[idx];
++}
++
++struct xfrm_algo_desc *xfrm_ealg_get_byidx(unsigned int idx)
++{
++	if (idx >= ealg_entries())
++		return NULL;
++
++	return &ealg_list[idx];
++}
++
++struct xfrm_algo_desc *xfrm_calg_get_byidx(unsigned int idx)
++{
++	if (idx >= calg_entries())
++		return NULL;
++
++	return &calg_list[idx];
++}
++
++/*
++ * Probe for the availability of crypto algorithms, and set the available
++ * flag for any algorithms found on the system.  This is typically called by
++ * pfkey during userspace SA add, update or register.
++ */
++void xfrm_probe_algs(void)
++{
++#ifdef CONFIG_CRYPTO
++	int i, status;
++	
++	BUG_ON(in_softirq());
++
++	for (i = 0; i < aalg_entries(); i++) {
++		status = crypto_alg_available(aalg_list[i].name, 0);
++		if (aalg_list[i].available != status)
++			aalg_list[i].available = status;
++	}
++	
++	for (i = 0; i < ealg_entries(); i++) {
++		status = crypto_alg_available(ealg_list[i].name, 0);
++		if (ealg_list[i].available != status)
++			ealg_list[i].available = status;
++	}
++	
++	for (i = 0; i < calg_entries(); i++) {
++		status = crypto_alg_available(calg_list[i].name, 0);
++		if (calg_list[i].available != status)
++			calg_list[i].available = status;
++	}
++#endif
++}
++
++int xfrm_count_auth_supported(void)
++{
++	int i, n;
++
++	for (i = 0, n = 0; i < aalg_entries(); i++)
++		if (aalg_list[i].available)
++			n++;
++	return n;
++}
++
++int xfrm_count_enc_supported(void)
++{
++	int i, n;
++
++	for (i = 0, n = 0; i < ealg_entries(); i++)
++		if (ealg_list[i].available)
++			n++;
++	return n;
++}
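These tables and lookup helpers are what the key managers consult when an SA is installed or algorithms are advertised. Typical use from an SA add path (a sketch; note xfrm_probe_algs() may load crypto modules, hence the BUG_ON(in_softirq()) above):

	struct xfrm_algo_desc *aalg;

	xfrm_probe_algs();		/* process context only */
	aalg = xfrm_aalg_get_byname("sha1");
	if (!aalg)
		return -ENOSYS;		/* no sha1 transform available */
	/* aalg->uinfo.auth: icv_truncbits == 96, icv_fullbits == 160 */
	/* aalg->desc: sadb_alg_{min,max}bits bound the accepted key length */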
++
++/* Move to a common area: it is shared with AH. */
++
++void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm,
++		  int offset, int len, icv_update_fn_t icv_update)
++{
++	int start = skb->len - skb->data_len;
++	int i, copy = start - offset;
++	struct scatterlist sg;
++
++	/* Checksum header. */
++	if (copy > 0) {
++		if (copy > len)
++			copy = len;
++		
++		sg.page = virt_to_page(skb->data + offset);
++		sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
++		sg.length = copy;
++		
++		icv_update(tfm, &sg, 1);
++		
++		if ((len -= copy) == 0)
++			return;
++		offset += copy;
++	}
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++		int end;
++
++		BUG_TRAP(start <= offset + len);
++
++		end = start + skb_shinfo(skb)->frags[i].size;
++		if ((copy = end - offset) > 0) {
++			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
++
++			if (copy > len)
++				copy = len;
++			
++			sg.page = frag->page;
++			sg.offset = frag->page_offset + offset-start;
++			sg.length = copy;
++			
++			icv_update(tfm, &sg, 1);
++
++			if (!(len -= copy))
++				return;
++			offset += copy;
++		}
++		start = end;
++	}
++
++	if (skb_shinfo(skb)->frag_list) {
++		struct sk_buff *list = skb_shinfo(skb)->frag_list;
++
++		for (; list; list = list->next) {
++			int end;
++
++			BUG_TRAP(start <= offset + len);
++
++			end = start + list->len;
++			if ((copy = end - offset) > 0) {
++				if (copy > len)
++					copy = len;
++				skb_icv_walk(list, tfm, offset-start, copy, icv_update);
++				if ((len -= copy) == 0)
++					return;
++				offset += copy;
++			}
++			start = end;
++		}
++	}
++	if (len)
++		BUG();
++}
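skb_icv_walk() feeds every byte of a possibly nonlinear skb to a digest callback without linearizing it. This is roughly how the AH code is expected to drive it for the HMAC ICV; a sketch assuming the 2.6-style struct ah_data fields (tfm, key, key_len, work_icv, icv_trunc_len) and the crypto_hmac_* API:

	static void ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb,
				   u8 *auth_data)
	{
		struct crypto_tfm *tfm = ahp->tfm;

		memset(auth_data, 0, ahp->icv_trunc_len);
		crypto_hmac_init(tfm, ahp->key, &ahp->key_len);
		skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update);
		crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv);
		memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len);
	}

crypto_hmac_update() already has the icv_update_fn_t signature, which is why the walker takes a function pointer rather than hardcoding the digest.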
++
++#if defined(CONFIG_INET_ESP) || defined(CONFIG_INET_ESP_MODULE) || defined(CONFIG_INET6_ESP) || defined(CONFIG_INET6_ESP_MODULE)
++
++/* Although it looks generic, it is not used in other places. */
++
++int
++skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
++{
++	int start = skb->len - skb->data_len;
++	int i, copy = start - offset;
++	int elt = 0;
++
++	if (copy > 0) {
++		if (copy > len)
++			copy = len;
++		sg[elt].page = virt_to_page(skb->data + offset);
++		sg[elt].offset = (unsigned long)(skb->data + offset) % PAGE_SIZE;
++		sg[elt].length = copy;
++		elt++;
++		if ((len -= copy) == 0)
++			return elt;
++		offset += copy;
++	}
++
++	for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
++		int end;
++
++		BUG_TRAP(start <= offset + len);
++
++		end = start + skb_shinfo(skb)->frags[i].size;
++		if ((copy = end - offset) > 0) {
++			skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
++
++			if (copy > len)
++				copy = len;
++			sg[elt].page = frag->page;
++			sg[elt].offset = frag->page_offset+offset-start;
++			sg[elt].length = copy;
++			elt++;
++			if (!(len -= copy))
++				return elt;
++			offset += copy;
++		}
++		start = end;
++	}
++
++	if (skb_shinfo(skb)->frag_list) {
++		struct sk_buff *list = skb_shinfo(skb)->frag_list;
++
++		for (; list; list = list->next) {
++			int end;
++
++			BUG_TRAP(start <= offset + len);
++
++			end = start + list->len;
++			if ((copy = end - offset) > 0) {
++				if (copy > len)
++					copy = len;
++				elt += skb_to_sgvec(list, sg+elt, offset - start, copy);
++				if ((len -= copy) == 0)
++					return elt;
++				offset += copy;
++			}
++			start = end;
++		}
++	}
++	if (len)
++		BUG();
++	return elt;
++}
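ESP uses skb_to_sgvec() to hand the cipher a scatterlist describing the payload in place, so nonlinear skbs are encrypted without copying. Condensed from the shape of the 2.6 esp4 output path (a sketch; esph, esp, tfm and clen are the names that code uses, and nfrags comes from skb_cow_data(), defined just below):

	struct scatterlist *sg;

	sg = kmalloc(nfrags * sizeof(struct scatterlist), GFP_ATOMIC);
	if (!sg)
		goto error;
	skb_to_sgvec(skb, sg,
		     esph->enc_data + esp->conf.ivlen - skb->data, clen);
	crypto_cipher_encrypt(tfm, sg, sg, clen);	/* encrypt in place */
	kfree(sg);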
++
++/* Check that the skb data bits are writable. If they are not, copy the
++ * data to a newly created private area. If "tailbits" is given, make sure
++ * that tailbits bytes beyond the current end of the skb are writable.
++ *
++ * Returns the number of scatterlist elements to load for subsequent
++ * transformations, and a pointer to the writable trailer skb.
++ */
++
++int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer)
++{
++	int copyflag;
++	int elt;
++	struct sk_buff *skb1, **skb_p;
++
++	/* If skb is cloned or its head is paged, reallocate
++	 * head pulling out all the pages (pages are considered not writable
++	 * at the moment even if they are anonymous).
++	 */
++	if ((skb_cloned(skb) || skb_shinfo(skb)->nr_frags) &&
++	    __pskb_pull_tail(skb, skb_pagelen(skb)-skb_headlen(skb)) == NULL)
++		return -ENOMEM;
++
++	/* Easy case. Most packets will go this way. */
++	if (!skb_shinfo(skb)->frag_list) {
++		/* A little trouble: not enough space for the trailer.
++		 * This should not happen when the stack is tuned to generate
++		 * good frames. On a miss we reallocate and reserve even more
++		 * space; 128 bytes is fair. */
++
++		if (skb_tailroom(skb) < tailbits &&
++		    pskb_expand_head(skb, 0, tailbits-skb_tailroom(skb)+128, GFP_ATOMIC))
++			return -ENOMEM;
++
++		/* Voila! */
++		*trailer = skb;
++		return 1;
++	}
++
++	/* Misery. We are in trouble, going to mince the fragments... */
++
++	elt = 1;
++	skb_p = &skb_shinfo(skb)->frag_list;
++	copyflag = 0;
++
++	while ((skb1 = *skb_p) != NULL) {
++		int ntail = 0;
++
++		/* Someone has partially pulled this fragment;
++		 * that can happen on input. Copy it and everything
++		 * after it. */
++
++		if (skb_shared(skb1))
++			copyflag = 1;
++
++		/* If the skb is the last, worry about trailer. */
++
++		if (skb1->next == NULL && tailbits) {
++			if (skb_shinfo(skb1)->nr_frags ||
++			    skb_shinfo(skb1)->frag_list ||
++			    skb_tailroom(skb1) < tailbits)
++				ntail = tailbits + 128;
++		}
++
++		if (copyflag ||
++		    skb_cloned(skb1) ||
++		    ntail ||
++		    skb_shinfo(skb1)->nr_frags ||
++		    skb_shinfo(skb1)->frag_list) {
++			struct sk_buff *skb2;
++
++			/* Worst case: fall back to copying the fragment. */
++			if (ntail == 0)
++				skb2 = skb_copy(skb1, GFP_ATOMIC);
++			else
++				skb2 = skb_copy_expand(skb1,
++						       skb_headroom(skb1),
++						       ntail,
++						       GFP_ATOMIC);
++			if (unlikely(skb2 == NULL))
++				return -ENOMEM;
++
++			if (skb1->sk)
++				skb_set_owner_w(skb2, skb1->sk);
++
++			/* Looking around. Are we still alive?
++			 * OK, link new skb, drop old one */
++
++			skb2->next = skb1->next;
++			*skb_p = skb2;
++			kfree_skb(skb1);
++			skb1 = skb2;
++		}
++		elt++;
++		*trailer = skb1;
++		skb_p = &skb1->next;
++	}
++
++	return elt;
++}
++
++void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len)
++{
++	if (tail != skb) {
++		skb->data_len += len;
++		skb->len += len;
++	}
++	return skb_put(tail, len);
++}
++#endif
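skb_cow_data() and pskb_put() are the other half of that pattern: make the whole packet writable, make room for the ESP padding and ICV, and append them even when the packet's tail lives in a frag_list skb. Roughly (a sketch; trailer_len, covering the padding, pad-length byte, next-header byte and truncated ICV, is a hypothetical name computed by the caller):

	struct sk_buff *trailer;
	u8 *tail;
	int nfrags;

	nfrags = skb_cow_data(skb, trailer_len, &trailer);
	if (nfrags < 0)
		goto error;
	tail = pskb_put(skb, trailer, trailer_len);	/* writable trailer */
	/* nfrags also bounds the scatterlist size for skb_to_sgvec() above */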
+diff -Nru a/net/xfrm/xfrm_export.c b/net/xfrm/xfrm_export.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_export.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,63 @@
++#include <linux/module.h>
++#include <net/xfrm.h>
++
++EXPORT_SYMBOL(xfrm_user_policy);
++EXPORT_SYMBOL(km_waitq);
++EXPORT_SYMBOL(km_new_mapping);
++EXPORT_SYMBOL(xfrm_cfg_sem);
++EXPORT_SYMBOL(xfrm_policy_alloc);
++EXPORT_SYMBOL(__xfrm_policy_destroy);
++EXPORT_SYMBOL(xfrm_lookup);
++EXPORT_SYMBOL(__xfrm_policy_check);
++EXPORT_SYMBOL(__xfrm_route_forward);
++EXPORT_SYMBOL(xfrm_state_alloc);
++EXPORT_SYMBOL(__xfrm_state_destroy);
++EXPORT_SYMBOL(xfrm_state_insert);
++EXPORT_SYMBOL(xfrm_state_add);
++EXPORT_SYMBOL(xfrm_state_update);
++EXPORT_SYMBOL(xfrm_state_check_expire);
++EXPORT_SYMBOL(xfrm_state_check);
++EXPORT_SYMBOL(xfrm_state_lookup);
++EXPORT_SYMBOL(xfrm_state_register_afinfo);
++EXPORT_SYMBOL(xfrm_state_unregister_afinfo);
++EXPORT_SYMBOL(xfrm_state_delete_tunnel);
++EXPORT_SYMBOL(xfrm_replay_check);
++EXPORT_SYMBOL(xfrm_replay_advance);
++EXPORT_SYMBOL(__secpath_destroy);
++EXPORT_SYMBOL(secpath_dup);
++EXPORT_SYMBOL(xfrm_get_acqseq);
++EXPORT_SYMBOL(xfrm_parse_spi);
++EXPORT_SYMBOL(xfrm_register_type);
++EXPORT_SYMBOL(xfrm_unregister_type);
++EXPORT_SYMBOL(xfrm_get_type);
++EXPORT_SYMBOL(xfrm_register_km);
++EXPORT_SYMBOL(xfrm_unregister_km);
++EXPORT_SYMBOL(xfrm_state_delete);
++EXPORT_SYMBOL(xfrm_state_walk);
++EXPORT_SYMBOL(xfrm_find_acq_byseq);
++EXPORT_SYMBOL(xfrm_find_acq);
++EXPORT_SYMBOL(xfrm_alloc_spi);
++EXPORT_SYMBOL(xfrm_state_flush);
++EXPORT_SYMBOL(xfrm_policy_bysel);
++EXPORT_SYMBOL(xfrm_policy_insert);
++EXPORT_SYMBOL(xfrm_policy_walk);
++EXPORT_SYMBOL(xfrm_policy_flush);
++EXPORT_SYMBOL(xfrm_policy_byid);
++EXPORT_SYMBOL(xfrm_policy_list);
++EXPORT_SYMBOL(xfrm_dst_lookup);
++EXPORT_SYMBOL(xfrm_policy_register_afinfo);
++EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
++
++EXPORT_SYMBOL_GPL(xfrm_probe_algs);
++EXPORT_SYMBOL_GPL(xfrm_count_auth_supported);
++EXPORT_SYMBOL_GPL(xfrm_count_enc_supported);
++EXPORT_SYMBOL_GPL(xfrm_aalg_get_byidx);
++EXPORT_SYMBOL_GPL(xfrm_ealg_get_byidx);
++EXPORT_SYMBOL_GPL(xfrm_calg_get_byidx);
++EXPORT_SYMBOL_GPL(xfrm_aalg_get_byid);
++EXPORT_SYMBOL_GPL(xfrm_ealg_get_byid);
++EXPORT_SYMBOL_GPL(xfrm_calg_get_byid);
++EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname);
++EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname);
++EXPORT_SYMBOL_GPL(xfrm_calg_get_byname);
++EXPORT_SYMBOL_GPL(skb_icv_walk);
+diff -Nru a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_input.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,85 @@
++/*
++ * xfrm_input.c
++ *
++ * Changes:
++ * 	YOSHIFUJI Hideaki @USAGI
++ * 		Split up af-specific portion
++ * 	
++ */
++
++#include <linux/slab.h>
++#include <net/ip.h>
++#include <net/xfrm.h>
++
++static kmem_cache_t *secpath_cachep;
++
++void __secpath_destroy(struct sec_path *sp)
++{
++	int i;
++	for (i = 0; i < sp->len; i++)
++		xfrm_state_put(sp->x[i].xvec);
++	kmem_cache_free(secpath_cachep, sp);
++}
++
++struct sec_path *secpath_dup(struct sec_path *src)
++{
++	struct sec_path *sp;
++
++	sp = kmem_cache_alloc(secpath_cachep, SLAB_ATOMIC);
++	if (!sp)
++		return NULL;
++
++	sp->len = 0;
++	if (src) {
++		int i;
++
++		memcpy(sp, src, sizeof(*sp));
++		for (i = 0; i < sp->len; i++)
++			xfrm_state_hold(sp->x[i].xvec);
++	}
++	atomic_set(&sp->refcnt, 1);
++	return sp;
++}
++
++/* Fetch spi and seq from ipsec header */
++
++int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq)
++{
++	int offset, offset_seq;
++
++	switch (nexthdr) {
++	case IPPROTO_AH:
++		offset = offsetof(struct ip_auth_hdr, spi);
++		offset_seq = offsetof(struct ip_auth_hdr, seq_no);
++		break;
++	case IPPROTO_ESP:
++		offset = offsetof(struct ip_esp_hdr, spi);
++		offset_seq = offsetof(struct ip_esp_hdr, seq_no);
++		break;
++	case IPPROTO_COMP:
++		if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr)))
++			return -EINVAL;
++		*spi = ntohl(ntohs(*(u16*)(skb->h.raw + 2)));
++		*seq = 0;
++		return 0;
++	default:
++		return 1;
++	}
++
++	if (!pskb_may_pull(skb, 16))
++		return -EINVAL;
++
++	*spi = *(u32*)(skb->h.raw + offset);
++	*seq = *(u32*)(skb->h.raw + offset_seq);
++	return 0;
++}
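The (spi, seq) pair parsed here is how the input path locates the SA before any transform runs; note how the 16-bit IPComp CPI is widened into the 32-bit SPI space. A protocol input handler is expected to do roughly this (a sketch; iph comes from the caller, and the replay check uses the exported xfrm_replay_check()):

	u32 spi, seq;
	struct xfrm_state *x;

	if (xfrm_parse_spi(skb, IPPROTO_ESP, &spi, &seq) != 0)
		goto drop;
	x = xfrm_state_lookup((xfrm_address_t *)&iph->daddr, spi,
			      IPPROTO_ESP, AF_INET);
	if (!x)
		goto drop;
	/* then: xfrm_replay_check(x, seq), and x->type->input(...) */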
++
++void __init xfrm_input_init(void)
++{
++	secpath_cachep = kmem_cache_create("secpath_cache",
++					   sizeof(struct sec_path),
++					   0, SLAB_HWCACHE_ALIGN,
++					   NULL, NULL);
++	if (!secpath_cachep)
++		panic("XFRM: failed to allocate secpath_cache\n");
++}
+diff -Nru a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_output.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,46 @@
++/* 
++ * generic xfrm output routines
++ *
++ * Copyright (c) 2003 James Morris <jmorris at intercode.com.au>
++ *
++ * This program is free software; you can redistribute it and/or modify it
++ * under the terms of the GNU General Public License as published by the Free
++ * Software Foundation; either version 2 of the License, or (at your option) 
++ * any later version.
++ */
++#include <linux/config.h>
++#include <linux/kernel.h>
++#include <linux/skbuff.h>
++#include <net/xfrm.h>
++
++int xfrm_check_output(struct xfrm_state *x,
++                      struct sk_buff *skb, unsigned short family)
++{
++	int err;
++	
++	err = xfrm_state_check_expire(x);
++	if (err)
++		goto out;
++		
++	if (x->props.mode) {
++		switch (family) {
++		case AF_INET:
++			err = xfrm4_tunnel_check_size(skb);
++			break;
++			
++		case AF_INET6:
++			err = xfrm6_tunnel_check_size(skb);
++			break;
++			
++		default:
++			err = -EINVAL;
++		}
++		
++		if (err)
++			goto out;
++	}
++
++	err = xfrm_state_check_space(x, skb);
++out:
++	return err;
++}
+diff -Nru a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_policy.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,1259 @@
++/* 
++ * xfrm_policy.c
++ *
++ * Changes:
++ *	Mitsuru KANDA @USAGI
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 		IPv6 support
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	YOSHIFUJI Hideaki
++ * 		Split up af-specific portion
++ *	Derek Atkins <derek at ihtfp.com>		Add the post_input processor
++ * 	
++ */
++
++#include <linux/config.h>
++#include <linux/slab.h>
++#include <linux/kmod.h>
++#include <linux/list.h>
++#include <linux/spinlock.h>
++#include <linux/tqueue.h>
++#include <linux/notifier.h>
++#include <linux/netdevice.h>
++#include <net/xfrm.h>
++#include <net/ip.h>
++
++DECLARE_MUTEX(xfrm_cfg_sem);
++
++static rwlock_t xfrm_policy_lock = RW_LOCK_UNLOCKED;
++
++struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2];
++
++static rwlock_t xfrm_policy_afinfo_lock = RW_LOCK_UNLOCKED;
++static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO];
++
++kmem_cache_t *xfrm_dst_cache;
++
++static struct tq_struct xfrm_policy_gc_work;
++static struct list_head xfrm_policy_gc_list =
++	LIST_HEAD_INIT(xfrm_policy_gc_list);
++static spinlock_t xfrm_policy_gc_lock = SPIN_LOCK_UNLOCKED;
++
++static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family);
++static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo);
++
++int xfrm_register_type(struct xfrm_type *type, unsigned short family)
++{
++	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
++	struct xfrm_type_map *typemap;
++	int err = 0;
++
++	if (unlikely(afinfo == NULL))
++		return -EAFNOSUPPORT;
++	typemap = afinfo->type_map;
++
++	write_lock(&typemap->lock);
++	if (likely(typemap->map[type->proto] == NULL))
++		typemap->map[type->proto] = type;
++	else
++		err = -EEXIST;
++	write_unlock(&typemap->lock);
++	xfrm_policy_put_afinfo(afinfo);
++	return err;
++}
++
++int xfrm_unregister_type(struct xfrm_type *type, unsigned short family)
++{
++	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
++	struct xfrm_type_map *typemap;
++	int err = 0;
++
++	if (unlikely(afinfo == NULL))
++		return -EAFNOSUPPORT;
++	typemap = afinfo->type_map;
++
++	write_lock(&typemap->lock);
++	if (unlikely(typemap->map[type->proto] != type))
++		err = -ENOENT;
++	else
++		typemap->map[type->proto] = NULL;
++	write_unlock(&typemap->lock);
++	xfrm_policy_put_afinfo(afinfo);
++	return err;
++}
++
++struct xfrm_type *xfrm_get_type(u8 proto, unsigned short family)
++{
++	struct xfrm_policy_afinfo *afinfo;
++	struct xfrm_type_map *typemap;
++	struct xfrm_type *type;
++	int modload_attempted = 0;
++
++retry:
++	afinfo = xfrm_policy_get_afinfo(family);
++	if (unlikely(afinfo == NULL))
++		return NULL;
++	typemap = afinfo->type_map;
++
++	read_lock(&typemap->lock);
++	type = typemap->map[proto];
++	if (type && type->owner)
++		__MOD_INC_USE_COUNT(type->owner);
++	read_unlock(&typemap->lock);
++	if (!type && !modload_attempted) {
++		char module_name[36];
++
++		xfrm_policy_put_afinfo(afinfo);
++		sprintf(module_name, "xfrm-type-%d-%d",
++			(int) family, (int) proto);
++		request_module(module_name);
++		modload_attempted = 1;
++		goto retry;
++	}
++
++	xfrm_policy_put_afinfo(afinfo);
++	return type;
++}
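The request_module() fallback means transform modules can be demand-loaded under a synthesized name, "xfrm-type-<family>-<proto>", e.g. xfrm-type-2-50 for ESP over AF_INET, so a 2.4 setup wants matching alias lines in /etc/modules.conf (something like "alias xfrm-type-2-50 esp4"; the module name esp4 is an assumption). A transform registers its type roughly like this (a sketch; handler fields elided, layout following the 2.6 struct xfrm_type):

	static struct xfrm_type esp_type = {
		.description	= "ESP4",
		.owner		= THIS_MODULE,	/* pinned via type->owner */
		.proto		= IPPROTO_ESP,
		/* .init_state, .input, .output, .post_input, ... */
	};

	static int __init esp4_init(void)
	{
		if (xfrm_register_type(&esp_type, AF_INET) < 0)
			return -EAGAIN;
		return 0;
	}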
++
++int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, 
++		    unsigned short family)
++{
++	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
++	int err = 0;
++
++	if (unlikely(afinfo == NULL))
++		return -EAFNOSUPPORT;
++
++	if (likely(afinfo->dst_lookup != NULL))
++		err = afinfo->dst_lookup(dst, fl);
++	else
++		err = -EINVAL;
++	xfrm_policy_put_afinfo(afinfo);
++	return err;
++}
++
++void xfrm_put_type(struct xfrm_type *type)
++{
++	if (type->owner)
++		__MOD_DEC_USE_COUNT(type->owner);
++}
++
++static inline unsigned long make_jiffies(long secs)
++{
++	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
++		return MAX_SCHEDULE_TIMEOUT-1;
++	else
++	        return secs*HZ;
++}
++
++static void xfrm_policy_timer(unsigned long data)
++{
++	struct xfrm_policy *xp = (struct xfrm_policy*)data;
++	unsigned long now = (unsigned long)xtime.tv_sec;
++	long next = LONG_MAX;
++	int warn = 0;
++	int dir;
++
++	read_lock(&xp->lock);
++
++	if (xp->dead)
++		goto out;
++
++	dir = xp->index & 7;
++
++	if (xp->lft.hard_add_expires_seconds) {
++		long tmo = xp->lft.hard_add_expires_seconds +
++			xp->curlft.add_time - now;
++		if (tmo <= 0)
++			goto expired;
++		if (tmo < next)
++			next = tmo;
++	}
++	if (xp->lft.hard_use_expires_seconds) {
++		long tmo = xp->lft.hard_use_expires_seconds +
++			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
++		if (tmo <= 0)
++			goto expired;
++		if (tmo < next)
++			next = tmo;
++	}
++	if (xp->lft.soft_add_expires_seconds) {
++		long tmo = xp->lft.soft_add_expires_seconds +
++			xp->curlft.add_time - now;
++		if (tmo <= 0) {
++			warn = 1;
++			tmo = XFRM_KM_TIMEOUT;
++		}
++		if (tmo < next)
++			next = tmo;
++	}
++	if (xp->lft.soft_use_expires_seconds) {
++		long tmo = xp->lft.soft_use_expires_seconds +
++			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
++		if (tmo <= 0) {
++			warn = 1;
++			tmo = XFRM_KM_TIMEOUT;
++		}
++		if (tmo < next)
++			next = tmo;
++	}
++
++	if (warn)
++		km_policy_expired(xp, dir, 0);
++	if (next != LONG_MAX &&
++	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
++		xfrm_pol_hold(xp);
++
++out:
++	read_unlock(&xp->lock);
++	xfrm_pol_put(xp);
++	return;
++
++expired:
++	read_unlock(&xp->lock);
++	km_policy_expired(xp, dir, 1);
++	xfrm_policy_delete(xp, dir);
++	xfrm_pol_put(xp);
++}
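The timer implements the four SPD lifetime knobs: a soft expiry only notifies the key manager (km_policy_expired(..., 0)) and rearms with XFRM_KM_TIMEOUT, while a hard expiry notifies and deletes the policy. So a key manager that wants a rekeying window would install something like (sketch):

	xp->lft.soft_add_expires_seconds = 3300; /* nudge the KM at 55 min */
	xp->lft.hard_add_expires_seconds = 3600; /* remove the policy at 1 h */
	/* the *_use_* variants count from first use rather than insert time */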
++
++
++/* Allocate an xfrm_policy. Not used here directly; it is meant to be
++ * used by the pfkeyv2 SPD calls.
++ */
++
++struct xfrm_policy *xfrm_policy_alloc(int gfp)
++{
++	struct xfrm_policy *policy;
++
++	policy = kmalloc(sizeof(struct xfrm_policy), gfp);
++
++	if (policy) {
++		memset(policy, 0, sizeof(struct xfrm_policy));
++		atomic_set(&policy->refcnt, 1);
++		policy->lock = RW_LOCK_UNLOCKED;
++		init_timer(&policy->timer);
++		policy->timer.data = (unsigned long)policy;
++		policy->timer.function = xfrm_policy_timer;
++	}
++	return policy;
++}
++
++/* Destroy xfrm_policy: descendant resources must be released to this moment. */
++
++void __xfrm_policy_destroy(struct xfrm_policy *policy)
++{
++	if (!policy->dead)
++		BUG();
++
++	if (policy->bundles)
++		BUG();
++
++	if (del_timer(&policy->timer))
++		BUG();
++
++	kfree(policy);
++}
++
++static void xfrm_policy_gc_kill(struct xfrm_policy *policy)
++{
++	struct dst_entry *dst;
++
++	while ((dst = policy->bundles) != NULL) {
++		policy->bundles = dst->next;
++		dst_free(dst);
++	}
++
++	if (del_timer(&policy->timer))
++		atomic_dec(&policy->refcnt);
++
++	if (atomic_read(&policy->refcnt) > 1)
++		flow_cache_flush();
++
++	xfrm_pol_put(policy);
++}
++
++static void xfrm_policy_gc_task(void *data)
++{
++	struct xfrm_policy *policy;
++	struct list_head *entry, *tmp;
++	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
++
++	spin_lock_bh(&xfrm_policy_gc_lock);
++	list_splice_init(&xfrm_policy_gc_list, &gc_list);
++	spin_unlock_bh(&xfrm_policy_gc_lock);
++
++	list_for_each_safe(entry, tmp, &gc_list) {
++		policy = list_entry(entry, struct xfrm_policy, list);
++		xfrm_policy_gc_kill(policy);
++	}
++}
++
++/* Rule must be locked. Release descendant resources, announce
++ * the entry dead. The rule must already be unlinked from the lists.
++ */
++
++static void xfrm_policy_kill(struct xfrm_policy *policy)
++{
++	write_lock_bh(&policy->lock);
++	if (policy->dead)
++		goto out;
++
++	policy->dead = 1;
++
++	spin_lock(&xfrm_policy_gc_lock);
++	list_add(&policy->list, &xfrm_policy_gc_list);
++	spin_unlock(&xfrm_policy_gc_lock);
++	schedule_task(&xfrm_policy_gc_work);
++
++out:
++	write_unlock_bh(&policy->lock);
++}
++
++/* Generate a new index... KAME seems to generate them ordered by cost
++ * at the price of absolutely unpredictable rule ordering. That will not do here. */
++static u32 xfrm_gen_index(int dir)
++{
++	u32 idx;
++	struct xfrm_policy *p;
++	static u32 idx_generator;
++
++	for (;;) {
++		idx = (idx_generator | dir);
++		idx_generator += 8;
++		if (idx == 0)
++			idx = 8;
++		for (p = xfrm_policy_list[dir]; p; p = p->next) {
++			if (p->index == idx)
++				break;
++		}
++		if (!p)
++			return idx;
++	}
++}
++
++int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
++{
++	struct xfrm_policy *pol, **p;
++	struct xfrm_policy *delpol = NULL;
++	struct xfrm_policy **newpos = NULL;
++
++	write_lock_bh(&xfrm_policy_lock);
++	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) {
++		if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0) {
++			if (excl) {
++				write_unlock_bh(&xfrm_policy_lock);
++				return -EEXIST;
++			}
++			*p = pol->next;
++			delpol = pol;
++			if (policy->priority > pol->priority)
++				continue;
++		} else if (policy->priority >= pol->priority) {
++			p = &pol->next;
++			continue;
++		}
++		if (!newpos)
++			newpos = p;
++		if (delpol)
++			break;
++		p = &pol->next;
++	}
++	if (newpos)
++		p = newpos;
++	xfrm_pol_hold(policy);
++	policy->next = *p;
++	*p = policy;
++	atomic_inc(&flow_cache_genid);
++	policy->index = delpol ? delpol->index : xfrm_gen_index(dir);
++	policy->curlft.add_time = (unsigned long)xtime.tv_sec;
++	policy->curlft.use_time = 0;
++	if (!mod_timer(&policy->timer, jiffies + HZ))
++		xfrm_pol_hold(policy);
++	write_unlock_bh(&xfrm_policy_lock);
++
++	if (delpol) {
++		xfrm_policy_kill(delpol);
++	}
++	wake_up(&km_waitq);
++	return 0;
++}
++
++struct xfrm_policy *xfrm_policy_bysel(int dir, struct xfrm_selector *sel,
++				      int delete)
++{
++	struct xfrm_policy *pol, **p;
++
++	write_lock_bh(&xfrm_policy_lock);
++	for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) {
++		if (memcmp(sel, &pol->selector, sizeof(*sel)) == 0) {
++			xfrm_pol_hold(pol);
++			if (delete)
++				*p = pol->next;
++			break;
++		}
++	}
++	write_unlock_bh(&xfrm_policy_lock);
++
++	if (pol && delete) {
++		atomic_inc(&flow_cache_genid);
++		xfrm_policy_kill(pol);
++		wake_up(&km_waitq);
++	}
++	return pol;
++}
++
++struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete)
++{
++	struct xfrm_policy *pol, **p;
++
++	write_lock_bh(&xfrm_policy_lock);
++	for (p = &xfrm_policy_list[id & 7]; (pol=*p)!=NULL; p = &pol->next) {
++		if (pol->index == id) {
++			xfrm_pol_hold(pol);
++			if (delete)
++				*p = pol->next;
++			break;
++		}
++	}
++	write_unlock_bh(&xfrm_policy_lock);
++
++	if (pol && delete) {
++		atomic_inc(&flow_cache_genid);
++		xfrm_policy_kill(pol);
++		wake_up(&km_waitq);
++	}
++	return pol;
++}
++
++void xfrm_policy_flush()
++{
++	struct xfrm_policy *xp;
++	int dir;
++
++	write_lock_bh(&xfrm_policy_lock);
++	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
++		while ((xp = xfrm_policy_list[dir]) != NULL) {
++			xfrm_policy_list[dir] = xp->next;
++			write_unlock_bh(&xfrm_policy_lock);
++
++			xfrm_policy_kill(xp);
++
++			write_lock_bh(&xfrm_policy_lock);
++		}
++	}
++	atomic_inc(&flow_cache_genid);
++	write_unlock_bh(&xfrm_policy_lock);
++	wake_up(&km_waitq);
++}
++
++int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*),
++		     void *data)
++{
++	struct xfrm_policy *xp;
++	int dir;
++	int count = 0;
++	int error = 0;
++
++	read_lock_bh(&xfrm_policy_lock);
++	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
++		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next)
++			count++;
++	}
++
++	if (count == 0) {
++		error = -ENOENT;
++		goto out;
++	}
++
++	for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) {
++		for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) {
++			error = func(xp, dir%XFRM_POLICY_MAX, --count, data);
++			if (error)
++				goto out;
++		}
++	}
++
++out:
++	read_unlock_bh(&xfrm_policy_lock);
++	return error;
++}
++
++
++/* Find policy to apply to this flow. */
++
++static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir,
++			       void **objp, atomic_t **obj_refp)
++{
++	struct xfrm_policy *pol;
++
++	read_lock_bh(&xfrm_policy_lock);
++	for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) {
++		struct xfrm_selector *sel = &pol->selector;
++		int match;
++
++		if (pol->family != family)
++			continue;
++
++		match = xfrm_selector_match(sel, fl, family);
++		if (match) {
++			xfrm_pol_hold(pol);
++			break;
++		}
++	}
++	read_unlock_bh(&xfrm_policy_lock);
++	if ((*objp = (void *) pol) != NULL)
++		*obj_refp = &pol->refcnt;
++}
++
++struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl)
++{
++	struct xfrm_policy *pol;
++
++	read_lock_bh(&xfrm_policy_lock);
++	if ((pol = sk->policy[dir]) != NULL) {
++		int match;
++
++		match = xfrm_selector_match(&pol->selector, fl, sk->family);
++		if (match)
++			xfrm_pol_hold(pol);
++		else
++			pol = NULL;
++	}
++	read_unlock_bh(&xfrm_policy_lock);
++	return pol;
++}
++
++static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
++{
++	pol->next = xfrm_policy_list[dir];
++	xfrm_policy_list[dir] = pol;
++	xfrm_pol_hold(pol);
++}
++
++static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
++						int dir)
++{
++	struct xfrm_policy **polp;
++
++	for (polp = &xfrm_policy_list[dir];
++	     *polp != NULL; polp = &(*polp)->next) {
++		if (*polp == pol) {
++			*polp = pol->next;
++			return pol;
++		}
++	}
++	return NULL;
++}
++
++void xfrm_policy_delete(struct xfrm_policy *pol, int dir)
++{
++	write_lock_bh(&xfrm_policy_lock);
++	pol = __xfrm_policy_unlink(pol, dir);
++	write_unlock_bh(&xfrm_policy_lock);
++	if (pol) {
++		if (dir < XFRM_POLICY_MAX)
++			atomic_inc(&flow_cache_genid);
++		xfrm_policy_kill(pol);
++	}
++}
++
++int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
++{
++	struct xfrm_policy *old_pol;
++
++	write_lock_bh(&xfrm_policy_lock);
++	old_pol = sk->policy[dir];
++	sk->policy[dir] = pol;
++	if (pol) {
++		pol->curlft.add_time = (unsigned long)xtime.tv_sec;
++		pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir);
++		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
++	}
++	if (old_pol)
++		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
++	write_unlock_bh(&xfrm_policy_lock);
++
++	if (old_pol) {
++		xfrm_policy_kill(old_pol);
++	}
++	wake_up(&km_waitq);
++	return 0;
++}
++
++static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir)
++{
++	struct xfrm_policy *newp = xfrm_policy_alloc(GFP_ATOMIC);
++
++	if (newp) {
++		newp->selector = old->selector;
++		newp->lft = old->lft;
++		newp->curlft = old->curlft;
++		newp->action = old->action;
++		newp->flags = old->flags;
++		newp->xfrm_nr = old->xfrm_nr;
++		newp->index = old->index;
++		memcpy(newp->xfrm_vec, old->xfrm_vec,
++		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
++		write_lock_bh(&xfrm_policy_lock);
++		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
++		write_unlock_bh(&xfrm_policy_lock);
++		xfrm_pol_put(newp);
++	}
++	return newp;
++}
++
++int __xfrm_sk_clone_policy(struct sock *sk)
++{
++	struct xfrm_policy *p0, *p1;
++	p0 = sk->policy[0];
++	p1 = sk->policy[1];
++	sk->policy[0] = NULL;
++	sk->policy[1] = NULL;
++	if (p0 && (sk->policy[0] = clone_policy(p0, 0)) == NULL)
++		return -ENOMEM;
++	if (p1 && (sk->policy[1] = clone_policy(p1, 1)) == NULL)
++		return -ENOMEM;
++	return 0;
++}
++
++/* Resolve list of templates for the flow, given policy. */
++
++static int
++xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl,
++		  struct xfrm_state **xfrm,
++		  unsigned short family)
++{
++	int nx;
++	int i, error;
++	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
++	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
++
++	for (nx=0, i = 0; i < policy->xfrm_nr; i++) {
++		struct xfrm_state *x;
++		xfrm_address_t *remote = daddr;
++		xfrm_address_t *local  = saddr;
++		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];
++
++		if (tmpl->mode) {
++			remote = &tmpl->id.daddr;
++			local = &tmpl->saddr;
++		}
++
++		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);
++
++		if (x && x->km.state == XFRM_STATE_VALID) {
++			xfrm[nx++] = x;
++			daddr = remote;
++			saddr = local;
++			continue;
++		}
++		if (x) {
++			error = (x->km.state == XFRM_STATE_ERROR ?
++				 -EINVAL : -EAGAIN);
++			xfrm_state_put(x);
++		}
++
++		if (!tmpl->optional)
++			goto fail;
++	}
++	return nx;
++
++fail:
++	for (nx--; nx>=0; nx--)
++		xfrm_state_put(xfrm[nx]);
++	return error;
++}
++
++/* Check that the bundle accepts the flow and its components are
++ * still valid.
++ */
++
++static struct dst_entry *
++xfrm_find_bundle(struct flowi *fl, struct xfrm_policy *policy, unsigned short family)
++{
++	struct dst_entry *x;
++	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
++	if (unlikely(afinfo == NULL))
++		return ERR_PTR(-EINVAL);
++	x = afinfo->find_bundle(fl, policy);
++	xfrm_policy_put_afinfo(afinfo);
++	return x;
++}
++
++/* Allocate chain of dst_entry's, attach known xfrm's, calculate
++ * all the metrics... Shortly, bundle a bundle.
++ */
++
++static int
++xfrm_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int nx,
++		   struct flowi *fl, struct dst_entry **dst_p,
++		   unsigned short family)
++{
++	int err;
++	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
++	if (unlikely(afinfo == NULL))
++		return -EINVAL;
++	err = afinfo->bundle_create(policy, xfrm, nx, fl, dst_p);
++	xfrm_policy_put_afinfo(afinfo);
++	return err;
++}
++
++static inline int policy_to_flow_dir(int dir)
++{
++	if (XFRM_POLICY_IN == FLOW_DIR_IN &&
++	    XFRM_POLICY_OUT == FLOW_DIR_OUT &&
++	    XFRM_POLICY_FWD == FLOW_DIR_FWD)
++		return dir;
++	switch (dir) {
++	default:
++	case XFRM_POLICY_IN:
++		return FLOW_DIR_IN;
++	case XFRM_POLICY_OUT:
++		return FLOW_DIR_OUT;
++	case XFRM_POLICY_FWD:
++		return FLOW_DIR_FWD;
++	};
++}
++
++static int stale_bundle(struct dst_entry *dst);
++
++/* Main function: finds/creates a bundle for given flow.
++ *
++ * At the moment we take a raw IP route as input, mostly to speed up
++ * lookups on interfaces with IPsec disabled.
++ */
++int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl,
++		struct sock *sk, int flags)
++{
++	struct xfrm_policy *policy;
++	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
++	struct dst_entry *dst, *dst_orig = *dst_p;
++	int nx = 0;
++	int err;
++	u32 genid;
++	u16 family = dst_orig->ops->family;
++restart:
++	genid = atomic_read(&flow_cache_genid);
++	policy = NULL;
++	if (sk && sk->policy[1])
++		policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
++
++	if (!policy) {
++		/* To accelerate a bit...  */
++		if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT])
++			return 0;
++
++		policy = flow_cache_lookup(fl, family,
++					   policy_to_flow_dir(XFRM_POLICY_OUT),
++					   xfrm_policy_lookup);
++	}
++
++	if (!policy)
++		return 0;
++
++	policy->curlft.use_time = (unsigned long)xtime.tv_sec;
++
++	switch (policy->action) {
++	case XFRM_POLICY_BLOCK:
++		/* Prohibit the flow */
++		xfrm_pol_put(policy);
++		return -EPERM;
++
++	case XFRM_POLICY_ALLOW:
++		if (policy->xfrm_nr == 0) {
++			/* The flow passes untransformed. */
++			xfrm_pol_put(policy);
++			return 0;
++		}
++
++		/* Try to find matching bundle.
++		 *
++		 * LATER: help from flow cache. It is optional, this
++		 * is required only for output policy.
++		 */
++		dst = xfrm_find_bundle(fl, policy, family);
++		if (IS_ERR(dst)) {
++			xfrm_pol_put(policy);
++			return PTR_ERR(dst);
++		}
++
++		if (dst)
++			break;
++
++		nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
++
++		if (unlikely(nx<0)) {
++			err = nx;
++			if (err == -EAGAIN && flags) {
++				DECLARE_WAITQUEUE(wait, current);
++
++				add_wait_queue(&km_waitq, &wait);
++				set_current_state(TASK_INTERRUPTIBLE);
++				schedule();
++				set_current_state(TASK_RUNNING);
++				remove_wait_queue(&km_waitq, &wait);
++
++				nx = xfrm_tmpl_resolve(policy, fl, xfrm, family);
++
++				if (nx == -EAGAIN && signal_pending(current)) {
++					err = -ERESTART;
++					goto error;
++				}
++				if (nx == -EAGAIN ||
++				    genid != atomic_read(&flow_cache_genid)) {
++					xfrm_pol_put(policy);
++					goto restart;
++				}
++				err = nx;
++			}
++			if (err < 0)
++				goto error;
++		}
++		if (nx == 0) {
++			/* The flow passes untransformed. */
++			xfrm_pol_put(policy);
++			return 0;
++		}
++
++		dst = dst_orig;
++		err = xfrm_bundle_create(policy, xfrm, nx, fl, &dst, family);
++
++		if (unlikely(err)) {
++			int i;
++			for (i=0; i<nx; i++)
++				xfrm_state_put(xfrm[i]);
++			goto error;
++		}
++
++		write_lock_bh(&policy->lock);
++		if (unlikely(policy->dead || stale_bundle(dst))) {
++			/* While we worked on resolving, this
++			 * policy has gone away. Retry. It is not paranoia:
++			 * we just cannot enlist a new bundle on a dead object,
++			 * and we cannot enlist stale bundles either.
++			 */
++			write_unlock_bh(&policy->lock);
++
++			xfrm_pol_put(policy);
++			if (dst)
++				dst_free(dst);
++			goto restart;
++		}
++		dst->next = policy->bundles;
++		policy->bundles = dst;
++		dst_hold(dst);
++		write_unlock_bh(&policy->lock);
++	}
++	*dst_p = dst;
++	dst_release(dst_orig);
++	xfrm_pol_put(policy);
++	return 0;
++
++error:
++	dst_release(dst_orig);
++	xfrm_pol_put(policy);
++	*dst_p = NULL;
++	return err;
++}
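Callers treat xfrm_lookup() as a post-processing step on an ordinary route lookup: pass in the plain dst, get back either the same dst (no matching policy) or the head of a chain of xfrm dst entries whose output routines apply the transforms. A sketch of the expected call site:

	struct dst_entry *dst = &rt->u.dst;
	int err;

	err = xfrm_lookup(&dst, &fl, sk, 0);	/* nonzero flags: may sleep
						 * waiting for the KM */
	if (err)
		return err;	/* e.g. -EPERM for an XFRM_POLICY_BLOCK rule */
	skb->dst = dst;		/* plain route or transform bundle */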
++
++/* When an skb is transformed back to its "native" form, we have to
++ * check policy restrictions. At the moment we do this in a maximally
++ * naive way. Of course, connected sockets should have the policy
++ * cached on them.
++ */
++
++static inline int
++xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, 
++	      unsigned short family)
++{
++	if (xfrm_state_kern(x))
++		return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, family);
++	return	x->id.proto == tmpl->id.proto &&
++		(x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
++		(x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
++		x->props.mode == tmpl->mode &&
++		(tmpl->aalgos & (1<<x->props.aalgo)) &&
++		!(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family));
++}
++
++static inline int
++xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start,
++	       unsigned short family)
++{
++	int idx = start;
++
++	if (tmpl->optional) {
++		if (!tmpl->mode)
++			return start;
++	} else
++		start = -1;
++	for (; idx < sp->len; idx++) {
++		if (xfrm_state_ok(tmpl, sp->x[idx].xvec, family))
++			return ++idx;
++		if (sp->x[idx].xvec->props.mode)
++			break;
++	}
++	return start;
++}
++
++static int
++_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family)
++{
++	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
++
++	if (unlikely(afinfo == NULL))
++		return -EAFNOSUPPORT;
++
++	afinfo->decode_session(skb, fl);
++	xfrm_policy_put_afinfo(afinfo);
++	return 0;
++}
++
++static inline int secpath_has_tunnel(struct sec_path *sp, int k)
++{
++	for (; k < sp->len; k++) {
++		if (sp->x[k].xvec->props.mode)
++			return 1;
++	}
++
++	return 0;
++}
++
++int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, 
++			unsigned short family)
++{
++	struct xfrm_policy *pol;
++	struct flowi fl;
++
++	if (_decode_session(skb, &fl, family) < 0)
++		return 0;
++
++	/* First, check used SA against their selectors. */
++	if (skb->sp) {
++		int i;
++
++		for (i=skb->sp->len-1; i>=0; i--) {
++			struct sec_decap_state *xvec = &(skb->sp->x[i]);
++			if (!xfrm_selector_match(&xvec->xvec->sel, &fl, family))
++				return 0;
++
++			/* If there is a post_input processor, try running it */
++			if (xvec->xvec->type->post_input &&
++			    (xvec->xvec->type->post_input)(xvec->xvec,
++							   &(xvec->decap),
++							   skb) != 0)
++				return 0;
++		}
++	}
++
++	pol = NULL;
++	if (sk && sk->policy[dir])
++		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
++
++	if (!pol)
++		pol = flow_cache_lookup(&fl, family,
++					policy_to_flow_dir(dir),
++					xfrm_policy_lookup);
++
++	if (!pol)
++		return !skb->sp || !secpath_has_tunnel(skb->sp, 0);
++
++	pol->curlft.use_time = (unsigned long)xtime.tv_sec;
++
++	if (pol->action == XFRM_POLICY_ALLOW) {
++		struct sec_path *sp;
++		static struct sec_path dummy;
++		int i, k;
++
++		if ((sp = skb->sp) == NULL)
++			sp = &dummy;
++
++		/* For each tunnel xfrm, find the first matching tmpl.
++		 * For each tmpl before that, find corresponding xfrm.
++		 * Order is _important_. Later we will implement
++		 * some barriers, but at the moment barriers
++		 * are implied between each two transformations.
++		 */
++		for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) {
++			k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family);
++			if (k < 0)
++				goto reject;
++		}
++
++		if (secpath_has_tunnel(sp, k))
++			goto reject;
++
++		xfrm_pol_put(pol);
++		return 1;
++	}
++
++reject:
++	xfrm_pol_put(pol);
++	return 0;
++}
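
A sketch of how the input path is expected to use this; xfrm4_policy_check()
is assumed to be the AF_INET inline wrapper from the headers this patch adds:

	/* At the tail of a transport protocol's input routine: */
	if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
		kfree_skb(skb);		/* inbound policy rejected it */
		return 0;
	}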
++
++int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
++{
++	struct flowi fl;
++
++	if (_decode_session(skb, &fl, family) < 0)
++		return 0;
++
++	return xfrm_lookup(&skb->dst, &fl, NULL, 0) == 0;
++}
++
++/* Optimize later using cookies and generation ids. */
++
++static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
++{
++	if (!stale_bundle(dst))
++		return dst;
++
++	dst_release(dst);
++	return NULL;
++}
++
++static int stale_bundle(struct dst_entry *dst)
++{
++	struct dst_entry *child = dst;
++
++	while (child) {
++		if (child->obsolete > 0 ||
++		    (child->dev && !netif_running(child->dev)) ||
++		    (child->xfrm && child->xfrm->km.state != XFRM_STATE_VALID)) {
++			return 1;
++		}
++		child = child->child;
++	}
++
++	return 0;
++}
++
++static void xfrm_dst_destroy(struct dst_entry *dst)
++{
++	if (!dst->xfrm)
++		return;
++	xfrm_state_put(dst->xfrm);
++	dst->xfrm = NULL;
++}
++
++static void xfrm_link_failure(struct sk_buff *skb)
++{
++	/* Impossible. Such a dst must be popped before it reaches the point of failure. */
++	return;
++}
++
++static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
++{
++	if (dst) {
++		if (dst->obsolete) {
++			dst_release(dst);
++			dst = NULL;
++		}
++	}
++	return dst;
++}
++
++static void xfrm_prune_bundles(int (*func)(struct dst_entry *))
++{
++	int i;
++	struct xfrm_policy *pol;
++	struct dst_entry *dst, **dstp, *gc_list = NULL;
++
++	read_lock_bh(&xfrm_policy_lock);
++	for (i=0; i<2*XFRM_POLICY_MAX; i++) {
++		for (pol = xfrm_policy_list[i]; pol; pol = pol->next) {
++			write_lock(&pol->lock);
++			dstp = &pol->bundles;
++			while ((dst=*dstp) != NULL) {
++				if (func(dst)) {
++					*dstp = dst->next;
++					dst->next = gc_list;
++					gc_list = dst;
++				} else {
++					dstp = &dst->next;
++				}
++			}
++			write_unlock(&pol->lock);
++		}
++	}
++	read_unlock_bh(&xfrm_policy_lock);
++
++	while (gc_list) {
++		dst = gc_list;
++		gc_list = dst->next;
++		dst_free(dst);
++	}
++}
++
++static int unused_bundle(struct dst_entry *dst)
++{
++	return !atomic_read(&dst->__refcnt);
++}
++
++static void __xfrm_garbage_collect(void)
++{
++	xfrm_prune_bundles(unused_bundle);
++}
++
++int xfrm_flush_bundles(void)
++{
++	xfrm_prune_bundles(stale_bundle);
++	return 0;
++}
++
++/* Well... that's _TASK_. We need to scan through the transformation
++ * list and figure out what MSS TCP should generate so that the final
++ * datagram fits the MTU. Mama mia... :-)
++ *
++ * Apparently, some easy way exists, but we used to choose the most
++ * bizarre ones. :-) So, raising Kalashnikov... tra-ta-ta.
++ *
++ * Consider this function as something like dark humour. :-)
++ */
++static int xfrm_get_mss(struct dst_entry *dst, u32 mtu)
++{
++	int res = mtu - dst->header_len;
++
++	for (;;) {
++		struct dst_entry *d = dst;
++		int m = res;
++
++		do {
++			struct xfrm_state *x = d->xfrm;
++			if (x) {
++				spin_lock_bh(&x->lock);
++				if (x->km.state == XFRM_STATE_VALID &&
++				    x->type && x->type->get_max_size)
++					m = x->type->get_max_size(d->xfrm, m);
++				else
++					m += x->props.header_len;
++				spin_unlock_bh(&x->lock);
++			}
++		} while ((d = d->child) != NULL);
++
++		if (m <= mtu)
++			break;
++		res -= (m - mtu);
++		if (res < 88)
++			return mtu;
++	}
++
++	return res + dst->header_len;
++}
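
A worked pass through the search above, with assumed numbers (1500-byte
MTU, 56 bytes of aggregate ESP+tunnel overhead in dst->header_len):

	res = 1500 - 56 = 1444
	pass 1: transformed size m = 1512 > 1500  ->  res -= 12  ->  1432
	pass 2: transformed size m = 1496 <= 1500 ->  converged

Each pass shrinks res by the overshoot (padding makes the overhead depend
on the payload size), and the res < 88 guard bails out to the plain MTU if
the search degenerates.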
++
++int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
++{
++	int err = 0;
++	if (unlikely(afinfo == NULL))
++		return -EINVAL;
++	if (unlikely(afinfo->family >= NPROTO))
++		return -EAFNOSUPPORT;
++	write_lock(&xfrm_policy_afinfo_lock);
++	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
++		err = -ENOBUFS;
++	else {
++		struct dst_ops *dst_ops = afinfo->dst_ops;
++		if (likely(dst_ops->kmem_cachep == NULL))
++			dst_ops->kmem_cachep = xfrm_dst_cache;
++		if (likely(dst_ops->check == NULL))
++			dst_ops->check = xfrm_dst_check;
++		if (likely(dst_ops->destroy == NULL))
++			dst_ops->destroy = xfrm_dst_destroy;
++		if (likely(dst_ops->negative_advice == NULL))
++			dst_ops->negative_advice = xfrm_negative_advice;
++		if (likely(dst_ops->link_failure == NULL))
++			dst_ops->link_failure = xfrm_link_failure;
++		if (likely(dst_ops->get_mss == NULL))
++			dst_ops->get_mss = xfrm_get_mss;
++		if (likely(afinfo->garbage_collect == NULL))
++			afinfo->garbage_collect = __xfrm_garbage_collect;
++		xfrm_policy_afinfo[afinfo->family] = afinfo;
++	}
++	write_unlock(&xfrm_policy_afinfo_lock);
++	return err;
++}
++
++int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
++{
++	int err = 0;
++	if (unlikely(afinfo == NULL))
++		return -EINVAL;
++	if (unlikely(afinfo->family >= NPROTO))
++		return -EAFNOSUPPORT;
++	write_lock(&xfrm_policy_afinfo_lock);
++	if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
++		if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
++			err = -EINVAL;
++		else {
++			struct dst_ops *dst_ops = afinfo->dst_ops;
++			xfrm_policy_afinfo[afinfo->family] = NULL;
++			dst_ops->kmem_cachep = NULL;
++			dst_ops->check = NULL;
++			dst_ops->destroy = NULL;
++			dst_ops->negative_advice = NULL;
++			dst_ops->link_failure = NULL;
++			dst_ops->get_mss = NULL;
++			afinfo->garbage_collect = NULL;
++		}
++	}
++	write_unlock(&xfrm_policy_afinfo_lock);
++	return err;
++}
++
++static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
++{
++	struct xfrm_policy_afinfo *afinfo;
++	if (unlikely(family >= NPROTO))
++		return NULL;
++	read_lock(&xfrm_policy_afinfo_lock);
++	afinfo = xfrm_policy_afinfo[family];
++	if (likely(afinfo != NULL))
++		read_lock(&afinfo->lock);
++	read_unlock(&xfrm_policy_afinfo_lock);
++	return afinfo;
++}
++
++static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
++{
++	if (unlikely(afinfo == NULL))
++		return;
++	read_unlock(&afinfo->lock);
++}
++
++static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
++{
++	switch (event) {
++	case NETDEV_DOWN:
++		xfrm_flush_bundles();
++	}
++	return NOTIFY_DONE;
++}
++
++struct notifier_block xfrm_dev_notifier = {
++	xfrm_dev_event,
++	NULL,
++	0
++};
++
++void __init xfrm_policy_init(void)
++{
++	xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
++					   sizeof(struct xfrm_dst),
++					   0, SLAB_HWCACHE_ALIGN,
++					   NULL, NULL);
++	if (!xfrm_dst_cache)
++		panic("XFRM: failed to allocate xfrm_dst_cache\n");
++
++	INIT_TQUEUE(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL);
++	register_netdevice_notifier(&xfrm_dev_notifier);
++}
++
++void __init xfrm_init(void)
++{
++	xfrm_state_init();
++	xfrm_policy_init();
++	xfrm_input_init();
++}
++
+diff -Nru a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_state.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,950 @@
++/*
++ * xfrm_state.c
++ *
++ * Changes:
++ *	Mitsuru KANDA @USAGI
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 		IPv6 support
++ * 	YOSHIFUJI Hideaki @USAGI
++ * 		Split up af-specific functions
++ *	Derek Atkins <derek at ihtfp.com>
++ *		Add UDP Encapsulation
++ * 	
++ */
++
++#include <net/xfrm.h>
++#include <linux/pfkeyv2.h>
++#include <linux/ipsec.h>
++#include <asm/uaccess.h>
++#include <linux/tqueue.h>
++
++/* Each xfrm_state may be linked to two tables:
++
++   1. A hash table by (spi,daddr,ah/esp) to find an SA by SPI. (input,ctl)
++   2. A hash table by daddr to find what SAs exist for a given
++      destination/tunnel endpoint. (output)
++ */
++
++static spinlock_t xfrm_state_lock = SPIN_LOCK_UNLOCKED;
++
++/* Hash table to find the appropriate SA towards a given target (the
++ * endpoint of a tunnel, or the destination in transport mode) allowed
++ * by the selector.
++ *
++ * Its main use is finding an SA after the policy has selected tunnel
++ * or transport mode.  It can also be used by the AH/ESP ICMP error
++ * handlers to find the offending SA.
++ */
++static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE];
++static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE];
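
A sketch of what the dual indexing buys (hash helper names as used
elsewhere in this patch; the address comparison is elided for brevity):

	/* Input path: resolve the SA from the SPI in the packet header. */
	unsigned h = xfrm_spi_hash(daddr, spi, proto, family);
	list_for_each_entry(x, xfrm_state_byspi + h, byspi)
		if (x->id.spi == spi && x->id.proto == proto)
			break;

	/* Output path: enumerate candidate SAs towards one destination. */
	h = xfrm_dst_hash(daddr, family);
	list_for_each_entry(x, xfrm_state_bydst + h, bydst)
		if (x->props.reqid == tmpl->reqid)
			break;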
++
++DECLARE_WAIT_QUEUE_HEAD(km_waitq);
++
++static rwlock_t xfrm_state_afinfo_lock = RW_LOCK_UNLOCKED;
++static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO];
++
++static struct tq_struct xfrm_state_gc_work;
++static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list);
++static spinlock_t xfrm_state_gc_lock = SPIN_LOCK_UNLOCKED;
++
++static void __xfrm_state_delete(struct xfrm_state *x);
++
++static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family);
++static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
++
++static void xfrm_state_gc_destroy(struct xfrm_state *x)
++{
++	if (del_timer(&x->timer))
++		BUG();
++	if (x->aalg)
++		kfree(x->aalg);
++	if (x->ealg)
++		kfree(x->ealg);
++	if (x->calg)
++		kfree(x->calg);
++	if (x->encap)
++		kfree(x->encap);
++	if (x->type) {
++		x->type->destructor(x);
++		xfrm_put_type(x->type);
++	}
++	kfree(x);
++}
++
++static void xfrm_state_gc_task(void *data)
++{
++	struct xfrm_state *x;
++	struct list_head *entry, *tmp;
++	struct list_head gc_list = LIST_HEAD_INIT(gc_list);
++
++	spin_lock_bh(&xfrm_state_gc_lock);
++	list_splice_init(&xfrm_state_gc_list, &gc_list);
++	spin_unlock_bh(&xfrm_state_gc_lock);
++
++	list_for_each_safe(entry, tmp, &gc_list) {
++		x = list_entry(entry, struct xfrm_state, bydst);
++		xfrm_state_gc_destroy(x);
++	}
++	wake_up(&km_waitq);
++}
++
++static inline unsigned long make_jiffies(long secs)
++{
++	if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
++		return MAX_SCHEDULE_TIMEOUT-1;
++	else
++		return secs*HZ;
++}
++
++static void xfrm_timer_handler(unsigned long data)
++{
++	struct xfrm_state *x = (struct xfrm_state*)data;
++	unsigned long now = (unsigned long)xtime.tv_sec;
++	long next = LONG_MAX;
++	int warn = 0;
++
++	spin_lock(&x->lock);
++	if (x->km.state == XFRM_STATE_DEAD)
++		goto out;
++	if (x->km.state == XFRM_STATE_EXPIRED)
++		goto expired;
++	if (x->lft.hard_add_expires_seconds) {
++		long tmo = x->lft.hard_add_expires_seconds +
++			x->curlft.add_time - now;
++		if (tmo <= 0)
++			goto expired;
++		if (tmo < next)
++			next = tmo;
++	}
++	if (x->lft.hard_use_expires_seconds) {
++		long tmo = x->lft.hard_use_expires_seconds +
++			(x->curlft.use_time ? : now) - now;
++		if (tmo <= 0)
++			goto expired;
++		if (tmo < next)
++			next = tmo;
++	}
++	if (x->km.dying)
++		goto resched;
++	if (x->lft.soft_add_expires_seconds) {
++		long tmo = x->lft.soft_add_expires_seconds +
++			x->curlft.add_time - now;
++		if (tmo <= 0)
++			warn = 1;
++		else if (tmo < next)
++			next = tmo;
++	}
++	if (x->lft.soft_use_expires_seconds) {
++		long tmo = x->lft.soft_use_expires_seconds +
++			(x->curlft.use_time ? : now) - now;
++		if (tmo <= 0)
++			warn = 1;
++		else if (tmo < next)
++			next = tmo;
++	}
++
++	if (warn)
++		km_state_expired(x, 0);
++resched:
++	if (next != LONG_MAX &&
++	    !mod_timer(&x->timer, jiffies + make_jiffies(next)))
++		xfrm_state_hold(x);
++	goto out;
++
++expired:
++	if (x->km.state == XFRM_STATE_ACQ && x->id.spi == 0) {
++		x->km.state = XFRM_STATE_EXPIRED;
++		wake_up(&km_waitq);
++		next = 2;
++		goto resched;
++	}
++	if (x->id.spi != 0)
++		km_state_expired(x, 1);
++	__xfrm_state_delete(x);
++
++out:
++	spin_unlock(&x->lock);
++	xfrm_state_put(x);
++}
++
++struct xfrm_state *xfrm_state_alloc(void)
++{
++	struct xfrm_state *x;
++
++	x = kmalloc(sizeof(struct xfrm_state), GFP_ATOMIC);
++
++	if (x) {
++		memset(x, 0, sizeof(struct xfrm_state));
++		atomic_set(&x->refcnt, 1);
++		atomic_set(&x->tunnel_users, 0);
++		INIT_LIST_HEAD(&x->bydst);
++		INIT_LIST_HEAD(&x->byspi);
++		init_timer(&x->timer);
++		x->timer.function = xfrm_timer_handler;
++		x->timer.data	  = (unsigned long)x;
++		x->curlft.add_time = (unsigned long)xtime.tv_sec;
++		x->lft.soft_byte_limit = XFRM_INF;
++		x->lft.soft_packet_limit = XFRM_INF;
++		x->lft.hard_byte_limit = XFRM_INF;
++		x->lft.hard_packet_limit = XFRM_INF;
++		x->lock = SPIN_LOCK_UNLOCKED;
++	}
++	return x;
++}
++
++void __xfrm_state_destroy(struct xfrm_state *x)
++{
++	BUG_TRAP(x->km.state == XFRM_STATE_DEAD);
++
++	spin_lock_bh(&xfrm_state_gc_lock);
++	list_add(&x->bydst, &xfrm_state_gc_list);
++	spin_unlock_bh(&xfrm_state_gc_lock);
++	schedule_task(&xfrm_state_gc_work);
++}
++
++static void __xfrm_state_delete(struct xfrm_state *x)
++{
++	if (x->km.state != XFRM_STATE_DEAD) {
++		x->km.state = XFRM_STATE_DEAD;
++		spin_lock(&xfrm_state_lock);
++		list_del(&x->bydst);
++		atomic_dec(&x->refcnt);
++		if (x->id.spi) {
++			list_del(&x->byspi);
++			atomic_dec(&x->refcnt);
++		}
++		spin_unlock(&xfrm_state_lock);
++		if (del_timer(&x->timer))
++			atomic_dec(&x->refcnt);
++
++		/* The number two in this test is the reference
++		 * mentioned in the comment below plus the reference
++		 * our caller holds.  A larger value means that
++		 * there are DSTs attached to this xfrm_state.
++		 */
++		if (atomic_read(&x->refcnt) > 2)
++			xfrm_flush_bundles();
++
++		/* All xfrm_state objects are created by xfrm_state_alloc.
++		 * The xfrm_state_alloc call gives a reference, and that
++		 * is what we are dropping here.
++		 */
++		atomic_dec(&x->refcnt);
++	}
++}
++
++void xfrm_state_delete(struct xfrm_state *x)
++{
++	spin_lock_bh(&x->lock);
++	__xfrm_state_delete(x);
++	spin_unlock_bh(&x->lock);
++}
++
++void xfrm_state_flush(u8 proto)
++{
++	int i;
++	struct xfrm_state *x;
++
++	spin_lock_bh(&xfrm_state_lock);
++	for (i = 0; i < XFRM_DST_HSIZE; i++) {
++restart:
++		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
++			if (!xfrm_state_kern(x) &&
++			    (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) {
++				xfrm_state_hold(x);
++				spin_unlock_bh(&xfrm_state_lock);
++
++				xfrm_state_delete(x);
++				xfrm_state_put(x);
++
++				spin_lock_bh(&xfrm_state_lock);
++				goto restart;
++			}
++		}
++	}
++	spin_unlock_bh(&xfrm_state_lock);
++	wake_up(&km_waitq);
++}
++
++static int
++xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl,
++		  struct xfrm_tmpl *tmpl,
++		  xfrm_address_t *daddr, xfrm_address_t *saddr,
++		  unsigned short family)
++{
++	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
++	if (!afinfo)
++		return -1;
++	afinfo->init_tempsel(x, fl, tmpl, daddr, saddr);
++	xfrm_state_put_afinfo(afinfo);
++	return 0;
++}
++
++struct xfrm_state *
++xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, 
++		struct flowi *fl, struct xfrm_tmpl *tmpl,
++		struct xfrm_policy *pol, int *err,
++		unsigned short family)
++{
++	unsigned h = xfrm_dst_hash(daddr, family);
++	struct xfrm_state *x;
++	int acquire_in_progress = 0;
++	int error = 0;
++	struct xfrm_state *best = NULL;
++
++	spin_lock_bh(&xfrm_state_lock);
++	list_for_each_entry(x, xfrm_state_bydst+h, bydst) {
++		if (x->props.family == family &&
++		    x->props.reqid == tmpl->reqid &&
++		    xfrm_state_addr_check(x, daddr, saddr, family) &&
++		    tmpl->mode == x->props.mode &&
++		    tmpl->id.proto == x->id.proto) {
++			/* Resolution logic:
++			   1. There is a valid state with a matching
++			      selector. Done.
++			   2. There is a valid state with an inappropriate
++			      selector. Skip.
++
++			   Entering the area of "sysdeps".
++
++			   3. If the state is not valid, its selector is
++			      temporary: it selects only the session that
++			      triggered the previous resolution. The key
++			      manager will do something to install a state
++			      with a proper selector.
++			 */
++			if (x->km.state == XFRM_STATE_VALID) {
++				if (!xfrm_selector_match(&x->sel, fl, family))
++					continue;
++				if (!best ||
++				    best->km.dying > x->km.dying ||
++				    (best->km.dying == x->km.dying &&
++				     best->curlft.add_time < x->curlft.add_time))
++					best = x;
++			} else if (x->km.state == XFRM_STATE_ACQ) {
++				acquire_in_progress = 1;
++			} else if (x->km.state == XFRM_STATE_ERROR ||
++				   x->km.state == XFRM_STATE_EXPIRED) {
++				if (xfrm_selector_match(&x->sel, fl, family))
++					error = 1;
++			}
++		}
++	}
++
++	x = best;
++	if (!x && !error && !acquire_in_progress &&
++	    ((x = xfrm_state_alloc()) != NULL)) {
++		/* Initialize a temporary selector matching only
++		 * the current session. */
++		xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family);
++
++		if (km_query(x, tmpl, pol) == 0) {
++			x->km.state = XFRM_STATE_ACQ;
++			list_add_tail(&x->bydst, xfrm_state_bydst+h);
++			xfrm_state_hold(x);
++			if (x->id.spi) {
++				h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
++				list_add(&x->byspi, xfrm_state_byspi+h);
++				xfrm_state_hold(x);
++			}
++			x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES;
++			xfrm_state_hold(x);
++			x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
++			add_timer(&x->timer);
++		} else {
++			x->km.state = XFRM_STATE_DEAD;
++			xfrm_state_put(x);
++			x = NULL;
++			error = 1;
++		}
++	}
++	if (x)
++		xfrm_state_hold(x);
++	else
++		*err = acquire_in_progress ? -EAGAIN :
++			(error ? -ESRCH : -ENOMEM);
++	spin_unlock_bh(&xfrm_state_lock);
++	return x;
++}
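
The error contract seen by callers, as a sketch (xfrm_tmpl_resolve() in
xfrm_policy.c is the main consumer):

	int err = 0;
	struct xfrm_state *x;

	x = xfrm_state_find(daddr, saddr, fl, tmpl, pol, &err, family);
	if (!x) {
		/* -EAGAIN: a larval (ACQ) state exists and a key manager is
		 *          still negotiating; callers may sleep on km_waitq
		 *          and retry.
		 * -ESRCH : a matching state exists but is unusable.
		 * -ENOMEM: nothing matched and no larval state could be
		 *          created. */
		return err;
	}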
++
++static void __xfrm_state_insert(struct xfrm_state *x)
++{
++	unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family);
++
++	list_add(&x->bydst, xfrm_state_bydst+h);
++	xfrm_state_hold(x);
++
++	h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
++
++	list_add(&x->byspi, xfrm_state_byspi+h);
++	xfrm_state_hold(x);
++
++	if (!mod_timer(&x->timer, jiffies + HZ))
++		xfrm_state_hold(x);
++
++	wake_up(&km_waitq);
++}
++
++void xfrm_state_insert(struct xfrm_state *x)
++{
++	spin_lock_bh(&xfrm_state_lock);
++	__xfrm_state_insert(x);
++	spin_unlock_bh(&xfrm_state_lock);
++}
++
++static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq);
++
++int xfrm_state_add(struct xfrm_state *x)
++{
++	struct xfrm_state_afinfo *afinfo;
++	struct xfrm_state *x1;
++	int family;
++	int err;
++
++	family = x->props.family;
++	afinfo = xfrm_state_get_afinfo(family);
++	if (unlikely(afinfo == NULL))
++		return -EAFNOSUPPORT;
++
++	spin_lock_bh(&xfrm_state_lock);
++
++	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
++	if (x1) {
++		xfrm_state_put(x1);
++		x1 = NULL;
++		err = -EEXIST;
++		goto out;
++	}
++
++	if (x->km.seq) {
++		x1 = __xfrm_find_acq_byseq(x->km.seq);
++		if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) {
++			xfrm_state_put(x1);
++			x1 = NULL;
++		}
++	}
++
++	if (!x1)
++		x1 = afinfo->find_acq(
++			x->props.mode, x->props.reqid, x->id.proto,
++			&x->id.daddr, &x->props.saddr, 0);
++
++	__xfrm_state_insert(x);
++	err = 0;
++
++out:
++	spin_unlock_bh(&xfrm_state_lock);
++	xfrm_state_put_afinfo(afinfo);
++
++	if (x1) {
++		xfrm_state_delete(x1);
++		xfrm_state_put(x1);
++	}
++
++	return err;
++}
++
++int xfrm_state_update(struct xfrm_state *x)
++{
++	struct xfrm_state_afinfo *afinfo;
++	struct xfrm_state *x1;
++	int err;
++
++	afinfo = xfrm_state_get_afinfo(x->props.family);
++	if (unlikely(afinfo == NULL))
++		return -EAFNOSUPPORT;
++
++	spin_lock_bh(&xfrm_state_lock);
++	x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto);
++
++	err = -ESRCH;
++	if (!x1)
++		goto out;
++
++	if (xfrm_state_kern(x1)) {
++		xfrm_state_put(x1);
++		err = -EEXIST;
++		goto out;
++	}
++
++	if (x1->km.state == XFRM_STATE_ACQ) {
++		__xfrm_state_insert(x);
++		x = NULL;
++	}
++	err = 0;
++
++out:
++	spin_unlock_bh(&xfrm_state_lock);
++	xfrm_state_put_afinfo(afinfo);
++
++	if (err)
++		return err;
++
++	if (!x) {
++		xfrm_state_delete(x1);
++		xfrm_state_put(x1);
++		return 0;
++	}
++
++	err = -EINVAL;
++	spin_lock_bh(&x1->lock);
++	if (likely(x1->km.state == XFRM_STATE_VALID)) {
++		if (x->encap && x1->encap)
++			memcpy(x1->encap, x->encap, sizeof(*x1->encap));
++		memcpy(&x1->lft, &x->lft, sizeof(x1->lft));
++		x1->km.dying = 0;
++
++		if (!mod_timer(&x1->timer, jiffies + HZ))
++			xfrm_state_hold(x1);
++		if (x1->curlft.use_time)
++			xfrm_state_check_expire(x1);
++
++		err = 0;
++	}
++	spin_unlock_bh(&x1->lock);
++
++	xfrm_state_put(x1);
++
++	return err;
++}
++
++int xfrm_state_check_expire(struct xfrm_state *x)
++{
++	if (!x->curlft.use_time)
++		x->curlft.use_time = (unsigned long)xtime.tv_sec;
++
++	if (x->km.state != XFRM_STATE_VALID)
++		return -EINVAL;
++
++	if (x->curlft.bytes >= x->lft.hard_byte_limit ||
++	    x->curlft.packets >= x->lft.hard_packet_limit) {
++		km_state_expired(x, 1);
++		if (!mod_timer(&x->timer, jiffies + XFRM_ACQ_EXPIRES*HZ))
++			xfrm_state_hold(x);
++		return -EINVAL;
++	}
++
++	if (!x->km.dying &&
++	    (x->curlft.bytes >= x->lft.soft_byte_limit ||
++	     x->curlft.packets >= x->lft.soft_packet_limit))
++		km_state_expired(x, 0);
++	return 0;
++}
++
++static int xfrm_state_check_space(struct xfrm_state *x, struct sk_buff *skb)
++{
++	int nhead = x->props.header_len + LL_RESERVED_SPACE(skb->dst->dev)
++		- skb_headroom(skb);
++
++	if (nhead > 0)
++		return pskb_expand_head(skb, nhead, 0, GFP_ATOMIC);
++
++	/* Check tail too... */
++	return 0;
++}
++
++int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb)
++{
++	int err = xfrm_state_check_expire(x);
++	if (err < 0)
++		goto err;
++	err = xfrm_state_check_space(x, skb);
++err:
++	return err;
++}
++
++struct xfrm_state *
++xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto,
++		  unsigned short family)
++{
++	struct xfrm_state *x;
++	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
++	if (!afinfo)
++		return NULL;
++
++	spin_lock_bh(&xfrm_state_lock);
++	x = afinfo->state_lookup(daddr, spi, proto);
++	spin_unlock_bh(&xfrm_state_lock);
++	xfrm_state_put_afinfo(afinfo);
++	return x;
++}
++
++struct xfrm_state *
++xfrm_find_acq(u8 mode, u32 reqid, u8 proto, 
++	      xfrm_address_t *daddr, xfrm_address_t *saddr, 
++	      int create, unsigned short family)
++{
++	struct xfrm_state *x;
++	struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
++	if (!afinfo)
++		return NULL;
++
++	spin_lock_bh(&xfrm_state_lock);
++	x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create);
++	spin_unlock_bh(&xfrm_state_lock);
++	xfrm_state_put_afinfo(afinfo);
++	return x;
++}
++
++/* Silly enough, but I am too lazy to build a resolution list. */
++
++static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq)
++{
++	int i;
++	struct xfrm_state *x;
++
++	for (i = 0; i < XFRM_DST_HSIZE; i++) {
++		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
++			if (x->km.seq == seq) {
++				xfrm_state_hold(x);
++				return x;
++			}
++		}
++	}
++	return NULL;
++}
++
++struct xfrm_state *xfrm_find_acq_byseq(u32 seq)
++{
++	struct xfrm_state *x;
++
++	spin_lock_bh(&xfrm_state_lock);
++	x = __xfrm_find_acq_byseq(seq);
++	spin_unlock_bh(&xfrm_state_lock);
++	return x;
++}
++ 
++u32 xfrm_get_acqseq(void)
++{
++	u32 res;
++	static u32 acqseq;
++	static spinlock_t acqseq_lock = SPIN_LOCK_UNLOCKED;
++
++	spin_lock_bh(&acqseq_lock);
++	res = (++acqseq ? : ++acqseq);
++	spin_unlock_bh(&acqseq_lock);
++	return res;
++}
++
++void
++xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi)
++{
++	u32 h;
++	struct xfrm_state *x0;
++
++	if (x->id.spi)
++		return;
++
++	if (minspi == maxspi) {
++		x0 = xfrm_state_lookup(&x->id.daddr, minspi, x->id.proto, x->props.family);
++		if (x0) {
++			xfrm_state_put(x0);
++			return;
++		}
++		x->id.spi = minspi;
++	} else {
++		u32 spi = 0;
++		minspi = ntohl(minspi);
++		maxspi = ntohl(maxspi);
++		for (h=0; h<maxspi-minspi+1; h++) {
++			spi = minspi + net_random()%(maxspi-minspi+1);
++			x0 = xfrm_state_lookup(&x->id.daddr, htonl(spi), x->id.proto, x->props.family);
++			if (x0 == NULL) {
++				x->id.spi = htonl(spi);
++				break;
++			}
++			xfrm_state_put(x0);
++		}
++	}
++	if (x->id.spi) {
++		spin_lock_bh(&xfrm_state_lock);
++		h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family);
++		list_add(&x->byspi, xfrm_state_byspi+h);
++		xfrm_state_hold(x);
++		spin_unlock_bh(&xfrm_state_lock);
++		wake_up(&km_waitq);
++	}
++}
++
++int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*),
++		    void *data)
++{
++	int i;
++	struct xfrm_state *x;
++	int count = 0;
++	int err = 0;
++
++	spin_lock_bh(&xfrm_state_lock);
++	for (i = 0; i < XFRM_DST_HSIZE; i++) {
++		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
++			if (proto == IPSEC_PROTO_ANY || x->id.proto == proto)
++				count++;
++		}
++	}
++	if (count == 0) {
++		err = -ENOENT;
++		goto out;
++	}
++
++	for (i = 0; i < XFRM_DST_HSIZE; i++) {
++		list_for_each_entry(x, xfrm_state_bydst+i, bydst) {
++			if (proto != IPSEC_PROTO_ANY && x->id.proto != proto)
++				continue;
++			err = func(x, --count, data);
++			if (err)
++				goto out;
++		}
++	}
++out:
++	spin_unlock_bh(&xfrm_state_lock);
++	return err;
++}
++
++
++int xfrm_replay_check(struct xfrm_state *x, u32 seq)
++{
++	u32 diff;
++
++	seq = ntohl(seq);
++
++	if (unlikely(seq == 0))
++		return -EINVAL;
++
++	if (likely(seq > x->replay.seq))
++		return 0;
++
++	diff = x->replay.seq - seq;
++	if (diff >= x->props.replay_window) {
++		x->stats.replay_window++;
++		return -EINVAL;
++	}
++
++	if (x->replay.bitmap & (1U << diff)) {
++		x->stats.replay++;
++		return -EINVAL;
++	}
++	return 0;
++}
++
++void xfrm_replay_advance(struct xfrm_state *x, u32 seq)
++{
++	u32 diff;
++
++	seq = ntohl(seq);
++
++	if (seq > x->replay.seq) {
++		diff = seq - x->replay.seq;
++		if (diff < x->props.replay_window)
++			x->replay.bitmap = ((x->replay.bitmap) << diff) | 1;
++		else
++			x->replay.bitmap = 1;
++		x->replay.seq = seq;
++	} else {
++		diff = x->replay.seq - seq;
++		x->replay.bitmap |= (1U << diff);
++	}
++}
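
A worked trace of the two routines above, assuming props.replay_window = 32
and replay.seq = 100 (the highest sequence number accepted so far):

	xfrm_replay_check(x, htonl(100)) -> -EINVAL (diff 0, bit 0 set: replay)
	xfrm_replay_check(x, htonl( 95)) -> 0 if bit 5 is clear;
	    xfrm_replay_advance() then sets bit 5
	xfrm_replay_check(x, htonl( 60)) -> -EINVAL (diff 40 >= window)
	xfrm_replay_check(x, htonl(103)) -> 0 (ahead of the window);
	    xfrm_replay_advance() shifts the bitmap left by 3, sets bit 0
	    and moves replay.seq to 103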
++
++static struct list_head xfrm_km_list = LIST_HEAD_INIT(xfrm_km_list);
++static rwlock_t		xfrm_km_lock = RW_LOCK_UNLOCKED;
++
++void km_state_expired(struct xfrm_state *x, int hard)
++{
++	struct xfrm_mgr *km;
++
++	if (hard)
++		x->km.state = XFRM_STATE_EXPIRED;
++	else
++		x->km.dying = 1;
++
++	read_lock(&xfrm_km_lock);
++	list_for_each_entry(km, &xfrm_km_list, list)
++		km->notify(x, hard);
++	read_unlock(&xfrm_km_lock);
++
++	if (hard)
++		wake_up(&km_waitq);
++}
++
++int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol)
++{
++	int err = -EINVAL;
++	struct xfrm_mgr *km;
++
++	read_lock(&xfrm_km_lock);
++	list_for_each_entry(km, &xfrm_km_list, list) {
++		err = km->acquire(x, t, pol, XFRM_POLICY_OUT);
++		if (!err)
++			break;
++	}
++	read_unlock(&xfrm_km_lock);
++	return err;
++}
++
++int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport)
++{
++	int err = -EINVAL;
++	struct xfrm_mgr *km;
++
++	read_lock(&xfrm_km_lock);
++	list_for_each_entry(km, &xfrm_km_list, list) {
++		if (km->new_mapping)
++			err = km->new_mapping(x, ipaddr, sport);
++		if (!err)
++			break;
++	}
++	read_unlock(&xfrm_km_lock);
++	return err;
++}
++
++void km_policy_expired(struct xfrm_policy *pol, int dir, int hard)
++{
++	struct xfrm_mgr *km;
++
++	read_lock(&xfrm_km_lock);
++	list_for_each_entry(km, &xfrm_km_list, list)
++		if (km->notify_policy)
++			km->notify_policy(pol, dir, hard);
++	read_unlock(&xfrm_km_lock);
++
++	if (hard)
++		wake_up(&km_waitq);
++}
++
++int xfrm_user_policy(struct sock *sk, int optname, u8 *optval, int optlen)
++{
++	int err;
++	u8 *data;
++	struct xfrm_mgr *km;
++	struct xfrm_policy *pol = NULL;
++
++	if (optlen <= 0 || optlen > PAGE_SIZE)
++		return -EMSGSIZE;
++
++	data = kmalloc(optlen, GFP_KERNEL);
++	if (!data)
++		return -ENOMEM;
++
++	err = -EFAULT;
++	if (copy_from_user(data, optval, optlen))
++		goto out;
++
++	err = -EINVAL;
++	read_lock(&xfrm_km_lock);
++	list_for_each_entry(km, &xfrm_km_list, list) {
++		pol = km->compile_policy(sk->family, optname, data, optlen, &err);
++		if (err >= 0)
++			break;
++	}
++	read_unlock(&xfrm_km_lock);
++
++	if (err >= 0) {
++		xfrm_sk_policy_insert(sk, err, pol);
++		xfrm_pol_put(pol);
++		err = 0;
++	}
++
++out:
++	kfree(data);
++	return err;
++}
++
++int xfrm_register_km(struct xfrm_mgr *km)
++{
++	write_lock_bh(&xfrm_km_lock);
++	list_add_tail(&km->list, &xfrm_km_list);
++	write_unlock_bh(&xfrm_km_lock);
++	return 0;
++}
++
++int xfrm_unregister_km(struct xfrm_mgr *km)
++{
++	write_lock_bh(&xfrm_km_lock);
++	list_del(&km->list);
++	write_unlock_bh(&xfrm_km_lock);
++	return 0;
++}
++
++int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo)
++{
++	int err = 0;
++	if (unlikely(afinfo == NULL))
++		return -EINVAL;
++	if (unlikely(afinfo->family >= NPROTO))
++		return -EAFNOSUPPORT;
++	write_lock(&xfrm_state_afinfo_lock);
++	if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL))
++		err = -ENOBUFS;
++	else {
++		afinfo->state_bydst = xfrm_state_bydst;
++		afinfo->state_byspi = xfrm_state_byspi;
++		xfrm_state_afinfo[afinfo->family] = afinfo;
++	}
++	write_unlock(&xfrm_state_afinfo_lock);
++	return err;
++}
++
++int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo)
++{
++	int err = 0;
++	if (unlikely(afinfo == NULL))
++		return -EINVAL;
++	if (unlikely(afinfo->family >= NPROTO))
++		return -EAFNOSUPPORT;
++	write_lock(&xfrm_state_afinfo_lock);
++	if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) {
++		if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo))
++			err = -EINVAL;
++		else {
++			xfrm_state_afinfo[afinfo->family] = NULL;
++			afinfo->state_byspi = NULL;
++			afinfo->state_bydst = NULL;
++		}
++	}
++	write_unlock(&xfrm_state_afinfo_lock);
++	return err;
++}
++
++static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family)
++{
++	struct xfrm_state_afinfo *afinfo;
++	if (unlikely(family >= NPROTO))
++		return NULL;
++	read_lock(&xfrm_state_afinfo_lock);
++	afinfo = xfrm_state_afinfo[family];
++	if (likely(afinfo != NULL))
++		read_lock(&afinfo->lock);
++	read_unlock(&xfrm_state_afinfo_lock);
++	return afinfo;
++}
++
++static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo)
++{
++	if (unlikely(afinfo == NULL))
++		return;
++	read_unlock(&afinfo->lock);
++}
++
++/* Temporarily located here until net/xfrm/xfrm_tunnel.c is created */
++void xfrm_state_delete_tunnel(struct xfrm_state *x)
++{
++	if (x->tunnel) {
++		struct xfrm_state *t = x->tunnel;
++
++		if (atomic_read(&t->tunnel_users) == 2)
++			xfrm_state_delete(t);
++		atomic_dec(&t->tunnel_users);
++		xfrm_state_put(t);
++		x->tunnel = NULL;
++	}
++}
++
++void __init xfrm_state_init(void)
++{
++	int i;
++
++	for (i=0; i<XFRM_DST_HSIZE; i++) {
++		INIT_LIST_HEAD(&xfrm_state_bydst[i]);
++		INIT_LIST_HEAD(&xfrm_state_byspi[i]);
++	}
++	INIT_TQUEUE(&xfrm_state_gc_work, xfrm_state_gc_task, NULL);
++}
++
+diff -Nru a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
+--- /dev/null	Wed Dec 31 16:00:00 196900
++++ b/net/xfrm/xfrm_user.c	2005-02-13 21:25:10 +11:00
+@@ -0,0 +1,1253 @@
++/* xfrm_user.c: User interface to configure xfrm engine.
++ *
++ * Copyright (C) 2002 David S. Miller (davem at redhat.com)
++ *
++ * Changes:
++ *	Mitsuru KANDA @USAGI
++ * 	Kazunori MIYAZAWA @USAGI
++ * 	Kunihiro Ishiguro <kunihiro at ipinfusion.com>
++ * 		IPv6 support
++ * 	
++ */
++
++#include <linux/module.h>
++#include <linux/kernel.h>
++#include <linux/types.h>
++#include <linux/slab.h>
++#include <linux/socket.h>
++#include <linux/string.h>
++#include <linux/net.h>
++#include <linux/skbuff.h>
++#include <linux/netlink.h>
++#include <linux/rtnetlink.h>
++#include <linux/pfkeyv2.h>
++#include <linux/ipsec.h>
++#include <linux/init.h>
++#include <net/sock.h>
++#include <net/xfrm.h>
++#include <asm/uaccess.h>
++
++static struct sock *xfrm_nl;
++
++static int verify_one_alg(struct rtattr **xfrma, enum xfrm_attr_type_t type)
++{
++	struct rtattr *rt = xfrma[type - 1];
++	struct xfrm_algo *algp;
++
++	if (!rt)
++		return 0;
++
++	if ((rt->rta_len - sizeof(*rt)) < sizeof(*algp))
++		return -EINVAL;
++
++	algp = RTA_DATA(rt);
++	switch (type) {
++	case XFRMA_ALG_AUTH:
++		if (!algp->alg_key_len &&
++		    strcmp(algp->alg_name, "digest_null") != 0)
++			return -EINVAL;
++		break;
++
++	case XFRMA_ALG_CRYPT:
++		if (!algp->alg_key_len &&
++		    strcmp(algp->alg_name, "cipher_null") != 0)
++			return -EINVAL;
++		break;
++
++	case XFRMA_ALG_COMP:
++		/* Zero length keys are legal.  */
++		break;
++
++	default:
++		return -EINVAL;
++	};
++
++	algp->alg_name[CRYPTO_MAX_ALG_NAME - 1] = '\0';
++	return 0;
++}
++
++static int verify_encap_tmpl(struct rtattr **xfrma)
++{
++	struct rtattr *rt = xfrma[XFRMA_ENCAP - 1];
++	struct xfrm_encap_tmpl *encap;
++
++	if (!rt)
++		return 0;
++
++	if ((rt->rta_len - sizeof(*rt)) < sizeof(*encap))
++		return -EINVAL;
++
++	return 0;
++}
++
++static int verify_newsa_info(struct xfrm_usersa_info *p,
++			     struct rtattr **xfrma)
++{
++	int err;
++
++	err = -EINVAL;
++	switch (p->family) {
++	case AF_INET:
++		break;
++
++	case AF_INET6:
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++		break;
++#else
++		err = -EAFNOSUPPORT;
++		goto out;
++#endif
++
++	default:
++		goto out;
++	};
++
++	err = -EINVAL;
++	switch (p->id.proto) {
++	case IPPROTO_AH:
++		if (!xfrma[XFRMA_ALG_AUTH-1]	||
++		    xfrma[XFRMA_ALG_CRYPT-1]	||
++		    xfrma[XFRMA_ALG_COMP-1])
++			goto out;
++		break;
++
++	case IPPROTO_ESP:
++		if ((!xfrma[XFRMA_ALG_AUTH-1] &&
++		     !xfrma[XFRMA_ALG_CRYPT-1])	||
++		    xfrma[XFRMA_ALG_COMP-1])
++			goto out;
++		break;
++
++	case IPPROTO_COMP:
++		if (!xfrma[XFRMA_ALG_COMP-1]	||
++		    xfrma[XFRMA_ALG_AUTH-1]	||
++		    xfrma[XFRMA_ALG_CRYPT-1])
++			goto out;
++		break;
++
++	default:
++		goto out;
++	};
++
++	if ((err = verify_one_alg(xfrma, XFRMA_ALG_AUTH)))
++		goto out;
++	if ((err = verify_one_alg(xfrma, XFRMA_ALG_CRYPT)))
++		goto out;
++	if ((err = verify_one_alg(xfrma, XFRMA_ALG_COMP)))
++		goto out;
++	if ((err = verify_encap_tmpl(xfrma)))
++		goto out;
++
++	err = -EINVAL;
++	switch (p->mode) {
++	case 0:
++	case 1:
++		break;
++
++	default:
++		goto out;
++	};
++
++	err = 0;
++
++out:
++	return err;
++}
++
++static int attach_one_algo(struct xfrm_algo **algpp, u8 *props,
++			   struct xfrm_algo_desc *(*get_byname)(char *),
++			   struct rtattr *u_arg)
++{
++	struct rtattr *rta = u_arg;
++	struct xfrm_algo *p, *ualg;
++	struct xfrm_algo_desc *algo;
++
++	if (!rta)
++		return 0;
++
++	ualg = RTA_DATA(rta);
++
++	algo = get_byname(ualg->alg_name);
++	if (!algo)
++		return -ENOSYS;
++	*props = algo->desc.sadb_alg_id;
++
++	p = kmalloc(sizeof(*ualg) + ualg->alg_key_len, GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	memcpy(p, ualg, sizeof(*ualg) + ualg->alg_key_len);
++	*algpp = p;
++	return 0;
++}
++
++static int attach_encap_tmpl(struct xfrm_encap_tmpl **encapp, struct rtattr *u_arg)
++{
++	struct rtattr *rta = u_arg;
++	struct xfrm_encap_tmpl *p, *uencap;
++
++	if (!rta)
++		return 0;
++
++	uencap = RTA_DATA(rta);
++	p = kmalloc(sizeof(*p), GFP_KERNEL);
++	if (!p)
++		return -ENOMEM;
++
++	memcpy(p, uencap, sizeof(*p));
++	*encapp = p;
++	return 0;
++}
++
++static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
++{
++	memcpy(&x->id, &p->id, sizeof(x->id));
++	memcpy(&x->sel, &p->sel, sizeof(x->sel));
++	memcpy(&x->lft, &p->lft, sizeof(x->lft));
++	x->props.mode = p->mode;
++	x->props.replay_window = p->replay_window;
++	x->props.reqid = p->reqid;
++	x->props.family = p->family;
++	x->props.saddr = p->saddr;
++	x->props.flags = p->flags;
++}
++
++static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p,
++					       struct rtattr **xfrma,
++					       int *errp)
++{
++	struct xfrm_state *x = xfrm_state_alloc();
++	int err = -ENOMEM;
++
++	if (!x)
++		goto error_no_put;
++
++	copy_from_user_state(x, p);
++
++	if ((err = attach_one_algo(&x->aalg, &x->props.aalgo,
++				   xfrm_aalg_get_byname,
++				   xfrma[XFRMA_ALG_AUTH-1])))
++		goto error;
++	if ((err = attach_one_algo(&x->ealg, &x->props.ealgo,
++				   xfrm_ealg_get_byname,
++				   xfrma[XFRMA_ALG_CRYPT-1])))
++		goto error;
++	if ((err = attach_one_algo(&x->calg, &x->props.calgo,
++				   xfrm_calg_get_byname,
++				   xfrma[XFRMA_ALG_COMP-1])))
++		goto error;
++	if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1])))
++		goto error;
++
++	err = -ENOENT;
++	x->type = xfrm_get_type(x->id.proto, x->props.family);
++	if (x->type == NULL)
++		goto error;
++
++	err = x->type->init_state(x, NULL);
++	if (err)
++		goto error;
++
++	x->curlft.add_time = (unsigned long) xtime.tv_sec;
++	x->km.state = XFRM_STATE_VALID;
++	x->km.seq = p->seq;
++
++	return x;
++
++error:
++	x->km.state = XFRM_STATE_DEAD;
++	xfrm_state_put(x);
++error_no_put:
++	*errp = err;
++	return NULL;
++}
++
++static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_usersa_info *p = NLMSG_DATA(nlh);
++	struct xfrm_state *x;
++	int err;
++
++	err = verify_newsa_info(p, (struct rtattr **) xfrma);
++	if (err)
++		return err;
++
++	xfrm_probe_algs();
++
++	x = xfrm_state_construct(p, (struct rtattr **) xfrma, &err);
++	if (!x)
++		return err;
++
++	if (nlh->nlmsg_type == XFRM_MSG_NEWSA)
++		err = xfrm_state_add(x);
++	else
++		err = xfrm_state_update(x);
++
++	if (err < 0) {
++		x->km.state = XFRM_STATE_DEAD;
++		xfrm_state_put(x);
++	}
++
++	return err;
++}
++
++static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_state *x;
++	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
++
++	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
++	if (x == NULL)
++		return -ESRCH;
++
++	if (xfrm_state_kern(x)) {
++		xfrm_state_put(x);
++		return -EPERM;
++	}
++
++	xfrm_state_delete(x);
++	xfrm_state_put(x);
++
++	return 0;
++}
++
++static void copy_to_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p)
++{
++	memcpy(&p->id, &x->id, sizeof(p->id));
++	memcpy(&p->sel, &x->sel, sizeof(p->sel));
++	memcpy(&p->lft, &x->lft, sizeof(p->lft));
++	memcpy(&p->curlft, &x->curlft, sizeof(p->curlft));
++	memcpy(&p->stats, &x->stats, sizeof(p->stats));
++	p->saddr = x->props.saddr;
++	p->mode = x->props.mode;
++	p->replay_window = x->props.replay_window;
++	p->reqid = x->props.reqid;
++	p->family = x->props.family;
++	p->flags = x->props.flags;
++	p->seq = x->km.seq;
++}
++
++struct xfrm_dump_info {
++	struct sk_buff *in_skb;
++	struct sk_buff *out_skb;
++	u32 nlmsg_seq;
++	u16 nlmsg_flags;
++	int start_idx;
++	int this_idx;
++};
++
++static int dump_one_state(struct xfrm_state *x, int count, void *ptr)
++{
++	struct xfrm_dump_info *sp = ptr;
++	struct sk_buff *in_skb = sp->in_skb;
++	struct sk_buff *skb = sp->out_skb;
++	struct xfrm_usersa_info *p;
++	struct nlmsghdr *nlh;
++	unsigned char *b = skb->tail;
++
++	if (sp->this_idx < sp->start_idx)
++		goto out;
++
++	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
++			sp->nlmsg_seq,
++			XFRM_MSG_NEWSA, sizeof(*p));
++	nlh->nlmsg_flags = sp->nlmsg_flags;
++
++	p = NLMSG_DATA(nlh);
++	copy_to_user_state(x, p);
++
++	if (x->aalg)
++		RTA_PUT(skb, XFRMA_ALG_AUTH,
++			sizeof(*(x->aalg))+(x->aalg->alg_key_len+7)/8, x->aalg);
++	if (x->ealg)
++		RTA_PUT(skb, XFRMA_ALG_CRYPT,
++			sizeof(*(x->ealg))+(x->ealg->alg_key_len+7)/8, x->ealg);
++	if (x->calg)
++		RTA_PUT(skb, XFRMA_ALG_COMP, sizeof(*(x->calg)), x->calg);
++
++	if (x->encap)
++		RTA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap);
++
++	nlh->nlmsg_len = skb->tail - b;
++out:
++	sp->this_idx++;
++	return 0;
++
++nlmsg_failure:
++rtattr_failure:
++	skb_trim(skb, b - skb->data);
++	return -1;
++}
++
++static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb)
++{
++	struct xfrm_dump_info info;
++
++	info.in_skb = cb->skb;
++	info.out_skb = skb;
++	info.nlmsg_seq = cb->nlh->nlmsg_seq;
++	info.nlmsg_flags = NLM_F_MULTI;
++	info.this_idx = 0;
++	info.start_idx = cb->args[0];
++	(void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info);
++	cb->args[0] = info.this_idx;
++
++	return skb->len;
++}
++
++static struct sk_buff *xfrm_state_netlink(struct sk_buff *in_skb,
++					  struct xfrm_state *x, u32 seq)
++{
++	struct xfrm_dump_info info;
++	struct sk_buff *skb;
++
++	skb = alloc_skb(NLMSG_GOODSIZE, GFP_ATOMIC);
++	if (!skb)
++		return ERR_PTR(-ENOMEM);
++
++	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
++	info.in_skb = in_skb;
++	info.out_skb = skb;
++	info.nlmsg_seq = seq;
++	info.nlmsg_flags = 0;
++	info.this_idx = info.start_idx = 0;
++
++	if (dump_one_state(x, 0, &info)) {
++		kfree_skb(skb);
++		return NULL;
++	}
++
++	return skb;
++}
++
++static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_usersa_id *p = NLMSG_DATA(nlh);
++	struct xfrm_state *x;
++	struct sk_buff *resp_skb;
++	int err;
++
++	x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family);
++	err = -ESRCH;
++	if (x == NULL)
++		goto out_noput;
++
++	resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
++	if (IS_ERR(resp_skb)) {
++		err = PTR_ERR(resp_skb);
++	} else {
++		err = netlink_unicast(xfrm_nl, resp_skb,
++				      NETLINK_CB(skb).pid, MSG_DONTWAIT);
++	}
++	xfrm_state_put(x);
++out_noput:
++	return err;
++}
++
++static int verify_userspi_info(struct xfrm_userspi_info *p)
++{
++	switch (p->info.id.proto) {
++	case IPPROTO_AH:
++	case IPPROTO_ESP:
++		break;
++
++	case IPPROTO_COMP:
++		/* IPCOMP spi is 16-bits. */
++		if (p->max >= 0x10000)
++			return -EINVAL;
++		break;
++
++	default:
++		return -EINVAL;
++	};
++
++	if (p->min > p->max)
++		return -EINVAL;
++
++	return 0;
++}
++
++static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_state *x;
++	struct xfrm_userspi_info *p;
++	struct sk_buff *resp_skb;
++	xfrm_address_t *daddr;
++	int family;
++	int err;
++
++	p = NLMSG_DATA(nlh);
++	err = verify_userspi_info(p);
++	if (err)
++		goto out_noput;
++
++	family = p->info.family;
++	daddr = &p->info.id.daddr;
++
++	x = NULL;
++	if (p->info.seq) {
++		x = xfrm_find_acq_byseq(p->info.seq);
++		if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) {
++			xfrm_state_put(x);
++			x = NULL;
++		}
++	}
++
++	if (!x)
++		x = xfrm_find_acq(p->info.mode, p->info.reqid,
++				  p->info.id.proto, daddr,
++				  &p->info.saddr, 1,
++				  family);
++	err = -ENOENT;
++	if (x == NULL)
++		goto out_noput;
++
++	resp_skb = ERR_PTR(-ENOENT);
++
++	spin_lock_bh(&x->lock);
++	if (x->km.state != XFRM_STATE_DEAD) {
++		xfrm_alloc_spi(x, htonl(p->min), htonl(p->max));
++		if (x->id.spi)
++			resp_skb = xfrm_state_netlink(skb, x, nlh->nlmsg_seq);
++	}
++	spin_unlock_bh(&x->lock);
++
++	if (IS_ERR(resp_skb)) {
++		err = PTR_ERR(resp_skb);
++		goto out;
++	}
++
++	err = netlink_unicast(xfrm_nl, resp_skb,
++			      NETLINK_CB(skb).pid, MSG_DONTWAIT);
++
++out:
++	xfrm_state_put(x);
++out_noput:
++	return err;
++}
++
++static int verify_policy_dir(__u8 dir)
++{
++	switch (dir) {
++	case XFRM_POLICY_IN:
++	case XFRM_POLICY_OUT:
++	case XFRM_POLICY_FWD:
++		break;
++
++	default:
++		return -EINVAL;
++	};
++
++	return 0;
++}
++
++static int verify_newpolicy_info(struct xfrm_userpolicy_info *p)
++{
++	switch (p->share) {
++	case XFRM_SHARE_ANY:
++	case XFRM_SHARE_SESSION:
++	case XFRM_SHARE_USER:
++	case XFRM_SHARE_UNIQUE:
++		break;
++
++	default:
++		return -EINVAL;
++	};
++
++	switch (p->action) {
++	case XFRM_POLICY_ALLOW:
++	case XFRM_POLICY_BLOCK:
++		break;
++
++	default:
++		return -EINVAL;
++	};
++
++	switch (p->sel.family) {
++	case AF_INET:
++		break;
++
++	case AF_INET6:
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++		break;
++#else
++		return  -EAFNOSUPPORT;
++#endif
++
++	default:
++		return -EINVAL;
++	};
++
++	return verify_policy_dir(p->dir);
++}
++
++static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut,
++			   int nr)
++{
++	int i;
++
++	xp->xfrm_nr = nr;
++	for (i = 0; i < nr; i++, ut++) {
++		struct xfrm_tmpl *t = &xp->xfrm_vec[i];
++
++		memcpy(&t->id, &ut->id, sizeof(struct xfrm_id));
++		memcpy(&t->saddr, &ut->saddr,
++		       sizeof(xfrm_address_t));
++		t->reqid = ut->reqid;
++		t->mode = ut->mode;
++		t->share = ut->share;
++		t->optional = ut->optional;
++		t->aalgos = ut->aalgos;
++		t->ealgos = ut->ealgos;
++		t->calgos = ut->calgos;
++	}
++}
++
++static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma)
++{
++	struct rtattr *rt = xfrma[XFRMA_TMPL-1];
++	struct xfrm_user_tmpl *utmpl;
++	int nr;
++
++	if (!rt) {
++		pol->xfrm_nr = 0;
++	} else {
++		nr = (rt->rta_len - sizeof(*rt)) / sizeof(*utmpl);
++
++		if (nr > XFRM_MAX_DEPTH)
++			return -EINVAL;
++
++		copy_templates(pol, RTA_DATA(rt), nr);
++	}
++	return 0;
++}
++
++static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p)
++{
++	xp->priority = p->priority;
++	xp->index = p->index;
++	memcpy(&xp->selector, &p->sel, sizeof(xp->selector));
++	memcpy(&xp->lft, &p->lft, sizeof(xp->lft));
++	xp->action = p->action;
++	xp->flags = p->flags;
++	xp->family = p->sel.family;
++	/* XXX xp->share = p->share; */
++}
++
++static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p, int dir)
++{
++	memcpy(&p->sel, &xp->selector, sizeof(p->sel));
++	memcpy(&p->lft, &xp->lft, sizeof(p->lft));
++	memcpy(&p->curlft, &xp->curlft, sizeof(p->curlft));
++	p->priority = xp->priority;
++	p->index = xp->index;
++	p->sel.family = xp->family;
++	p->dir = dir;
++	p->action = xp->action;
++	p->flags = xp->flags;
++	p->share = XFRM_SHARE_ANY; /* XXX xp->share */
++}
++
++static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, struct rtattr **xfrma, int *errp)
++{
++	struct xfrm_policy *xp = xfrm_policy_alloc(GFP_KERNEL);
++	int err;
++
++	if (!xp) {
++		*errp = -ENOMEM;
++		return NULL;
++	}
++
++	copy_from_user_policy(xp, p);
++	err = copy_from_user_tmpl(xp, xfrma);
++	if (err) {
++		*errp = err;
++		kfree(xp);
++		xp = NULL;
++	}
++
++	return xp;
++}
++
++static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_userpolicy_info *p = NLMSG_DATA(nlh);
++	struct xfrm_policy *xp;
++	int err;
++	int excl;
++
++	err = verify_newpolicy_info(p);
++	if (err)
++		return err;
++
++	xp = xfrm_policy_construct(p, (struct rtattr **) xfrma, &err);
++	if (!xp)
++		return err;
++
++	excl = nlh->nlmsg_type == XFRM_MSG_NEWPOLICY;
++	err = xfrm_policy_insert(p->dir, xp, excl);
++	if (err) {
++		kfree(xp);
++		return err;
++	}
++
++	xfrm_pol_put(xp);
++
++	return 0;
++}
++
++static int copy_to_user_tmpl(struct xfrm_policy *xp, struct sk_buff *skb)
++{
++	struct xfrm_user_tmpl vec[XFRM_MAX_DEPTH];
++	int i;
++
++	if (xp->xfrm_nr == 0)
++		return 0;
++
++	for (i = 0; i < xp->xfrm_nr; i++) {
++		struct xfrm_user_tmpl *up = &vec[i];
++		struct xfrm_tmpl *kp = &xp->xfrm_vec[i];
++
++		memcpy(&up->id, &kp->id, sizeof(up->id));
++		up->family = xp->family;
++		memcpy(&up->saddr, &kp->saddr, sizeof(up->saddr));
++		up->reqid = kp->reqid;
++		up->mode = kp->mode;
++		up->share = kp->share;
++		up->optional = kp->optional;
++		up->aalgos = kp->aalgos;
++		up->ealgos = kp->ealgos;
++		up->calgos = kp->calgos;
++	}
++	RTA_PUT(skb, XFRMA_TMPL,
++		(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr),
++		vec);
++
++	return 0;
++
++rtattr_failure:
++	return -1;
++}
++
++static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr)
++{
++	struct xfrm_dump_info *sp = ptr;
++	struct xfrm_userpolicy_info *p;
++	struct sk_buff *in_skb = sp->in_skb;
++	struct sk_buff *skb = sp->out_skb;
++	struct nlmsghdr *nlh;
++	unsigned char *b = skb->tail;
++
++	if (sp->this_idx < sp->start_idx)
++		goto out;
++
++	nlh = NLMSG_PUT(skb, NETLINK_CB(in_skb).pid,
++			sp->nlmsg_seq,
++			XFRM_MSG_NEWPOLICY, sizeof(*p));
++	p = NLMSG_DATA(nlh);
++	nlh->nlmsg_flags = sp->nlmsg_flags;
++
++	copy_to_user_policy(xp, p, dir);
++	if (copy_to_user_tmpl(xp, skb) < 0)
++		goto nlmsg_failure;
++
++	nlh->nlmsg_len = skb->tail - b;
++out:
++	sp->this_idx++;
++	return 0;
++
++nlmsg_failure:
++	skb_trim(skb, b - skb->data);
++	return -1;
++}
++
++static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb)
++{
++	struct xfrm_dump_info info;
++
++	info.in_skb = cb->skb;
++	info.out_skb = skb;
++	info.nlmsg_seq = cb->nlh->nlmsg_seq;
++	info.nlmsg_flags = NLM_F_MULTI;
++	info.this_idx = 0;
++	info.start_idx = cb->args[0];
++	(void) xfrm_policy_walk(dump_one_policy, &info);
++	cb->args[0] = info.this_idx;
++
++	return skb->len;
++}
++
++static struct sk_buff *xfrm_policy_netlink(struct sk_buff *in_skb,
++					  struct xfrm_policy *xp,
++					  int dir, u32 seq)
++{
++	struct xfrm_dump_info info;
++	struct sk_buff *skb;
++
++	skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL);
++	if (!skb)
++		return ERR_PTR(-ENOMEM);
++
++	NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid;
++	info.in_skb = in_skb;
++	info.out_skb = skb;
++	info.nlmsg_seq = seq;
++	info.nlmsg_flags = 0;
++	info.this_idx = info.start_idx = 0;
++
++	if (dump_one_policy(xp, dir, 0, &info) < 0) {
++		kfree_skb(skb);
++		return NULL;
++	}
++
++	return skb;
++}
++
++static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_policy *xp;
++	struct xfrm_userpolicy_id *p;
++	int err;
++	int delete;
++
++	p = NLMSG_DATA(nlh);
++	delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY;
++
++	err = verify_policy_dir(p->dir);
++	if (err)
++		return err;
++
++	if (p->index)
++		xp = xfrm_policy_byid(p->dir, p->index, delete);
++	else
++		xp = xfrm_policy_bysel(p->dir, &p->sel, delete);
++	if (xp == NULL)
++		return -ENOENT;
++
++	if (!delete) {
++		struct sk_buff *resp_skb;
++
++		resp_skb = xfrm_policy_netlink(skb, xp, p->dir, nlh->nlmsg_seq);
++		if (IS_ERR(resp_skb)) {
++			err = PTR_ERR(resp_skb);
++		} else {
++			err = netlink_unicast(xfrm_nl, resp_skb,
++					      NETLINK_CB(skb).pid,
++					      MSG_DONTWAIT);
++		}
++	}
++
++	xfrm_pol_put(xp);
++
++	return err;
++}
++
++static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	struct xfrm_usersa_flush *p = NLMSG_DATA(nlh);
++
++	xfrm_state_flush(p->proto);
++	return 0;
++}
++
++static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma)
++{
++	xfrm_policy_flush();
++	return 0;
++}
++
++static const int xfrm_msg_min[(XFRM_MSG_MAX + 1 - XFRM_MSG_BASE)] = {
++	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* NEW SA */
++	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* DEL SA */
++	NLMSG_LENGTH(sizeof(struct xfrm_usersa_id)),	/* GET SA */
++	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* NEW POLICY */
++	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* DEL POLICY */
++	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_id)),  /* GET POLICY */
++	NLMSG_LENGTH(sizeof(struct xfrm_userspi_info)),	/* ALLOC SPI */
++	NLMSG_LENGTH(sizeof(struct xfrm_user_acquire)),	/* ACQUIRE */
++	NLMSG_LENGTH(sizeof(struct xfrm_user_expire)),	/* EXPIRE */
++	NLMSG_LENGTH(sizeof(struct xfrm_userpolicy_info)),/* UPD POLICY */
++	NLMSG_LENGTH(sizeof(struct xfrm_usersa_info)),	/* UPD SA */
++	NLMSG_LENGTH(sizeof(struct xfrm_user_polexpire)), /* POLEXPIRE */
++	NLMSG_LENGTH(sizeof(struct xfrm_usersa_flush)),	/* FLUSH SA */
++	NLMSG_LENGTH(0),				/* FLUSH POLICY */
++};
++
++static struct xfrm_link {
++	int (*doit)(struct sk_buff *, struct nlmsghdr *, void **);
++	int (*dump)(struct sk_buff *, struct netlink_callback *);
++} xfrm_dispatch[] = {
++	{	.doit	=	xfrm_add_sa, 		},
++	{	.doit	=	xfrm_del_sa, 		},
++	{
++		.doit	=	xfrm_get_sa,
++		.dump	=	xfrm_dump_sa,
++	},
++	{	.doit	=	xfrm_add_policy 	},
++	{	.doit	=	xfrm_get_policy 	},
++	{
++		.doit	=	xfrm_get_policy,
++		.dump	=	xfrm_dump_policy,
++	},
++	{	.doit	=	xfrm_alloc_userspi	},
++	{},
++	{},
++	{	.doit	=	xfrm_add_policy 	},
++	{	.doit	=	xfrm_add_sa, 		},
++	{},
++	{	.doit	=	xfrm_flush_sa		},
++	{	.doit	=	xfrm_flush_policy	},
++};
++
++static int xfrm_done(struct netlink_callback *cb)
++{
++	return 0;
++}
++
++static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, int *errp)
++{
++	struct rtattr *xfrma[XFRMA_MAX];
++	struct xfrm_link *link;
++	int type, min_len;
++
++	if (!(nlh->nlmsg_flags & NLM_F_REQUEST))
++		return 0;
++
++	type = nlh->nlmsg_type;
++
++	/* Control messages: ignore them */
++	if (type < XFRM_MSG_BASE)
++		return 0;
++
++	/* Unknown message: reply with EINVAL */
++	if (type > XFRM_MSG_MAX)
++		goto err_einval;
++
++	type -= XFRM_MSG_BASE;
++	link = &xfrm_dispatch[type];
++
++	/* All operations require privileges, even GET */
++	if (!cap_raised(NETLINK_CB(skb).eff_cap, CAP_NET_ADMIN)) {
++		*errp = -EPERM;
++		return -1;
++	}
++
++	if ((type == 2 || type == 5) && (nlh->nlmsg_flags & NLM_F_DUMP)) {
++		u32 rlen;
++
++		if (link->dump == NULL)
++			goto err_einval;
++
++		if ((*errp = netlink_dump_start(xfrm_nl, skb, nlh,
++						link->dump,
++						xfrm_done)) != 0) {
++			return -1;
++		}
++		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
++		if (rlen > skb->len)
++			rlen = skb->len;
++		skb_pull(skb, rlen);
++		return -1;
++	}
++
++	memset(xfrma, 0, sizeof(xfrma));
++
++	if (nlh->nlmsg_len < (min_len = xfrm_msg_min[type]))
++		goto err_einval;
++
++	if (nlh->nlmsg_len > min_len) {
++		int attrlen = nlh->nlmsg_len - NLMSG_ALIGN(min_len);
++		struct rtattr *attr = (void *) nlh + NLMSG_ALIGN(min_len);
++
++		while (RTA_OK(attr, attrlen)) {
++			unsigned short flavor = attr->rta_type;
++			if (flavor) {
++				if (flavor > XFRMA_MAX)
++					goto err_einval;
++				xfrma[flavor - 1] = attr;
++			}
++			attr = RTA_NEXT(attr, attrlen);
++		}
++	}
++
++	if (link->doit == NULL)
++		goto err_einval;
++	*errp = link->doit(skb, nlh, (void **) &xfrma);
++
++	return *errp;
++
++err_einval:
++	*errp = -EINVAL;
++	return -1;
++}
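
A userspace sketch of driving this interface; the NETLINK_XFRM protocol
number and the XFRM_MSG_FLUSHSA/xfrm_usersa_flush definitions are assumed
to come from the headers added elsewhere in this patch:

	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/xfrm.h>		/* XFRM_MSG_FLUSHSA et al. */
	#include <linux/ipsec.h>	/* IPSEC_PROTO_ANY         */

	int xfrm_flush_all_sas(void)
	{
		struct {
			struct nlmsghdr n;
			struct xfrm_usersa_flush f;
		} req;
		int fd, rc;

		fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
		if (fd < 0)
			return -1;

		memset(&req, 0, sizeof(req));
		req.n.nlmsg_len   = NLMSG_LENGTH(sizeof(req.f));
		req.n.nlmsg_type  = XFRM_MSG_FLUSHSA;
		req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		req.f.proto       = IPSEC_PROTO_ANY;	/* flush everything */

		rc = send(fd, &req, req.n.nlmsg_len, 0);
		close(fd);
		return rc < 0 ? -1 : 0;
	}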
++
++static int xfrm_user_rcv_skb(struct sk_buff *skb)
++{
++	int err;
++	struct nlmsghdr *nlh;
++
++	while (skb->len >= NLMSG_SPACE(0)) {
++		u32 rlen;
++
++		nlh = (struct nlmsghdr *) skb->data;
++		if (nlh->nlmsg_len < sizeof(*nlh) ||
++		    skb->len < nlh->nlmsg_len)
++			return 0;
++		rlen = NLMSG_ALIGN(nlh->nlmsg_len);
++		if (rlen > skb->len)
++			rlen = skb->len;
++		if (xfrm_user_rcv_msg(skb, nlh, &err) < 0) {
++			if (err == 0)
++				return -1;
++			netlink_ack(skb, nlh, err);
++		} else if (nlh->nlmsg_flags & NLM_F_ACK)
++			netlink_ack(skb, nlh, 0);
++		skb_pull(skb, rlen);
++	}
++
++	return 0;
++}
++
++static void xfrm_netlink_rcv(struct sock *sk, int len)
++{
++	do {
++		struct sk_buff *skb;
++
++		down(&xfrm_cfg_sem);
++
++		while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
++			if (xfrm_user_rcv_skb(skb)) {
++				if (skb->len)
++					skb_queue_head(&sk->receive_queue, skb);
++				else
++					kfree_skb(skb);
++				break;
++			}
++			kfree_skb(skb);
++		}
++
++		up(&xfrm_cfg_sem);
++
++	} while (xfrm_nl && xfrm_nl->receive_queue.qlen);
++}
++
++static int build_expire(struct sk_buff *skb, struct xfrm_state *x, int hard)
++{
++	struct xfrm_user_expire *ue;
++	struct nlmsghdr *nlh;
++	unsigned char *b = skb->tail;
++
++	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_EXPIRE,
++			sizeof(*ue));
++	ue = NLMSG_DATA(nlh);
++	nlh->nlmsg_flags = 0;
++
++	copy_to_user_state(x, &ue->state);
++	ue->hard = (hard != 0) ? 1 : 0;
++
++	nlh->nlmsg_len = skb->tail - b;
++	return skb->len;
++
++nlmsg_failure:
++	skb_trim(skb, b - skb->data);
++	return -1;
++}
++
++static int xfrm_send_state_notify(struct xfrm_state *x, int hard)
++{
++	struct sk_buff *skb;
++
++	skb = alloc_skb(sizeof(struct xfrm_user_expire) + 16, GFP_ATOMIC);
++	if (skb == NULL)
++		return -ENOMEM;
++
++	if (build_expire(skb, x, hard) < 0)
++		BUG();
++
++	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
++
++	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
++}
++
++static int build_acquire(struct sk_buff *skb, struct xfrm_state *x,
++			 struct xfrm_tmpl *xt, struct xfrm_policy *xp,
++			 int dir)
++{
++	struct xfrm_user_acquire *ua;
++	struct nlmsghdr *nlh;
++	unsigned char *b = skb->tail;
++	__u32 seq = xfrm_get_acqseq();
++
++	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_ACQUIRE,
++			sizeof(*ua));
++	ua = NLMSG_DATA(nlh);
++	nlh->nlmsg_flags = 0;
++
++	memcpy(&ua->id, &x->id, sizeof(ua->id));
++	memcpy(&ua->saddr, &x->props.saddr, sizeof(ua->saddr));
++	memcpy(&ua->sel, &x->sel, sizeof(ua->sel));
++	copy_to_user_policy(xp, &ua->policy, dir);
++	ua->aalgos = xt->aalgos;
++	ua->ealgos = xt->ealgos;
++	ua->calgos = xt->calgos;
++	ua->seq = x->km.seq = seq;
++
++	if (copy_to_user_tmpl(xp, skb) < 0)
++		goto nlmsg_failure;
++
++	nlh->nlmsg_len = skb->tail - b;
++	return skb->len;
++
++nlmsg_failure:
++	skb_trim(skb, b - skb->data);
++	return -1;
++}
++
++static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt,
++			     struct xfrm_policy *xp, int dir)
++{
++	struct sk_buff *skb;
++	size_t len;
++
++	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
++	len += NLMSG_SPACE(sizeof(struct xfrm_user_acquire));
++	skb = alloc_skb(len, GFP_ATOMIC);
++	if (skb == NULL)
++		return -ENOMEM;
++
++	if (build_acquire(skb, x, xt, xp, dir) < 0)
++		BUG();
++
++	NETLINK_CB(skb).dst_groups = XFRMGRP_ACQUIRE;
++
++	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_ACQUIRE, GFP_ATOMIC);
++}
++
++/* User gives us xfrm_user_policy_info followed by an array of 0
++ * or more templates.
++ */
++struct xfrm_policy *xfrm_compile_policy(u16 family, int opt,
++                                        u8 *data, int len, int *dir)
++{
++	struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data;
++	struct xfrm_user_tmpl *ut = (struct xfrm_user_tmpl *) (p + 1);
++	struct xfrm_policy *xp;
++	int nr;
++
++	switch (family) {
++	case AF_INET:
++		if (opt != IP_XFRM_POLICY) {
++			*dir = -EOPNOTSUPP;
++			return NULL;
++		}
++		break;
++#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE)
++	case AF_INET6:
++		if (opt != IPV6_XFRM_POLICY) {
++			*dir = -EOPNOTSUPP;
++			return NULL;
++		}
++		break;
++#endif
++	default:
++		*dir = -EINVAL;
++		return NULL;
++	}
++
++	*dir = -EINVAL;
++
++	if (len < sizeof(*p) ||
++	    verify_newpolicy_info(p))
++		return NULL;
++
++	nr = ((len - sizeof(*p)) / sizeof(*ut));
++	if (nr > XFRM_MAX_DEPTH)
++		return NULL;
++
++	xp = xfrm_policy_alloc(GFP_KERNEL);
++	if (xp == NULL) {
++		*dir = -ENOBUFS;
++		return NULL;
++	}
++
++	copy_from_user_policy(xp, p);
++	copy_templates(xp, ut, nr);
++
++	*dir = p->dir;
++
++	return xp;
++}
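
To illustrate the layout xfrm_compile_policy() parses, a minimal per-socket
policy install through IP_XFRM_POLICY might look like this sketch. It assumes
headers from a patched tree, sufficient privilege, an all-match selector, and
appends no templates, so nr == 0 above:

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/xfrm.h>

	#ifndef IP_XFRM_POLICY
	#define IP_XFRM_POLICY 17	/* value from <linux/in.h> */
	#endif

	int main(void)
	{
		struct xfrm_userpolicy_info pol;
		int fd = socket(AF_INET, SOCK_DGRAM, 0);

		if (fd < 0)
			return 1;

		memset(&pol, 0, sizeof(pol));
		pol.sel.family = AF_INET;   /* zero addresses match anything */
		pol.dir = XFRM_POLICY_OUT;
		pol.action = XFRM_POLICY_ALLOW;
		pol.lft.soft_byte_limit = XFRM_INF;	/* no limits */
		pol.lft.hard_byte_limit = XFRM_INF;
		pol.lft.soft_packet_limit = XFRM_INF;
		pol.lft.hard_packet_limit = XFRM_INF;

		/* No xfrm_user_tmpl entries follow the struct, so this
		 * is a bypass-style policy with zero templates. */
		if (setsockopt(fd, IPPROTO_IP, IP_XFRM_POLICY,
			       &pol, sizeof(pol)) < 0)
			perror("setsockopt");
		return 0;
	}
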
++
++static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp,
++			   int dir, int hard)
++{
++	struct xfrm_user_polexpire *upe;
++	struct nlmsghdr *nlh;
++	unsigned char *b = skb->tail;
++
++	nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe));
++	upe = NLMSG_DATA(nlh);
++	nlh->nlmsg_flags = 0;
++
++	copy_to_user_policy(xp, &upe->pol, dir);
++	if (copy_to_user_tmpl(xp, skb) < 0)
++		goto nlmsg_failure;
++	upe->hard = !!hard;
++
++	nlh->nlmsg_len = skb->tail - b;
++	return skb->len;
++
++nlmsg_failure:
++	skb_trim(skb, b - skb->data);
++	return -1;
++}
++
++static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, int hard)
++{
++	struct sk_buff *skb;
++	size_t len;
++
++	len = RTA_SPACE(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr);
++	len += NLMSG_SPACE(sizeof(struct xfrm_user_polexpire));
++	skb = alloc_skb(len, GFP_ATOMIC);
++	if (skb == NULL)
++		return -ENOMEM;
++
++	if (build_polexpire(skb, xp, dir, hard) < 0)
++		BUG();
++
++	NETLINK_CB(skb).dst_groups = XFRMGRP_EXPIRE;
++
++	return netlink_broadcast(xfrm_nl, skb, 0, XFRMGRP_EXPIRE, GFP_ATOMIC);
++}
++
++static struct xfrm_mgr netlink_mgr = {
++	.id		= "netlink",
++	.notify		= xfrm_send_state_notify,
++	.acquire	= xfrm_send_acquire,
++	.compile_policy	= xfrm_compile_policy,
++	.notify_policy	= xfrm_send_policy_notify,
++};
++
++static int __init xfrm_user_init(void)
++{
++	printk(KERN_INFO "Initializing IPsec netlink socket\n");
++
++	xfrm_nl = netlink_kernel_create(NETLINK_XFRM, xfrm_netlink_rcv);
++	if (xfrm_nl == NULL)
++		return -ENOMEM;
++
++	xfrm_register_km(&netlink_mgr);
++
++	return 0;
++}
++
++static void __exit xfrm_user_exit(void)
++{
++	xfrm_unregister_km(&netlink_mgr);
++	sock_release(xfrm_nl->socket);
++}
++
++module_init(xfrm_user_init);
++module_exit(xfrm_user_exit);
++MODULE_LICENSE("GPL");
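
For reference, a minimal userspace request exercising this dispatcher, here
an XFRM_MSG_FLUSHSA for ESP, might look like the following sketch. Error
handling is trimmed; it assumes <linux/netlink.h> and <linux/xfrm.h> from a
patched tree, CAP_NET_ADMIN, and that the proto field acts as a filter in
xfrm_state_flush() (which is outside this file):

	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/socket.h>
	#include <netinet/in.h>
	#include <linux/netlink.h>
	#include <linux/xfrm.h>

	int main(void)
	{
		struct {
			struct nlmsghdr n;
			struct xfrm_usersa_flush f;
		} req;
		struct sockaddr_nl nl = { 0 };
		char buf[256];
		int fd, n;

		fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);
		if (fd < 0)
			return 1;

		memset(&req, 0, sizeof(req));
		req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.f));
		req.n.nlmsg_type = XFRM_MSG_FLUSHSA;
		/* xfrm_user_rcv_msg() drops anything without NLM_F_REQUEST;
		 * NLM_F_ACK makes xfrm_user_rcv_skb() reply with an
		 * explicit NLMSG_ERROR message (error 0 on success). */
		req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
		req.f.proto = IPPROTO_ESP;	/* assumed to select ESP */

		nl.nl_family = AF_NETLINK;
		/* Needs CAP_NET_ADMIN: the dispatcher refuses all else. */
		if (sendto(fd, &req, req.n.nlmsg_len, 0,
			   (struct sockaddr *) &nl, sizeof(nl)) < 0) {
			perror("sendto");
			return 1;
		}

		n = recv(fd, buf, sizeof(buf), 0);
		if (n > 0) {
			struct nlmsghdr *rsp = (struct nlmsghdr *) buf;

			if (NLMSG_OK(rsp, n) &&
			    rsp->nlmsg_type == NLMSG_ERROR) {
				struct nlmsgerr *e = NLMSG_DATA(rsp);

				printf("ack: %d\n", e->error);
			}
		}
		close(fd);
		return 0;
	}
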
+diff -Nru a/scripts/tkgen.c b/scripts/tkgen.c
+--- a/scripts/tkgen.c	2005-02-13 21:25:09 +11:00
++++ b/scripts/tkgen.c	2005-02-13 21:25:09 +11:00
+@@ -546,7 +546,7 @@
+ 	    printf( "set %s [expr $%s&15]",
+ 		vartable[cfg->nameindex].name, vartable[cfg->nameindex].name );
+ 	    printf( "} else {");
+-	    printf( "set %s [expr $%s|16]}\n",
++	    printf( "set %s [expr $%s]}\n",
+ 		vartable[cfg->nameindex].name, vartable[cfg->nameindex].name );
+ 	    break;
+ 
+@@ -612,7 +612,7 @@
+ 	/*
+ 	 * Clear the disable bit to enable the correct radiobutton.
+ 	 */
+-	    printf( "set %s [expr $%s|16]}\n",
++	    printf( "set %s [expr $%s]}\n",
+ 		vartable[cfg->nameindex].name, vartable[cfg->nameindex].name );
+ 	    break;
+ 

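For completeness, a hypothetical monitor subscribing to the ACQUIRE/EXPIRE
broadcast groups used by xfrm_send_acquire() and xfrm_send_state_notify()
above could be sketched as follows (run as root; binding to netlink groups
requires CAP_NET_ADMIN on these kernels, and headers from a patched tree
are assumed):

	#include <stdio.h>
	#include <string.h>
	#include <sys/socket.h>
	#include <linux/netlink.h>
	#include <linux/xfrm.h>

	int main(void)
	{
		char buf[4096];
		struct sockaddr_nl nl;
		int fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_XFRM);

		if (fd < 0)
			return 1;

		memset(&nl, 0, sizeof(nl));
		nl.nl_family = AF_NETLINK;
		nl.nl_groups = XFRMGRP_ACQUIRE | XFRMGRP_EXPIRE;
		if (bind(fd, (struct sockaddr *) &nl, sizeof(nl)) < 0)
			return 1;

		for (;;) {
			int len = recv(fd, buf, sizeof(buf), 0);
			struct nlmsghdr *nlh = (struct nlmsghdr *) buf;

			while (NLMSG_OK(nlh, len)) {
				if (nlh->nlmsg_type == XFRM_MSG_EXPIRE) {
					struct xfrm_user_expire *ue =
						NLMSG_DATA(nlh);

					printf("SA %s expire\n",
					       ue->hard ? "hard" : "soft");
				} else if (nlh->nlmsg_type ==
					   XFRM_MSG_ACQUIRE) {
					printf("SA acquire request\n");
				}
				nlh = NLMSG_NEXT(nlh, len);
			}
		}
		return 0;
	}
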
Modified: trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/series/2.4.29-1
===================================================================
--- trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/series/2.4.29-1	2005-02-14 17:51:54 UTC (rev 2486)
+++ trunk/kernel-2.4/source/kernel-source-2.4.29-2.4.29/debian/patches/series/2.4.29-1	2005-02-15 00:36:04 UTC (rev 2487)
@@ -89,3 +89,4 @@
 + 092_sparc64_hme_lockup.diff
 + 095_sparc32_initrd_memcpy.diff
 + 096_megaraid2_proc_name.diff
++ 097_ipsec.diff



