[kernel] r11633 - in dists/trunk/linux-2.6/debian: . patches/features/arm patches/series
Martin Michlmayr
tbm at alioth.debian.org
Fri Jun 13 10:19:12 UTC 2008
Author: tbm
Date: Fri Jun 13 10:18:56 2008
New Revision: 11633
Log:
some cache align speedups
Added:
dists/trunk/linux-2.6/debian/patches/features/arm/cache_align1.patch
dists/trunk/linux-2.6/debian/patches/features/arm/cache_align2.patch
dists/trunk/linux-2.6/debian/patches/features/arm/fix_cache_alignment.patch
Modified:
dists/trunk/linux-2.6/debian/changelog
dists/trunk/linux-2.6/debian/patches/series/1~experimental.1
Modified: dists/trunk/linux-2.6/debian/changelog
==============================================================================
--- dists/trunk/linux-2.6/debian/changelog (original)
+++ dists/trunk/linux-2.6/debian/changelog Fri Jun 13 10:18:56 2008
@@ -46,6 +46,8 @@
* [arm/orion5x] Add some patches from Marvell's Orion tree:
- Feroceon: speed up flushing of the entire cache
- support for 5281 D0 stepping
+ - cache align destination pointer when copying memory for some processors
+ - cache align memset and memzero
* [arm/orion5x] Enable NETCONSOLE.
* [arm/orion5x] Disable more SCSI drivers.
* [arm/ixp4xx] Disable ATA and more SCSI and network drivers.
Added: dists/trunk/linux-2.6/debian/patches/features/arm/cache_align1.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/cache_align1.patch Fri Jun 13 10:18:56 2008
@@ -0,0 +1,124 @@
+From: Nicolas Pitre <nico at cam.org>
+
+The implementation for memory copy functions on ARM had a (disabled)
+provision for aligning the source pointer before loading registers with
+data. Turns out that aligning the _destination_ pointer is much more
+useful, as the read side is already sufficiently helped with the use of
+preload.
+
+So this changes the definition of the CALGN() macro to target the
+destination pointer instead, and turns it on for Feroceon processors
+where the gain is very notable.
+
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+ arch/arm/lib/copy_template.S | 12 ++----------
+ arch/arm/lib/memmove.S | 12 ++----------
+ include/asm-arm/assembler.h | 15 +++++++++++++++
+ 3 files changed, 19 insertions(+), 20 deletions(-)
+
+Index: linux-2.6.26-rc5/arch/arm/lib/copy_template.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/copy_template.S
++++ linux-2.6.26-rc5/arch/arm/lib/copy_template.S
+@@ -13,14 +13,6 @@
+ */
+
+ /*
+- * This can be used to enable code to cacheline align the source pointer.
+- * Experiments on tested architectures (StrongARM and XScale) didn't show
+- * this a worthwhile thing to do. That might be different in the future.
+- */
+-//#define CALGN(code...) code
+-#define CALGN(code...)
+-
+-/*
+ * Theory of operation
+ * -------------------
+ *
+@@ -82,7 +74,7 @@
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcnes r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+@@ -168,7 +160,7 @@
+ subs r2, r2, #28
+ blt 14f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+Index: linux-2.6.26-rc5/arch/arm/lib/memmove.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memmove.S
++++ linux-2.6.26-rc5/arch/arm/lib/memmove.S
+@@ -13,14 +13,6 @@
+ #include <linux/linkage.h>
+ #include <asm/assembler.h>
+
+-/*
+- * This can be used to enable code to cacheline align the source pointer.
+- * Experiments on tested architectures (StrongARM and XScale) didn't show
+- * this a worthwhile thing to do. That might be different in the future.
+- */
+-//#define CALGN(code...) code
+-#define CALGN(code...)
+-
+ .text
+
+ /*
+@@ -55,7 +47,7 @@ ENTRY(memmove)
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+@@ -139,7 +131,7 @@ ENTRY(memmove)
+ subs r2, r2, #28
+ blt 14f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+Index: linux-2.6.26-rc5/include/asm-arm/assembler.h
+===================================================================
+--- linux-2.6.26-rc5.orig/include/asm-arm/assembler.h
++++ linux-2.6.26-rc5/include/asm-arm/assembler.h
+@@ -56,6 +56,21 @@
+ #endif
+
+ /*
++ * This can be used to enable code to cacheline align the destination
++ * pointer when bulk writing to memory. Experiments on StrongARM and
++ * XScale didn't show this to be worthwhile when the cache is not
++ * set to write-allocate (this would need further testing on XScale when WA
++ * is used).
++ *
++ * On Feroceon there is much to gain however, regardless of cache mode.
++ */
++#ifdef CONFIG_CPU_FEROCEON
++#define CALGN(code...) code
++#else
++#define CALGN(code...)
++#endif
++
++/*
+ * Enable and disable interrupts
+ */
+ #if __LINUX_ARM_ARCH__ >= 6
+
+-------------------------------------------------------------------
+List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
+FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
+Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
Added: dists/trunk/linux-2.6/debian/patches/features/arm/cache_align2.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/cache_align2.patch Fri Jun 13 10:18:56 2008
@@ -0,0 +1,142 @@
+From: Nicolas Pitre <nico at cam.org>
+
+This is a natural extension following the previous patch.
+Non-Feroceon-based targets are unchanged.
+
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+ arch/arm/lib/memset.S | 46 ++++++++++++++++++++++++++++++++++++++++++++++
+ arch/arm/lib/memzero.S | 44 ++++++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 90 insertions(+), 0 deletions(-)
+
+Index: linux-2.6.26-rc5/arch/arm/lib/memset.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memset.S
++++ linux-2.6.26-rc5/arch/arm/lib/memset.S
+@@ -39,6 +39,9 @@ ENTRY(memset)
+ mov r3, r1
+ cmp r2, #16
+ blt 4f
++
++#if ! CALGN(1)+0
++
+ /*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+@@ -64,6 +67,49 @@ ENTRY(memset)
+ stmneia r0!, {r1, r3, ip, lr}
+ ldr lr, [sp], #4
+
++#else
++
++/*
++ * This version aligns the destination pointer in order to write
++ * whole cache lines at once.
++ */
++
++ stmfd sp!, {r4-r7, lr}
++ mov r4, r1
++ mov r5, r1
++ mov r6, r1
++ mov r7, r1
++ mov ip, r1
++ mov lr, r1
++
++ cmp r2, #96
++ tstgt r0, #31
++ ble 3f
++
++ and ip, r0, #31
++ rsb ip, ip, #32
++ sub r2, r2, ip
++ movs ip, ip, lsl #(32 - 4)
++ stmcsia r0!, {r4, r5, r6, r7}
++ stmmiia r0!, {r4, r5}
++ tst ip, #(1 << 30)
++ mov ip, r1
++ strne r1, [r0], #4
++
++3: subs r2, r2, #64
++ stmgeia r0!, {r1, r3-r7, ip, lr}
++ stmgeia r0!, {r1, r3-r7, ip, lr}
++ bgt 3b
++ ldmeqfd sp!, {r4-r7, pc}
++
++ tst r2, #32
++ stmneia r0!, {r1, r3-r7, ip, lr}
++ tst r2, #16
++ stmneia r0!, {r4-r7}
++ ldmfd sp!, {r4-r7, lr}
++
++#endif
++
+ 4: tst r2, #8
+ stmneia r0!, {r1, r3}
+ tst r2, #4
+Index: linux-2.6.26-rc5/arch/arm/lib/memzero.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memzero.S
++++ linux-2.6.26-rc5/arch/arm/lib/memzero.S
+@@ -39,6 +39,9 @@ ENTRY(__memzero)
+ */
+ cmp r1, #16 @ 1 we can skip this chunk if we
+ blt 4f @ 1 have < 16 bytes
++
++#if ! CALGN(1)+0
++
+ /*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+@@ -64,6 +67,47 @@ ENTRY(__memzero)
+ stmneia r0!, {r2, r3, ip, lr} @ 4
+ ldr lr, [sp], #4 @ 1
+
++#else
++
++/*
++ * This version aligns the destination pointer in order to write
++ * whole cache lines at once.
++ */
++
++ stmfd sp!, {r4-r7, lr}
++ mov r4, r2
++ mov r5, r2
++ mov r6, r2
++ mov r7, r2
++ mov ip, r2
++ mov lr, r2
++
++ cmp r1, #96
++ andgts ip, r0, #31
++ ble 3f
++
++ rsb ip, ip, #32
++ sub r1, r1, ip
++ movs ip, ip, lsl #(32 - 4)
++ stmcsia r0!, {r4, r5, r6, r7}
++ stmmiia r0!, {r4, r5}
++ movs ip, ip, lsl #2
++ strcs r2, [r0], #4
++
++3: subs r1, r1, #64
++ stmgeia r0!, {r2-r7, ip, lr}
++ stmgeia r0!, {r2-r7, ip, lr}
++ bgt 3b
++ ldmeqfd sp!, {r4-r7, pc}
++
++ tst r1, #32
++ stmneia r0!, {r2-r7, ip, lr}
++ tst r1, #16
++ stmneia r0!, {r4-r7}
++ ldmfd sp!, {r4-r7, lr}
++
++#endif
++
+ 4: tst r1, #8 @ 1 8 bytes or more?
+ stmneia r0!, {r2, r3} @ 2
+ tst r1, #4 @ 1 4 bytes or more?
+
+-------------------------------------------------------------------
+List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
+FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
+Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
Added: dists/trunk/linux-2.6/debian/patches/features/arm/fix_cache_alignment.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/fix_cache_alignment.patch Fri Jun 13 10:18:56 2008
@@ -0,0 +1,34 @@
+From: Nicolas Pitre <nico at cam.org>
+
+The CALGN() alignment code in memmove is currently disabled, which explains why no one was affected by this bug.
+
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+ arch/arm/lib/memmove.S | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+Index: linux-2.6.26-rc5/arch/arm/lib/memmove.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memmove.S
++++ linux-2.6.26-rc5/arch/arm/lib/memmove.S
+@@ -60,6 +60,7 @@ ENTRY(memmove)
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, ip ) @ C is set here
++ CALGN( rsb ip, ip, #32 )
+ CALGN( add pc, r4, ip )
+
+ PLD( pld [r1, #-4] )
+@@ -139,7 +140,6 @@ ENTRY(memmove)
+ blt 14f
+
+ CALGN( ands ip, r1, #31 )
+- CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+
+-------------------------------------------------------------------
+List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
+FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
+Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
Modified: dists/trunk/linux-2.6/debian/patches/series/1~experimental.1
==============================================================================
--- dists/trunk/linux-2.6/debian/patches/series/1~experimental.1 (original)
+++ dists/trunk/linux-2.6/debian/patches/series/1~experimental.1 Fri Jun 13 10:18:56 2008
@@ -31,6 +31,9 @@
+ bugfix/arm/disable-r6040.patch
+ features/arm/speed_flush_cache.patch
+ features/arm/5281d0.patch
++ features/arm/fix_cache_alignment.patch
++ features/arm/cache_align1.patch
++ features/arm/cache_align2.patch
+ features/arm/led-pca9532-generic.patch
+ features/arm/led-pca9532-fix.patch
+ features/arm/led-pca9532-n2100.patch
More information about the Kernel-svn-changes
mailing list