[kernel] r11350 - in dists/trunk/linux-2.6/debian: . patches/features/arm patches/series
Martin Michlmayr
tbm at alioth.debian.org
Sun May 11 17:11:43 UTC 2008
Author: tbm
Date: Sun May 11 17:11:42 2008
New Revision: 11350
Log:
Add some patches from Marvell's Orion tree
Added:
dists/trunk/linux-2.6/debian/patches/features/arm/5281d0.patch
dists/trunk/linux-2.6/debian/patches/features/arm/cache-align.patch
dists/trunk/linux-2.6/debian/patches/features/arm/cache-align2.patch
dists/trunk/linux-2.6/debian/patches/features/arm/speed_flush_cache.patch
Modified:
dists/trunk/linux-2.6/debian/changelog
dists/trunk/linux-2.6/debian/patches/series/1~experimental.1
Modified: dists/trunk/linux-2.6/debian/changelog
==============================================================================
--- dists/trunk/linux-2.6/debian/changelog (original)
+++ dists/trunk/linux-2.6/debian/changelog Sun May 11 17:11:42 2008
@@ -22,6 +22,11 @@
[ Martin Michlmayr ]
* [arm/orion5x] Update the config to reflect upstream renaming this
subarch.
+ * [arm/orion5x] Add some patches from Marvell's Orion tree:
+ - cache align destination pointer when copying memory for some processors
+ - cache align memset and memzero
+ - Feroceon: speed up flushing of the entire cache
+ - support for 5281 D0 stepping
-- maximilian attems <maks at debian.org> Sat, 26 Apr 2008 23:11:17 +0200
Added: dists/trunk/linux-2.6/debian/patches/features/arm/5281d0.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/5281d0.patch Sun May 11 17:11:42 2008
@@ -0,0 +1,52 @@
+From: Lennert Buytenhek <buytenh at marvell.com>
+Date: Mon, 5 May 2008 18:19:55 +0000 (-0400)
+Subject: Orion: support for D0 stepping
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=0e33c8a37f7c05bf85944cf10ca499b2d3754c1b
+
+Orion: support for D0 stepping
+
+Signed-off-by: Lennert Buytenhek <buytenh at marvell.com>
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+
+diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
+index 4f13fd0..3e40e96 100644
+--- a/arch/arm/mach-orion5x/common.c
++++ b/arch/arm/mach-orion5x/common.c
+@@ -338,6 +338,8 @@ static void __init orion5x_id(u32 *dev, u32 *rev, char **dev_name)
+ *dev_name = "MV88F5281-D2";
+ } else if (*rev == MV88F5281_REV_D1) {
+ *dev_name = "MV88F5281-D1";
++ } else if (*rev == MV88F5281_REV_D0) {
++ *dev_name = "MV88F5281-D0";
+ } else {
+ *dev_name = "MV88F5281-Rev-Unsupported";
+ }
+@@ -372,6 +374,15 @@ void __init orion5x_init(void)
+ orion5x_setup_cpu_mbus_bridge();
+
+ /*
++ * Don't issue "Wait for Interrupt" instruction if we are
++ * running on D0 5281 silicon.
++ */
++ if (dev == MV88F5281_DEV_ID && rev == MV88F5281_REV_D0) {
++ printk(KERN_INFO "Orion: Applying 5281 D0 WFI workaround.\n");
++ disable_hlt();
++ }
++
++ /*
+ * Register devices.
+ */
+ platform_device_register(&orion5x_uart);
+diff --git a/include/asm-arm/arch-orion5x/orion5x.h b/include/asm-arm/arch-orion5x/orion5x.h
+index 206ddd7..25ec775 100644
+--- a/include/asm-arm/arch-orion5x/orion5x.h
++++ b/include/asm-arm/arch-orion5x/orion5x.h
+@@ -71,6 +71,7 @@
+ #define MV88F5182_REV_A2 2
+ /* Orion-2 (88F5281) */
+ #define MV88F5281_DEV_ID 0x5281
++#define MV88F5281_REV_D0 4
+ #define MV88F5281_REV_D1 5
+ #define MV88F5281_REV_D2 6
+
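The patch above does two things: it teaches orion5x_id() to recognise the D0
stepping by name, and it calls disable_hlt() on that silicon so the idle loop
never issues "Wait for Interrupt"; the printk calls it a workaround, which
points to an erratum in that stepping. A minimal standalone C sketch of the
same dispatch logic (illustration only, not kernel code; the probe values in
main() are hypothetical):

#include <stdio.h>

/* IDs mirror the orion5x.h hunk in the patch above. */
#define MV88F5281_DEV_ID  0x5281
#define MV88F5281_REV_D0  4
#define MV88F5281_REV_D1  5
#define MV88F5281_REV_D2  6

static const char *rev_name(unsigned int rev)
{
        switch (rev) {
        case MV88F5281_REV_D2: return "MV88F5281-D2";
        case MV88F5281_REV_D1: return "MV88F5281-D1";
        case MV88F5281_REV_D0: return "MV88F5281-D0";
        default:               return "MV88F5281-Rev-Unsupported";
        }
}

int main(void)
{
        unsigned int dev = MV88F5281_DEV_ID;    /* hypothetical probe result */
        unsigned int rev = MV88F5281_REV_D0;

        printf("Orion: %s\n", rev_name(rev));
        if (dev == MV88F5281_DEV_ID && rev == MV88F5281_REV_D0) {
                /* the kernel calls disable_hlt() here, keeping the
                   idle loop from ever executing WFI */
                printf("Orion: Applying 5281 D0 WFI workaround.\n");
        }
        return 0;
}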
Added: dists/trunk/linux-2.6/debian/patches/features/arm/cache-align.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/cache-align.patch Sun May 11 17:11:42 2008
@@ -0,0 +1,120 @@
+From: Nicolas Pitre <nico at cam.org>
+Date: Mon, 31 Mar 2008 16:38:31 +0000 (-0400)
+Subject: [ARM] cache align destination pointer when copying memory for some processors
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=f25c9c5b9b3eca2f4a41ac72fec6244c0cbd87cc
+
+[ARM] cache align destination pointer when copying memory for some processors
+
+The implementation for memory copy functions on ARM had a (disabled)
+provision for aligning the source pointer before loading registers with
+data. Turns out that aligning the _destination_ pointer is much more
+useful, as the read side is already sufficiently helped with the use of
+preload.
+
+So this changes the definition of the CALGN() macro to target the
+destination pointer instead, and turns it on for Feroceon processors
+where the gain is very notable.
+
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+
+diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
+index cab355c..139cce6 100644
+--- a/arch/arm/lib/copy_template.S
++++ b/arch/arm/lib/copy_template.S
+@@ -13,14 +13,6 @@
+ */
+
+ /*
+- * This can be used to enable code to cacheline align the source pointer.
+- * Experiments on tested architectures (StrongARM and XScale) didn't show
+- * this a worthwhile thing to do. That might be different in the future.
+- */
+-//#define CALGN(code...) code
+-#define CALGN(code...)
+-
+-/*
+ * Theory of operation
+ * -------------------
+ *
+@@ -82,7 +74,7 @@
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcnes r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+@@ -168,7 +160,7 @@
+ subs r2, r2, #28
+ blt 14f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
+index ef7fddc..415e3d1 100644
+--- a/arch/arm/lib/memmove.S
++++ b/arch/arm/lib/memmove.S
+@@ -13,14 +13,6 @@
+ #include <linux/linkage.h>
+ #include <asm/assembler.h>
+
+-/*
+- * This can be used to enable code to cacheline align the source pointer.
+- * Experiments on tested architectures (StrongARM and XScale) didn't show
+- * this a worthwhile thing to do. That might be different in the future.
+- */
+-//#define CALGN(code...) code
+-#define CALGN(code...)
+-
+ .text
+
+ /*
+@@ -55,7 +47,7 @@ ENTRY(memmove)
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+@@ -138,7 +130,7 @@ ENTRY(memmove)
+ subs r2, r2, #28
+ blt 14f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h
+index fce8328..911393b 100644
+--- a/include/asm-arm/assembler.h
++++ b/include/asm-arm/assembler.h
+@@ -56,6 +56,21 @@
+ #endif
+
+ /*
++ * This can be used to enable code to cacheline align the destination
++ * pointer when bulk writing to memory. Experiments on StrongARM and
++ * XScale didn't show this a worthwhile thing to do when the cache is not
++ * set to write-allocate (this would need further testing on XScale when WA
++ * is used).
++ *
++ * On Feroceon there is much to gain however, regardless of cache mode.
++ */
++#ifdef CONFIG_CPU_FEROCEON
++#define CALGN(code...) code
++#else
++#define CALGN(code...)
++#endif
++
++/*
+ * Enable and disable interrupts
+ */
+ #if __LINUX_ARM_ARCH__ >= 6
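In C terms, the change above amounts to this: before the bulk copy loop, peel
off just enough leading bytes to bring the destination onto a 32-byte
cache-line boundary, so every subsequent burst store fills whole lines. A
rough portable sketch of the idea (32 matches Feroceon's line size;
copy_dst_aligned() is an illustrative name, not a kernel interface, and the
real code does the bulk phase with ldm/stm rather than memcpy):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define CACHE_LINE 32

static void *copy_dst_aligned(void *dst, const void *src, size_t n)
{
        unsigned char *d = dst;
        const unsigned char *s = src;
        size_t skew = (uintptr_t)d & (CACHE_LINE - 1);

        /* head phase: advance to the next cache-line boundary */
        if (skew && n >= CACHE_LINE) {
                size_t head = CACHE_LINE - skew;

                memcpy(d, s, head);
                d += head;
                s += head;
                n -= head;
        }

        /* bulk phase: d is now line-aligned, so the stores here hit
           whole cache lines, which is the win on write-allocate
           caches such as Feroceon's */
        memcpy(d, s, n);
        return dst;
}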
Added: dists/trunk/linux-2.6/debian/patches/features/arm/cache-align2.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/cache-align2.patch Sun May 11 17:11:42 2008
@@ -0,0 +1,139 @@
+From: Nicolas Pitre <nico at cam.org>
+Date: Sat, 12 Apr 2008 01:04:28 +0000 (-0400)
+Subject: [ARM] cache align memset and memzero
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=74fa6238bc1602038532b548b954020f06b596cc
+
+[ARM] cache align memset and memzero
+
+This is a natural extension following the previous patch.
+Non Feroceon based targets are unchanged.
+
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+
+diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
+index 95b110b..cf75188 100644
+--- a/arch/arm/lib/memset.S
++++ b/arch/arm/lib/memset.S
+@@ -39,6 +39,9 @@ ENTRY(memset)
+ mov r3, r1
+ cmp r2, #16
+ blt 4f
++
++#if CALGN(1)-1 != 0
++
+ /*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+@@ -64,6 +67,49 @@ ENTRY(memset)
+ stmneia r0!, {r1, r3, ip, lr}
+ ldr lr, [sp], #4
+
++#else
++
++/*
++ * This version aligns the destination pointer in order to write
++ * whole cache lines at once.
++ */
++
++ stmfd sp!, {r4-r7, lr}
++ mov r4, r1
++ mov r5, r1
++ mov r6, r1
++ mov r7, r1
++ mov ip, r1
++ mov lr, r1
++
++ cmp r2, #96
++ tstgt r0, #31
++ ble 3f
++
++ and ip, r0, #31
++ rsb ip, ip, #32
++ sub r2, r2, ip
++ movs ip, ip, lsl #(32 - 4)
++ stmcsia r0!, {r4, r5, r6, r7}
++ stmmiia r0!, {r4, r5}
++ tst ip, #(1 << 30)
++ mov ip, r1
++ strne r1, [r0], #4
++
++3: subs r2, r2, #64
++ stmgeia r0!, {r1, r3-r7, ip, lr}
++ stmgeia r0!, {r1, r3-r7, ip, lr}
++ bgt 3b
++ ldmeqfd sp!, {r4-r7, pc}
++
++ tst r2, #32
++ stmneia r0!, {r1, r3-r7, ip, lr}
++ tst r2, #16
++ stmneia r0!, {r4-r7}
++ ldmfd sp!, {r4-r7, lr}
++
++#endif
++
+ 4: tst r2, #8
+ stmneia r0!, {r1, r3}
+ tst r2, #4
+diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
+index abf2508..a9bfef5 100644
+--- a/arch/arm/lib/memzero.S
++++ b/arch/arm/lib/memzero.S
+@@ -39,6 +39,9 @@ ENTRY(__memzero)
+ */
+ cmp r1, #16 @ 1 we can skip this chunk if we
+ blt 4f @ 1 have < 16 bytes
++
++#if CALGN(1)-1 != 0
++
+ /*
+ * We need an extra register for this loop - save the return address and
+ * use the LR
+@@ -64,6 +67,47 @@ ENTRY(__memzero)
+ stmneia r0!, {r2, r3, ip, lr} @ 4
+ ldr lr, [sp], #4 @ 1
+
++#else
++
++/*
++ * This version aligns the destination pointer in order to write
++ * whole cache lines at once.
++ */
++
++ stmfd sp!, {r4-r7, lr}
++ mov r4, r2
++ mov r5, r2
++ mov r6, r2
++ mov r7, r2
++ mov ip, r2
++ mov lr, r2
++
++ cmp r1, #96
++ andgts ip, r0, #31
++ ble 3f
++
++ rsb ip, ip, #32
++ sub r1, r1, ip
++ movs ip, ip, lsl #(32 - 4)
++ stmcsia r0!, {r4, r5, r6, r7}
++ stmmiia r0!, {r4, r5}
++ movs ip, ip, lsl #2
++ strcs r2, [r0], #4
++
++3: subs r1, r1, #64
++ stmgeia r0!, {r2-r7, ip, lr}
++ stmgeia r0!, {r2-r7, ip, lr}
++ bgt 3b
++ ldmeqfd sp!, {r4-r7, pc}
++
++ tst r1, #32
++ stmneia r0!, {r2-r7, ip, lr}
++ tst r1, #16
++ stmneia r0!, {r4-r7}
++ ldmfd sp!, {r4-r7, lr}
++
++#endif
++
+ 4: tst r1, #8 @ 1 8 bytes or more?
+ stmneia r0!, {r2, r3} @ 2
+ tst r1, #4 @ 1 4 bytes or more?
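One construct in the hunks above is worth decoding: the "#if CALGN(1)-1 != 0"
test. Because CALGN(code...) expands its argument on Feroceon and to nothing
elsewhere (see the assembler.h hunk in the previous patch), the preprocessor
evaluates either "1-1 != 0" (false, so the new destination-aligning code under
#else is assembled) or "-1 != 0" (true, so the original loop is kept). A tiny
demonstration of the trick; defining CONFIG_CPU_FEROCEON by hand here stands
in for the Kconfig selection:

#include <stdio.h>

#define CONFIG_CPU_FEROCEON     /* toggle to compare both paths */

#ifdef CONFIG_CPU_FEROCEON
#define CALGN(code...) code
#else
#define CALGN(code...)
#endif

int main(void)
{
#if CALGN(1)-1 != 0
        puts("generic path: original unaligned loop kept");
#else
        puts("Feroceon path: cache-align the destination first");
#endif
        return 0;
}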
Added: dists/trunk/linux-2.6/debian/patches/features/arm/speed_flush_cache.patch
==============================================================================
--- (empty file)
+++ dists/trunk/linux-2.6/debian/patches/features/arm/speed_flush_cache.patch Sun May 11 17:11:42 2008
@@ -0,0 +1,124 @@
+From: Lennert Buytenhek <buytenh at wantstofly.org>
+Date: Thu, 24 Apr 2008 05:31:46 +0000 (-0400)
+Subject: [ARM] Feroceon: speed up flushing of the entire cache
+X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=8c38bce5ed3a5f2c8a1cb070ba41a8889cc69257
+
+[ARM] Feroceon: speed up flushing of the entire cache
+
+Flushing the L1 D cache with a test/clean/invalidate loop is very
+easy in software, but it is not the quickest way of doing it, as
+there is a lot of overhead involved in re-scanning the cache from
+the beginning every time we hit a dirty line.
+
+This patch makes proc-feroceon.S use "clean+invalidate by set/way"
+loops according to possible cache configuration of Feroceon CPUs
+(either direct-mapped or 4-way set associative).
+
+[nico: optimized the assembly a bit]
+
+Signed-off-by: Lennert Buytenhek <buytenh at marvell.com>
+Signed-off-by: Nicolas Pitre <nico at marvell.com>
+---
+
+diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
+index a02c171..968d5ad 100644
+--- a/arch/arm/mm/proc-feroceon.S
++++ b/arch/arm/mm/proc-feroceon.S
+@@ -44,11 +44,31 @@
+ */
+ #define CACHE_DLINESIZE 32
+
++ .bss
++ .align 3
++__cache_params_loc:
++ .space 8
++
+ .text
++__cache_params:
++ .word __cache_params_loc
++
+ /*
+ * cpu_feroceon_proc_init()
+ */
+ ENTRY(cpu_feroceon_proc_init)
++ mrc p15, 0, r0, c0, c0, 1 @ read cache type register
++ ldr r1, __cache_params
++ mov r2, #(16 << 5)
++ tst r0, #(1 << 16) @ get way
++ mov r0, r0, lsr #18 @ get cache size order
++ movne r3, #((4 - 1) << 30) @ 4-way
++ and r0, r0, #0xf
++ moveq r3, #0 @ 1-way
++ mov r2, r2, lsl r0 @ actual cache size
++ movne r2, r2, lsr #2 @ turned into # of sets
++ sub r2, r2, #(1 << 5)
++ stmia r1, {r2, r3}
+ mov pc, lr
+
+ /*
+@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all)
+ */
+ ENTRY(feroceon_flush_kern_cache_all)
+ mov r2, #VM_EXEC
+- mov ip, #0
++
+ __flush_whole_cache:
+-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
+- bne 1b
++ ldr r1, __cache_params
++ ldmia r1, {r1, r3}
++1: orr ip, r1, r3
++2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
++ subs ip, ip, #(1 << 30) @ next way
++ bcs 2b
++ subs r1, r1, #(1 << 5) @ next set
++ bcs 1b
++
+ tst r2, #VM_EXEC
++ mov ip, #0
+ mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcrne p15, 0, ip, c7, c10, 4 @ drain WB
+ mov pc, lr
+@@ -138,7 +166,6 @@ __flush_whole_cache:
+ */
+ .align 5
+ ENTRY(feroceon_flush_user_cache_range)
+- mov ip, #0
+ sub r3, r1, r0 @ calculate total size
+ cmp r3, #CACHE_DLIMIT
+ bgt __flush_whole_cache
+@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range)
+ cmp r0, r1
+ blo 1b
+ tst r2, #VM_EXEC
++ mov ip, #0
+ mcrne p15, 0, ip, c7, c10, 4 @ drain WB
+ mov pc, lr
+
+@@ -306,16 +334,19 @@ ENTRY(cpu_feroceon_dcache_clean_area)
+ .align 5
+ ENTRY(cpu_feroceon_switch_mm)
+ #ifdef CONFIG_MMU
+- mov ip, #0
+-@ && 'Clean & Invalidate whole DCache'
+-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
+- bne 1b
+- mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
+- mcr p15, 0, ip, c7, c10, 4 @ drain WB
++ mov r2, lr @ abuse r2 to preserve lr
++ bl __flush_whole_cache
++ @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
++ tst r2, #VM_EXEC
++ mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
++ mcreq p15, 0, ip, c7, c10, 4 @ drain WB
++
+ mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
+ mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
+-#endif
++ mov pc, r2
++#else
+ mov pc, lr
++#endif
+
+ /*
+ * cpu_feroceon_set_pte_ext(ptep, pte, ext)
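To summarise the mechanics of the patch above: cpu_feroceon_proc_init() reads
the cache type register once and stores two words, (number of sets - 1) << 5
and (number of ways - 1) << 30, and __flush_whole_cache then visits every
set/way combination with one "clean + invalidate D set/way" operation
(c7, c14, 2) per line, instead of rescanning from the start after each dirty
hit as the old test/clean/invalidate loop did. A C rendering of that
traversal; the printf stands in for the coprocessor write, and the tiny
geometry is an assumption for demonstration:

#include <stdint.h>
#include <stdio.h>

/* stand-in for: asm volatile("mcr p15, 0, %0, c7, c14, 2" : : "r" (sw)); */
static void clean_inv_dcache_set_way(uint32_t sw)
{
        printf("clean+invalidate set/way word 0x%08x\n", sw);
}

int main(void)
{
        const unsigned int line = 32;   /* CACHE_DLINESIZE */
        const unsigned int ways = 4;    /* 4-way; 1 if direct-mapped */
        const unsigned int sets = 8;    /* assumed tiny cache for demo */

        /* same traversal as the assembly: sets in the outer loop,
           ways in the inner one, both counting down */
        for (int set = sets - 1; set >= 0; set--)
                for (int way = ways - 1; way >= 0; way--)
                        clean_inv_dcache_set_way(((uint32_t)set * line) |
                                                 ((uint32_t)way << 30));
        return 0;
}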
Modified: dists/trunk/linux-2.6/debian/patches/series/1~experimental.1
==============================================================================
--- dists/trunk/linux-2.6/debian/patches/series/1~experimental.1 (original)
+++ dists/trunk/linux-2.6/debian/patches/series/1~experimental.1 Sun May 11 17:11:42 2008
@@ -27,6 +27,10 @@
+ bugfix/arm/disable-scsi_acard.patch
##+ bugfix/arm/disable-ath5k.patch
+ bugfix/arm/disable-r6040.patch
++ features/arm/cache-align.patch
++ features/arm/cache-align2.patch
++ features/arm/speed_flush_cache.patch
++ features/arm/5281d0.patch
+ features/all/at76.patch
+ bugfix/fix-hifn_795X-divdi3.patch
+ bugfix/all/mtd-prevent-physmap-from-causing-request_module-runaway-loop-modprobe-net-pf-1.patch