Add some patches from Marvell's Orion tree

svn path=/dists/trunk/linux-2.6/; revision=11350
This commit is contained in:
Martin Michlmayr 2008-05-11 17:11:42 +00:00
parent 2ebd19a615
commit 110a9434a0
6 changed files with 444 additions and 0 deletions

5
debian/changelog vendored
View File

@ -22,6 +22,11 @@ linux-2.6 (2.6.26~rc1-1~experimental.1) UNRELEASED; urgency=low
[ Martin Michlmayr ]
* [arm/orion5x] Update the config to reflect upstream renaming this
subarch.
* [arm/orion5x] Add some patches from Marvell's Orion tree:
- cache align destination pointer when copying memory for some processors
- cache align memset and memzero
- Feroceon: speed up flushing of the entire cache
- support for 5281 D0 stepping
-- maximilian attems <maks@debian.org> Sat, 26 Apr 2008 23:11:17 +0200

View File

@ -0,0 +1,52 @@
From: Lennert Buytenhek <buytenh@marvell.com>
Date: Mon, 5 May 2008 18:19:55 +0000 (-0400)
Subject: Orion: support for D0 stepping
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=0e33c8a37f7c05bf85944cf10ca499b2d3754c1b
Orion: support for D0 stepping
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
index 4f13fd0..3e40e96 100644
--- a/arch/arm/mach-orion5x/common.c
+++ b/arch/arm/mach-orion5x/common.c
@@ -338,6 +338,8 @@ static void __init orion5x_id(u32 *dev, u32 *rev, char **dev_name)
*dev_name = "MV88F5281-D2";
} else if (*rev == MV88F5281_REV_D1) {
*dev_name = "MV88F5281-D1";
+ } else if (*rev == MV88F5281_REV_D0) {
+ *dev_name = "MV88F5281-D0";
} else {
*dev_name = "MV88F5281-Rev-Unsupported";
}
@@ -372,6 +374,15 @@ void __init orion5x_init(void)
orion5x_setup_cpu_mbus_bridge();
/*
+ * Don't issue "Wait for Interrupt" instruction if we are
+ * running on D0 5281 silicon.
+ */
+ if (dev == MV88F5281_DEV_ID && rev == MV88F5281_REV_D0) {
+ printk(KERN_INFO "Orion: Applying 5281 D0 WFI workaround.\n");
+ disable_hlt();
+ }
+
+ /*
* Register devices.
*/
platform_device_register(&orion5x_uart);
diff --git a/include/asm-arm/arch-orion5x/orion5x.h b/include/asm-arm/arch-orion5x/orion5x.h
index 206ddd7..25ec775 100644
--- a/include/asm-arm/arch-orion5x/orion5x.h
+++ b/include/asm-arm/arch-orion5x/orion5x.h
@@ -71,6 +71,7 @@
#define MV88F5182_REV_A2 2
/* Orion-2 (88F5281) */
#define MV88F5281_DEV_ID 0x5281
+#define MV88F5281_REV_D0 4
#define MV88F5281_REV_D1 5
#define MV88F5281_REV_D2 6

View File

@ -0,0 +1,120 @@
From: Nicolas Pitre <nico@cam.org>
Date: Mon, 31 Mar 2008 16:38:31 +0000 (-0400)
Subject: [ARM] cache align destination pointer when copying memory for some processors
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=f25c9c5b9b3eca2f4a41ac72fec6244c0cbd87cc
[ARM] cache align destination pointer when copying memory for some processors
The implementation for memory copy functions on ARM had a (disabled)
provision for aligning the source pointer before loading registers with
data. Turns out that aligning the _destination_ pointer is much more
useful, as the read side is already sufficiently helped with the use of
preload.
So this changes the definition of the CALGN() macro to target the
destination pointer instead, and turns it on for Feroceon processors
where the gain is very notable.
Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index cab355c..139cce6 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
*/
/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
-/*
* Theory of operation
* -------------------
*
@@ -82,7 +74,7 @@
stmfd sp!, {r5 - r8}
blt 5f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( rsb r3, ip, #32 )
CALGN( sbcnes r4, r3, r2 ) @ C is always set here
CALGN( bcs 2f )
@@ -168,7 +160,7 @@
subs r2, r2, #28
blt 14f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( rsb ip, ip, #32 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index ef7fddc..415e3d1 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
#include <linux/linkage.h>
#include <asm/assembler.h>
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
.text
/*
@@ -55,7 +47,7 @@ ENTRY(memmove)
stmfd sp!, {r5 - r8}
blt 5f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( bcs 2f )
CALGN( adr r4, 6f )
@@ -138,7 +130,7 @@ ENTRY(memmove)
subs r2, r2, #28
blt 14f
- CALGN( ands ip, r1, #31 )
+ CALGN( ands ip, r0, #31 )
CALGN( rsb ip, ip, #32 )
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
CALGN( subcc r2, r2, ip )
diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h
index fce8328..911393b 100644
--- a/include/asm-arm/assembler.h
+++ b/include/asm-arm/assembler.h
@@ -56,6 +56,21 @@
#endif
/*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory. Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
+/*
* Enable and disable interrupts
*/
#if __LINUX_ARM_ARCH__ >= 6

View File

@ -0,0 +1,139 @@
From: Nicolas Pitre <nico@cam.org>
Date: Sat, 12 Apr 2008 01:04:28 +0000 (-0400)
Subject: [ARM] cache align memset and memzero
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=74fa6238bc1602038532b548b954020f06b596cc
[ARM] cache align memset and memzero
This is a natural extension following the previous patch.
Non Feroceon based targets are unchanged.
Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 95b110b..cf75188 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -39,6 +39,9 @@ ENTRY(memset)
mov r3, r1
cmp r2, #16
blt 4f
+
+#if CALGN(1)-1 != 0
+
/*
* We need an extra register for this loop - save the return address and
* use the LR
@@ -64,6 +67,49 @@ ENTRY(memset)
stmneia r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r1
+ mov r5, r1
+ mov r6, r1
+ mov r7, r1
+ mov ip, r1
+ mov lr, r1
+
+ cmp r2, #96
+ tstgt r0, #31
+ ble 3f
+
+ and ip, r0, #31
+ rsb ip, ip, #32
+ sub r2, r2, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ tst ip, #(1 << 30)
+ mov ip, r1
+ strne r1, [r0], #4
+
+3: subs r2, r2, #64
+ stmgeia r0!, {r1, r3-r7, ip, lr}
+ stmgeia r0!, {r1, r3-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
+
+ tst r2, #32
+ stmneia r0!, {r1, r3-r7, ip, lr}
+ tst r2, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
+
+#endif
+
4: tst r2, #8
stmneia r0!, {r1, r3}
tst r2, #4
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index abf2508..a9bfef5 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -39,6 +39,9 @@ ENTRY(__memzero)
*/
cmp r1, #16 @ 1 we can skip this chunk if we
blt 4f @ 1 have < 16 bytes
+
+#if CALGN(1)-1 != 0
+
/*
* We need an extra register for this loop - save the return address and
* use the LR
@@ -64,6 +67,47 @@ ENTRY(__memzero)
stmneia r0!, {r2, r3, ip, lr} @ 4
ldr lr, [sp], #4 @ 1
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r2
+ mov r5, r2
+ mov r6, r2
+ mov r7, r2
+ mov ip, r2
+ mov lr, r2
+
+ cmp r1, #96
+ andgts ip, r0, #31
+ ble 3f
+
+ rsb ip, ip, #32
+ sub r1, r1, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ movs ip, ip, lsl #2
+ strcs r2, [r0], #4
+
+3: subs r1, r1, #64
+ stmgeia r0!, {r2-r7, ip, lr}
+ stmgeia r0!, {r2-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
+
+ tst r1, #32
+ stmneia r0!, {r2-r7, ip, lr}
+ tst r1, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
+
+#endif
+
4: tst r1, #8 @ 1 8 bytes or more?
stmneia r0!, {r2, r3} @ 2
tst r1, #4 @ 1 4 bytes or more?

View File

@ -0,0 +1,124 @@
From: Lennert Buytenhek <buytenh@wantstofly.org>
Date: Thu, 24 Apr 2008 05:31:46 +0000 (-0400)
Subject: [ARM] Feroceon: speed up flushing of the entire cache
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=8c38bce5ed3a5f2c8a1cb070ba41a8889cc69257
[ARM] Feroceon: speed up flushing of the entire cache
Flushing the L1 D cache with a test/clean/invalidate loop is very
easy in software, but it is not the quickest way of doing it, as
there is a lot of overhead involved in re-scanning the cache from
the beginning every time we hit a dirty line.
This patch makes proc-feroceon.S use "clean+invalidate by set/way"
loops according to possible cache configuration of Feroceon CPUs
(either direct-mapped or 4-way set associative).
[nico: optimized the assembly a bit]
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
index a02c171..968d5ad 100644
--- a/arch/arm/mm/proc-feroceon.S
+++ b/arch/arm/mm/proc-feroceon.S
@@ -44,11 +44,31 @@
*/
#define CACHE_DLINESIZE 32
+ .bss
+ .align 3
+__cache_params_loc:
+ .space 8
+
.text
+__cache_params:
+ .word __cache_params_loc
+
/*
* cpu_feroceon_proc_init()
*/
ENTRY(cpu_feroceon_proc_init)
+ mrc p15, 0, r0, c0, c0, 1 @ read cache type register
+ ldr r1, __cache_params
+ mov r2, #(16 << 5)
+ tst r0, #(1 << 16) @ get way
+ mov r0, r0, lsr #18 @ get cache size order
+ movne r3, #((4 - 1) << 30) @ 4-way
+ and r0, r0, #0xf
+ moveq r3, #0 @ 1-way
+ mov r2, r2, lsl r0 @ actual cache size
+ movne r2, r2, lsr #2 @ turned into # of sets
+ sub r2, r2, #(1 << 5)
+ stmia r1, {r2, r3}
mov pc, lr
/*
@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all)
*/
ENTRY(feroceon_flush_kern_cache_all)
mov r2, #VM_EXEC
- mov ip, #0
+
__flush_whole_cache:
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
- bne 1b
+ ldr r1, __cache_params
+ ldmia r1, {r1, r3}
+1: orr ip, r1, r3
+2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
+ subs ip, ip, #(1 << 30) @ next way
+ bcs 2b
+ subs r1, r1, #(1 << 5) @ next set
+ bcs 1b
+
tst r2, #VM_EXEC
+ mov ip, #0
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
@@ -138,7 +166,6 @@ __flush_whole_cache:
*/
.align 5
ENTRY(feroceon_flush_user_cache_range)
- mov ip, #0
sub r3, r1, r0 @ calculate total size
cmp r3, #CACHE_DLIMIT
bgt __flush_whole_cache
@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range)
cmp r0, r1
blo 1b
tst r2, #VM_EXEC
+ mov ip, #0
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
mov pc, lr
@@ -306,16 +334,19 @@ ENTRY(cpu_feroceon_dcache_clean_area)
.align 5
ENTRY(cpu_feroceon_switch_mm)
#ifdef CONFIG_MMU
- mov ip, #0
-@ && 'Clean & Invalidate whole DCache'
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
- bne 1b
- mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
- mcr p15, 0, ip, c7, c10, 4 @ drain WB
+ mov r2, lr @ abuse r2 to preserve lr
+ bl __flush_whole_cache
+ @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
+ tst r2, #VM_EXEC
+ mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
+ mcreq p15, 0, ip, c7, c10, 4 @ drain WB
+
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
-#endif
+ mov pc, r2
+#else
mov pc, lr
+#endif
/*
* cpu_feroceon_set_pte_ext(ptep, pte, ext)

View File

@ -27,6 +27,10 @@
+ bugfix/arm/disable-scsi_acard.patch
##+ bugfix/arm/disable-ath5k.patch
+ bugfix/arm/disable-r6040.patch
+ features/arm/cache-align.patch
+ features/arm/cache-align2.patch
+ features/arm/speed_flush_cache.patch
+ features/arm/5281d0.patch
+ features/all/at76.patch
+ bugfix/fix-hifn_795X-divdi3.patch
+ bugfix/all/mtd-prevent-physmap-from-causing-request_module-runaway-loop-modprobe-net-pf-1.patch