Add some patches from Marvell's Orion tree
svn path=/dists/trunk/linux-2.6/; revision=11350
This commit is contained in:
parent
2ebd19a615
commit
110a9434a0
|
@ -22,6 +22,11 @@ linux-2.6 (2.6.26~rc1-1~experimental.1) UNRELEASED; urgency=low
|
|||
[ Martin Michlmayr ]
|
||||
* [arm/orion5x] Update the config to reflect upstream renaming this
|
||||
subarch.
|
||||
* [arm/orion5x] Add some patches from Marvell's Orion tree:
|
||||
- cache align destination pointer when copying memory for some processors
|
||||
- cache align memset and memzero
|
||||
- Feroceon: speed up flushing of the entire cache
|
||||
- support for 5281 D0 stepping
|
||||
|
||||
-- maximilian attems <maks@debian.org> Sat, 26 Apr 2008 23:11:17 +0200
|
||||
|
||||
|
|
|
@ -0,0 +1,52 @@
|
|||
From: Lennert Buytenhek <buytenh@marvell.com>
|
||||
Date: Mon, 5 May 2008 18:19:55 +0000 (-0400)
|
||||
Subject: Orion: support for D0 stepping
|
||||
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=0e33c8a37f7c05bf85944cf10ca499b2d3754c1b
|
||||
|
||||
Orion: support for D0 stepping
|
||||
|
||||
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
|
||||
diff --git a/arch/arm/mach-orion5x/common.c b/arch/arm/mach-orion5x/common.c
|
||||
index 4f13fd0..3e40e96 100644
|
||||
--- a/arch/arm/mach-orion5x/common.c
|
||||
+++ b/arch/arm/mach-orion5x/common.c
|
||||
@@ -338,6 +338,8 @@ static void __init orion5x_id(u32 *dev, u32 *rev, char **dev_name)
|
||||
*dev_name = "MV88F5281-D2";
|
||||
} else if (*rev == MV88F5281_REV_D1) {
|
||||
*dev_name = "MV88F5281-D1";
|
||||
+ } else if (*rev == MV88F5281_REV_D0) {
|
||||
+ *dev_name = "MV88F5281-D0";
|
||||
} else {
|
||||
*dev_name = "MV88F5281-Rev-Unsupported";
|
||||
}
|
||||
@@ -372,6 +374,15 @@ void __init orion5x_init(void)
|
||||
orion5x_setup_cpu_mbus_bridge();
|
||||
|
||||
/*
|
||||
+ * Don't issue "Wait for Interrupt" instruction if we are
|
||||
+ * running on D0 5281 silicon.
|
||||
+ */
|
||||
+ if (dev == MV88F5281_DEV_ID && rev == MV88F5281_REV_D0) {
|
||||
+ printk(KERN_INFO "Orion: Applying 5281 D0 WFI workaround.\n");
|
||||
+ disable_hlt();
|
||||
+ }
|
||||
+
|
||||
+ /*
|
||||
* Register devices.
|
||||
*/
|
||||
platform_device_register(&orion5x_uart);
|
||||
diff --git a/include/asm-arm/arch-orion5x/orion5x.h b/include/asm-arm/arch-orion5x/orion5x.h
|
||||
index 206ddd7..25ec775 100644
|
||||
--- a/include/asm-arm/arch-orion5x/orion5x.h
|
||||
+++ b/include/asm-arm/arch-orion5x/orion5x.h
|
||||
@@ -71,6 +71,7 @@
|
||||
#define MV88F5182_REV_A2 2
|
||||
/* Orion-2 (88F5281) */
|
||||
#define MV88F5281_DEV_ID 0x5281
|
||||
+#define MV88F5281_REV_D0 4
|
||||
#define MV88F5281_REV_D1 5
|
||||
#define MV88F5281_REV_D2 6
|
||||
|
|
@ -0,0 +1,120 @@
|
|||
From: Nicolas Pitre <nico@cam.org>
|
||||
Date: Mon, 31 Mar 2008 16:38:31 +0000 (-0400)
|
||||
Subject: [ARM] cache align destination pointer when copying memory for some processors
|
||||
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=f25c9c5b9b3eca2f4a41ac72fec6244c0cbd87cc
|
||||
|
||||
[ARM] cache align destination pointer when copying memory for some processors
|
||||
|
||||
The implementation for memory copy functions on ARM had a (disabled)
|
||||
provision for aligning the source pointer before loading registers with
|
||||
data. Turns out that aligning the _destination_ pointer is much more
|
||||
useful, as the read side is already sufficiently helped with the use of
|
||||
preload.
|
||||
|
||||
So this changes the definition of the CALGN() macro to target the
|
||||
destination pointer instead, and turns it on for Feroceon processors
|
||||
where the gain is very notable.
|
||||
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
|
||||
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
|
||||
index cab355c..139cce6 100644
|
||||
--- a/arch/arm/lib/copy_template.S
|
||||
+++ b/arch/arm/lib/copy_template.S
|
||||
@@ -13,14 +13,6 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
- * This can be used to enable code to cacheline align the source pointer.
|
||||
- * Experiments on tested architectures (StrongARM and XScale) didn't show
|
||||
- * this a worthwhile thing to do. That might be different in the future.
|
||||
- */
|
||||
-//#define CALGN(code...) code
|
||||
-#define CALGN(code...)
|
||||
-
|
||||
-/*
|
||||
* Theory of operation
|
||||
* -------------------
|
||||
*
|
||||
@@ -82,7 +74,7 @@
|
||||
stmfd sp!, {r5 - r8}
|
||||
blt 5f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( rsb r3, ip, #32 )
|
||||
CALGN( sbcnes r4, r3, r2 ) @ C is always set here
|
||||
CALGN( bcs 2f )
|
||||
@@ -168,7 +160,7 @@
|
||||
subs r2, r2, #28
|
||||
blt 14f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( rsb ip, ip, #32 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( subcc r2, r2, ip )
|
||||
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
|
||||
index ef7fddc..415e3d1 100644
|
||||
--- a/arch/arm/lib/memmove.S
|
||||
+++ b/arch/arm/lib/memmove.S
|
||||
@@ -13,14 +13,6 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
-/*
|
||||
- * This can be used to enable code to cacheline align the source pointer.
|
||||
- * Experiments on tested architectures (StrongARM and XScale) didn't show
|
||||
- * this a worthwhile thing to do. That might be different in the future.
|
||||
- */
|
||||
-//#define CALGN(code...) code
|
||||
-#define CALGN(code...)
|
||||
-
|
||||
.text
|
||||
|
||||
/*
|
||||
@@ -55,7 +47,7 @@ ENTRY(memmove)
|
||||
stmfd sp!, {r5 - r8}
|
||||
blt 5f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( bcs 2f )
|
||||
CALGN( adr r4, 6f )
|
||||
@@ -138,7 +130,7 @@ ENTRY(memmove)
|
||||
subs r2, r2, #28
|
||||
blt 14f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( rsb ip, ip, #32 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( subcc r2, r2, ip )
|
||||
diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h
|
||||
index fce8328..911393b 100644
|
||||
--- a/include/asm-arm/assembler.h
|
||||
+++ b/include/asm-arm/assembler.h
|
||||
@@ -56,6 +56,21 @@
|
||||
#endif
|
||||
|
||||
/*
|
||||
+ * This can be used to enable code to cacheline align the destination
|
||||
+ * pointer when bulk writing to memory. Experiments on StrongARM and
|
||||
+ * XScale didn't show this a worthwhile thing to do when the cache is not
|
||||
+ * set to write-allocate (this would need further testing on XScale when WA
|
||||
+ * is used).
|
||||
+ *
|
||||
+ * On Feroceon there is much to gain however, regardless of cache mode.
|
||||
+ */
|
||||
+#ifdef CONFIG_CPU_FEROCEON
|
||||
+#define CALGN(code...) code
|
||||
+#else
|
||||
+#define CALGN(code...)
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
* Enable and disable interrupts
|
||||
*/
|
||||
#if __LINUX_ARM_ARCH__ >= 6
|
|
@ -0,0 +1,139 @@
|
|||
From: Nicolas Pitre <nico@cam.org>
|
||||
Date: Sat, 12 Apr 2008 01:04:28 +0000 (-0400)
|
||||
Subject: [ARM] cache align memset and memzero
|
||||
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=74fa6238bc1602038532b548b954020f06b596cc
|
||||
|
||||
[ARM] cache align memset and memzero
|
||||
|
||||
This is a natural extension following the previous patch.
|
||||
Non-Feroceon-based targets are unchanged.
|
||||
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
|
||||
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
|
||||
index 95b110b..cf75188 100644
|
||||
--- a/arch/arm/lib/memset.S
|
||||
+++ b/arch/arm/lib/memset.S
|
||||
@@ -39,6 +39,9 @@ ENTRY(memset)
|
||||
mov r3, r1
|
||||
cmp r2, #16
|
||||
blt 4f
|
||||
+
|
||||
+#if CALGN(1)-1 != 0
|
||||
+
|
||||
/*
|
||||
* We need an extra register for this loop - save the return address and
|
||||
* use the LR
|
||||
@@ -64,6 +67,49 @@ ENTRY(memset)
|
||||
stmneia r0!, {r1, r3, ip, lr}
|
||||
ldr lr, [sp], #4
|
||||
|
||||
+#else
|
||||
+
|
||||
+/*
|
||||
+ * This version aligns the destination pointer in order to write
|
||||
+ * whole cache lines at once.
|
||||
+ */
|
||||
+
|
||||
+ stmfd sp!, {r4-r7, lr}
|
||||
+ mov r4, r1
|
||||
+ mov r5, r1
|
||||
+ mov r6, r1
|
||||
+ mov r7, r1
|
||||
+ mov ip, r1
|
||||
+ mov lr, r1
|
||||
+
|
||||
+ cmp r2, #96
|
||||
+ tstgt r0, #31
|
||||
+ ble 3f
|
||||
+
|
||||
+ and ip, r0, #31
|
||||
+ rsb ip, ip, #32
|
||||
+ sub r2, r2, ip
|
||||
+ movs ip, ip, lsl #(32 - 4)
|
||||
+ stmcsia r0!, {r4, r5, r6, r7}
|
||||
+ stmmiia r0!, {r4, r5}
|
||||
+ tst ip, #(1 << 30)
|
||||
+ mov ip, r1
|
||||
+ strne r1, [r0], #4
|
||||
+
|
||||
+3: subs r2, r2, #64
|
||||
+ stmgeia r0!, {r1, r3-r7, ip, lr}
|
||||
+ stmgeia r0!, {r1, r3-r7, ip, lr}
|
||||
+ bgt 3b
|
||||
+ ldmeqfd sp!, {r4-r7, pc}
|
||||
+
|
||||
+ tst r2, #32
|
||||
+ stmneia r0!, {r1, r3-r7, ip, lr}
|
||||
+ tst r2, #16
|
||||
+ stmneia r0!, {r4-r7}
|
||||
+ ldmfd sp!, {r4-r7, lr}
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
4: tst r2, #8
|
||||
stmneia r0!, {r1, r3}
|
||||
tst r2, #4
|
||||
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
|
||||
index abf2508..a9bfef5 100644
|
||||
--- a/arch/arm/lib/memzero.S
|
||||
+++ b/arch/arm/lib/memzero.S
|
||||
@@ -39,6 +39,9 @@ ENTRY(__memzero)
|
||||
*/
|
||||
cmp r1, #16 @ 1 we can skip this chunk if we
|
||||
blt 4f @ 1 have < 16 bytes
|
||||
+
|
||||
+#if CALGN(1)-1 != 0
|
||||
+
|
||||
/*
|
||||
* We need an extra register for this loop - save the return address and
|
||||
* use the LR
|
||||
@@ -64,6 +67,47 @@ ENTRY(__memzero)
|
||||
stmneia r0!, {r2, r3, ip, lr} @ 4
|
||||
ldr lr, [sp], #4 @ 1
|
||||
|
||||
+#else
|
||||
+
|
||||
+/*
|
||||
+ * This version aligns the destination pointer in order to write
|
||||
+ * whole cache lines at once.
|
||||
+ */
|
||||
+
|
||||
+ stmfd sp!, {r4-r7, lr}
|
||||
+ mov r4, r2
|
||||
+ mov r5, r2
|
||||
+ mov r6, r2
|
||||
+ mov r7, r2
|
||||
+ mov ip, r2
|
||||
+ mov lr, r2
|
||||
+
|
||||
+ cmp r1, #96
|
||||
+ andgts ip, r0, #31
|
||||
+ ble 3f
|
||||
+
|
||||
+ rsb ip, ip, #32
|
||||
+ sub r1, r1, ip
|
||||
+ movs ip, ip, lsl #(32 - 4)
|
||||
+ stmcsia r0!, {r4, r5, r6, r7}
|
||||
+ stmmiia r0!, {r4, r5}
|
||||
+ movs ip, ip, lsl #2
|
||||
+ strcs r2, [r0], #4
|
||||
+
|
||||
+3: subs r1, r1, #64
|
||||
+ stmgeia r0!, {r2-r7, ip, lr}
|
||||
+ stmgeia r0!, {r2-r7, ip, lr}
|
||||
+ bgt 3b
|
||||
+ ldmeqfd sp!, {r4-r7, pc}
|
||||
+
|
||||
+ tst r1, #32
|
||||
+ stmneia r0!, {r2-r7, ip, lr}
|
||||
+ tst r1, #16
|
||||
+ stmneia r0!, {r4-r7}
|
||||
+ ldmfd sp!, {r4-r7, lr}
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
4: tst r1, #8 @ 1 8 bytes or more?
|
||||
stmneia r0!, {r2, r3} @ 2
|
||||
tst r1, #4 @ 1 4 bytes or more?
|
|
@ -0,0 +1,124 @@
|
|||
From: Lennert Buytenhek <buytenh@wantstofly.org>
|
||||
Date: Thu, 24 Apr 2008 05:31:46 +0000 (-0400)
|
||||
Subject: [ARM] Feroceon: speed up flushing of the entire cache
|
||||
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=8c38bce5ed3a5f2c8a1cb070ba41a8889cc69257
|
||||
|
||||
[ARM] Feroceon: speed up flushing of the entire cache
|
||||
|
||||
Flushing the L1 D cache with a test/clean/invalidate loop is very
|
||||
easy in software, but it is not the quickest way of doing it, as
|
||||
there is a lot of overhead involved in re-scanning the cache from
|
||||
the beginning every time we hit a dirty line.
|
||||
|
||||
This patch makes proc-feroceon.S use "clean+invalidate by set/way"
|
||||
loops according to the possible cache configurations of Feroceon CPUs
|
||||
(either direct-mapped or 4-way set associative).
|
||||
|
||||
[nico: optimized the assembly a bit]
|
||||
|
||||
Signed-off-by: Lennert Buytenhek <buytenh@marvell.com>
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
|
||||
diff --git a/arch/arm/mm/proc-feroceon.S b/arch/arm/mm/proc-feroceon.S
|
||||
index a02c171..968d5ad 100644
|
||||
--- a/arch/arm/mm/proc-feroceon.S
|
||||
+++ b/arch/arm/mm/proc-feroceon.S
|
||||
@@ -44,11 +44,31 @@
|
||||
*/
|
||||
#define CACHE_DLINESIZE 32
|
||||
|
||||
+ .bss
|
||||
+ .align 3
|
||||
+__cache_params_loc:
|
||||
+ .space 8
|
||||
+
|
||||
.text
|
||||
+__cache_params:
|
||||
+ .word __cache_params_loc
|
||||
+
|
||||
/*
|
||||
* cpu_feroceon_proc_init()
|
||||
*/
|
||||
ENTRY(cpu_feroceon_proc_init)
|
||||
+ mrc p15, 0, r0, c0, c0, 1 @ read cache type register
|
||||
+ ldr r1, __cache_params
|
||||
+ mov r2, #(16 << 5)
|
||||
+ tst r0, #(1 << 16) @ get way
|
||||
+ mov r0, r0, lsr #18 @ get cache size order
|
||||
+ movne r3, #((4 - 1) << 30) @ 4-way
|
||||
+ and r0, r0, #0xf
|
||||
+ moveq r3, #0 @ 1-way
|
||||
+ mov r2, r2, lsl r0 @ actual cache size
|
||||
+ movne r2, r2, lsr #2 @ turned into # of sets
|
||||
+ sub r2, r2, #(1 << 5)
|
||||
+ stmia r1, {r2, r3}
|
||||
mov pc, lr
|
||||
|
||||
/*
|
||||
@@ -117,11 +137,19 @@ ENTRY(feroceon_flush_user_cache_all)
|
||||
*/
|
||||
ENTRY(feroceon_flush_kern_cache_all)
|
||||
mov r2, #VM_EXEC
|
||||
- mov ip, #0
|
||||
+
|
||||
__flush_whole_cache:
|
||||
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
|
||||
- bne 1b
|
||||
+ ldr r1, __cache_params
|
||||
+ ldmia r1, {r1, r3}
|
||||
+1: orr ip, r1, r3
|
||||
+2: mcr p15, 0, ip, c7, c14, 2 @ clean + invalidate D set/way
|
||||
+ subs ip, ip, #(1 << 30) @ next way
|
||||
+ bcs 2b
|
||||
+ subs r1, r1, #(1 << 5) @ next set
|
||||
+ bcs 1b
|
||||
+
|
||||
tst r2, #VM_EXEC
|
||||
+ mov ip, #0
|
||||
mcrne p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
||||
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
|
||||
mov pc, lr
|
||||
@@ -138,7 +166,6 @@ __flush_whole_cache:
|
||||
*/
|
||||
.align 5
|
||||
ENTRY(feroceon_flush_user_cache_range)
|
||||
- mov ip, #0
|
||||
sub r3, r1, r0 @ calculate total size
|
||||
cmp r3, #CACHE_DLIMIT
|
||||
bgt __flush_whole_cache
|
||||
@@ -152,6 +179,7 @@ ENTRY(feroceon_flush_user_cache_range)
|
||||
cmp r0, r1
|
||||
blo 1b
|
||||
tst r2, #VM_EXEC
|
||||
+ mov ip, #0
|
||||
mcrne p15, 0, ip, c7, c10, 4 @ drain WB
|
||||
mov pc, lr
|
||||
|
||||
@@ -306,16 +334,19 @@ ENTRY(cpu_feroceon_dcache_clean_area)
|
||||
.align 5
|
||||
ENTRY(cpu_feroceon_switch_mm)
|
||||
#ifdef CONFIG_MMU
|
||||
- mov ip, #0
|
||||
-@ && 'Clean & Invalidate whole DCache'
|
||||
-1: mrc p15, 0, r15, c7, c14, 3 @ test,clean,invalidate
|
||||
- bne 1b
|
||||
- mcr p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
||||
- mcr p15, 0, ip, c7, c10, 4 @ drain WB
|
||||
+ mov r2, lr @ abuse r2 to preserve lr
|
||||
+ bl __flush_whole_cache
|
||||
+ @ if r2 contains the VM_EXEC bit then the next 2 ops are done already
|
||||
+ tst r2, #VM_EXEC
|
||||
+ mcreq p15, 0, ip, c7, c5, 0 @ invalidate I cache
|
||||
+ mcreq p15, 0, ip, c7, c10, 4 @ drain WB
|
||||
+
|
||||
mcr p15, 0, r0, c2, c0, 0 @ load page table pointer
|
||||
mcr p15, 0, ip, c8, c7, 0 @ invalidate I & D TLBs
|
||||
-#endif
|
||||
+ mov pc, r2
|
||||
+#else
|
||||
mov pc, lr
|
||||
+#endif
|
||||
|
||||
/*
|
||||
* cpu_feroceon_set_pte_ext(ptep, pte, ext)
|
|
@ -27,6 +27,10 @@
|
|||
+ bugfix/arm/disable-scsi_acard.patch
|
||||
##+ bugfix/arm/disable-ath5k.patch
|
||||
+ bugfix/arm/disable-r6040.patch
|
||||
+ features/arm/cache-align.patch
|
||||
+ features/arm/cache-align2.patch
|
||||
+ features/arm/speed_flush_cache.patch
|
||||
+ features/arm/5281d0.patch
|
||||
+ features/all/at76.patch
|
||||
+ bugfix/fix-hifn_795X-divdi3.patch
|
||||
+ bugfix/all/mtd-prevent-physmap-from-causing-request_module-runaway-loop-modprobe-net-pf-1.patch
|
||||
|
|
Loading…
Reference in New Issue