From: Nicolas Pitre <nico@cam.org>
Date: Mon, 31 Mar 2008 16:38:31 +0000 (-0400)
Subject: [ARM] cache align destination pointer when copying memory for some processors
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=f25c9c5b9b3eca2f4a41ac72fec6244c0cbd87cc

[ARM] cache align destination pointer when copying memory for some processors

The implementation for memory copy functions on ARM had a (disabled)
provision for aligning the source pointer before loading registers with
data.  It turns out that aligning the _destination_ pointer is much
more useful, as the read side is already sufficiently helped by the use
of preload.

So this changes the definition of the CALGN() macro to target the
destination pointer instead, and turns it on for Feroceon processors,
where the gain is very notable.

Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
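For those not steeped in these copy loops: roughly speaking, the enabled
CALGN() path copies just enough leading data that the destination pointer
lands on a 32-byte cache line boundary before the bulk of the copy runs,
so the write side then streams whole cache lines.  A plain C sketch of
the idea, illustrative only and not part of this patch (the helper name
is made up):

	#include <stddef.h>
	#include <stdint.h>
	#include <string.h>

	#define CACHE_LINE	32

	/* Sketch only: align the *destination* to a cache line, then bulk copy. */
	static void *copy_dst_aligned(void *dst, const void *src, size_t n)
	{
		uint8_t *d = dst;
		const uint8_t *s = src;
		size_t head = (uintptr_t)d & (CACHE_LINE - 1);

		if (head) {
			head = CACHE_LINE - head;	/* bytes up to the next line boundary */
			if (head > n)
				head = n;
			n -= head;
			while (head--)			/* align the write pointer */
				*d++ = *s++;
		}

		memcpy(d, s, n);	/* bulk copy now starts on a line boundary */
		return dst;
	}

The assembly below gets the same effect without a byte loop: the
destination offset within a cache line (now taken from r0 rather than
r1) is used to branch partway into the unrolled load/store sequence.
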
diff --git a/arch/arm/lib/copy_template.S b/arch/arm/lib/copy_template.S
index cab355c..139cce6 100644
--- a/arch/arm/lib/copy_template.S
+++ b/arch/arm/lib/copy_template.S
@@ -13,14 +13,6 @@
  */
 
 /*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
-/*
  * Theory of operation
  * -------------------
  *
@@ -82,7 +74,7 @@
 	stmfd sp!, {r5 - r8}
 	blt 5f
 
-	CALGN( ands ip, r1, #31 )
+	CALGN( ands ip, r0, #31 )
 	CALGN( rsb r3, ip, #32 )
 	CALGN( sbcnes r4, r3, r2 ) @ C is always set here
 	CALGN( bcs 2f )
@@ -168,7 +160,7 @@
 	subs r2, r2, #28
 	blt 14f
 
-	CALGN( ands ip, r1, #31 )
+	CALGN( ands ip, r0, #31 )
 	CALGN( rsb ip, ip, #32 )
 	CALGN( sbcnes r4, ip, r2 ) @ C is always set here
 	CALGN( subcc r2, r2, ip )
diff --git a/arch/arm/lib/memmove.S b/arch/arm/lib/memmove.S
index ef7fddc..415e3d1 100644
--- a/arch/arm/lib/memmove.S
+++ b/arch/arm/lib/memmove.S
@@ -13,14 +13,6 @@
 #include <linux/linkage.h>
 #include <asm/assembler.h>
 
-/*
- * This can be used to enable code to cacheline align the source pointer.
- * Experiments on tested architectures (StrongARM and XScale) didn't show
- * this a worthwhile thing to do. That might be different in the future.
- */
-//#define CALGN(code...) code
-#define CALGN(code...)
-
 	.text
 
 /*
@@ -55,7 +47,7 @@ ENTRY(memmove)
 	stmfd sp!, {r5 - r8}
 	blt 5f
 
-	CALGN( ands ip, r1, #31 )
+	CALGN( ands ip, r0, #31 )
 	CALGN( sbcnes r4, ip, r2 ) @ C is always set here
 	CALGN( bcs 2f )
 	CALGN( adr r4, 6f )
@@ -138,7 +130,7 @@ ENTRY(memmove)
 	subs r2, r2, #28
 	blt 14f
 
-	CALGN( ands ip, r1, #31 )
+	CALGN( ands ip, r0, #31 )
 	CALGN( rsb ip, ip, #32 )
 	CALGN( sbcnes r4, ip, r2 ) @ C is always set here
 	CALGN( subcc r2, r2, ip )
diff --git a/include/asm-arm/assembler.h b/include/asm-arm/assembler.h
index fce8328..911393b 100644
--- a/include/asm-arm/assembler.h
+++ b/include/asm-arm/assembler.h
@@ -56,6 +56,21 @@
 #endif
 
 /*
+ * This can be used to enable code to cacheline align the destination
+ * pointer when bulk writing to memory. Experiments on StrongARM and
+ * XScale didn't show this a worthwhile thing to do when the cache is not
+ * set to write-allocate (this would need further testing on XScale when WA
+ * is used).
+ *
+ * On Feroceon there is much to gain however, regardless of cache mode.
+ */
+#ifdef CONFIG_CPU_FEROCEON
+#define CALGN(code...) code
+#else
+#define CALGN(code...)
+#endif
+
+/*
  * Enable and disable interrupts
  */
 #if __LINUX_ARM_ARCH__ >= 6