Add some cache alignment speedups for arm/orion5x
svn path=/dists/trunk/linux-2.6/; revision=11633
This commit is contained in:
parent
8687a57c93
commit
e4ab0fb6f8
|
@ -46,6 +46,8 @@ linux-2.6 (2.6.26~rc6-1~experimental.1) UNRELEASED; urgency=low
|
|||
* [arm/orion5x] Add some patches from Marvell's Orion tree:
|
||||
- Feroceon: speed up flushing of the entire cache
|
||||
- support for 5281 D0 stepping
|
||||
- cache align destination pointer when copying memory for some processors
|
||||
- cache align memset and memzero
|
||||
* [arm/orion5x] Enable NETCONSOLE.
|
||||
* [arm/orion5x] Disable more SCSI drivers.
|
||||
* [arm/ixp4xx] Disable ATA and more SCSI and network drivers.
|
||||
|
|
|
@ -0,0 +1,124 @@
|
|||
From: Nicolas Pitre <nico@cam.org>
|
||||
|
||||
The implementation for memory copy functions on ARM had a (disabled)
|
||||
provision for aligning the source pointer before loading registers with
|
||||
data. Turns out that aligning the _destination_ pointer is much more
|
||||
useful, as the read side is already sufficiently helped with the use of
|
||||
preload.
|
||||
|
||||
So this changes the definition of the CALGN() macro to target the
|
||||
destination pointer instead, and turns it on for Feroceon processors
|
||||
where the gain is very notable.
|
||||
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
arch/arm/lib/copy_template.S | 12 ++----------
|
||||
arch/arm/lib/memmove.S | 12 ++----------
|
||||
include/asm-arm/assembler.h | 15 +++++++++++++++
|
||||
3 files changed, 19 insertions(+), 20 deletions(-)
|
||||
|
||||
Index: linux-2.6.26-rc5/arch/arm/lib/copy_template.S
|
||||
===================================================================
|
||||
--- linux-2.6.26-rc5.orig/arch/arm/lib/copy_template.S
|
||||
+++ linux-2.6.26-rc5/arch/arm/lib/copy_template.S
|
||||
@@ -13,14 +13,6 @@
|
||||
*/
|
||||
|
||||
/*
|
||||
- * This can be used to enable code to cacheline align the source pointer.
|
||||
- * Experiments on tested architectures (StrongARM and XScale) didn't show
|
||||
- * this a worthwhile thing to do. That might be different in the future.
|
||||
- */
|
||||
-//#define CALGN(code...) code
|
||||
-#define CALGN(code...)
|
||||
-
|
||||
-/*
|
||||
* Theory of operation
|
||||
* -------------------
|
||||
*
|
||||
@@ -82,7 +74,7 @@
|
||||
stmfd sp!, {r5 - r8}
|
||||
blt 5f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( rsb r3, ip, #32 )
|
||||
CALGN( sbcnes r4, r3, r2 ) @ C is always set here
|
||||
CALGN( bcs 2f )
|
||||
@@ -168,7 +160,7 @@
|
||||
subs r2, r2, #28
|
||||
blt 14f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( rsb ip, ip, #32 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( subcc r2, r2, ip )
|
||||
Index: linux-2.6.26-rc5/arch/arm/lib/memmove.S
|
||||
===================================================================
|
||||
--- linux-2.6.26-rc5.orig/arch/arm/lib/memmove.S
|
||||
+++ linux-2.6.26-rc5/arch/arm/lib/memmove.S
|
||||
@@ -13,14 +13,6 @@
|
||||
#include <linux/linkage.h>
|
||||
#include <asm/assembler.h>
|
||||
|
||||
-/*
|
||||
- * This can be used to enable code to cacheline align the source pointer.
|
||||
- * Experiments on tested architectures (StrongARM and XScale) didn't show
|
||||
- * this a worthwhile thing to do. That might be different in the future.
|
||||
- */
|
||||
-//#define CALGN(code...) code
|
||||
-#define CALGN(code...)
|
||||
-
|
||||
.text
|
||||
|
||||
/*
|
||||
@@ -55,7 +47,7 @@ ENTRY(memmove)
|
||||
stmfd sp!, {r5 - r8}
|
||||
blt 5f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( bcs 2f )
|
||||
CALGN( adr r4, 6f )
|
||||
@@ -139,7 +131,7 @@ ENTRY(memmove)
|
||||
subs r2, r2, #28
|
||||
blt 14f
|
||||
|
||||
- CALGN( ands ip, r1, #31 )
|
||||
+ CALGN( ands ip, r0, #31 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( subcc r2, r2, ip )
|
||||
CALGN( bcc 15f )
|
||||
Index: linux-2.6.26-rc5/include/asm-arm/assembler.h
|
||||
===================================================================
|
||||
--- linux-2.6.26-rc5.orig/include/asm-arm/assembler.h
|
||||
+++ linux-2.6.26-rc5/include/asm-arm/assembler.h
|
||||
@@ -56,6 +56,21 @@
|
||||
#endif
|
||||
|
||||
/*
|
||||
+ * This can be used to enable code to cacheline align the destination
|
||||
+ * pointer when bulk writing to memory. Experiments on StrongARM and
|
||||
+ * XScale didn't show this a worthwhile thing to do when the cache is not
|
||||
+ * set to write-allocate (this would need further testing on XScale when WA
|
||||
+ * is used).
|
||||
+ *
|
||||
+ * On Feroceon there is much to gain however, regardless of cache mode.
|
||||
+ */
|
||||
+#ifdef CONFIG_CPU_FEROCEON
|
||||
+#define CALGN(code...) code
|
||||
+#else
|
||||
+#define CALGN(code...)
|
||||
+#endif
|
||||
+
|
||||
+/*
|
||||
* Enable and disable interrupts
|
||||
*/
|
||||
#if __LINUX_ARM_ARCH__ >= 6
|
||||
|
||||
-------------------------------------------------------------------
|
||||
List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
|
||||
FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
|
||||
Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
|
|
@ -0,0 +1,142 @@
|
|||
From: Nicolas Pitre <nico@cam.org>
|
||||
|
||||
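This is a natural extension following the previous patch: memset and memzero now also cache align the destination pointer so that whole cache lines are written at once.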
|
||||
Non-Feroceon-based targets are unchanged.
|
||||
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
arch/arm/lib/memset.S | 46 ++++++++++++++++++++++++++++++++++++++++++++++
|
||||
arch/arm/lib/memzero.S | 44 ++++++++++++++++++++++++++++++++++++++++++++
|
||||
2 files changed, 90 insertions(+), 0 deletions(-)
|
||||
|
||||
Index: linux-2.6.26-rc5/arch/arm/lib/memset.S
|
||||
===================================================================
|
||||
--- linux-2.6.26-rc5.orig/arch/arm/lib/memset.S
|
||||
+++ linux-2.6.26-rc5/arch/arm/lib/memset.S
|
||||
@@ -39,6 +39,9 @@ ENTRY(memset)
|
||||
mov r3, r1
|
||||
cmp r2, #16
|
||||
blt 4f
|
||||
+
|
||||
+#if ! CALGN(1)+0
|
||||
+
|
||||
/*
|
||||
* We need an extra register for this loop - save the return address and
|
||||
* use the LR
|
||||
@@ -64,6 +67,49 @@ ENTRY(memset)
|
||||
stmneia r0!, {r1, r3, ip, lr}
|
||||
ldr lr, [sp], #4
|
||||
|
||||
+#else
|
||||
+
|
||||
+/*
|
||||
+ * This version aligns the destination pointer in order to write
|
||||
+ * whole cache lines at once.
|
||||
+ */
|
||||
+
|
||||
+ stmfd sp!, {r4-r7, lr}
|
||||
+ mov r4, r1
|
||||
+ mov r5, r1
|
||||
+ mov r6, r1
|
||||
+ mov r7, r1
|
||||
+ mov ip, r1
|
||||
+ mov lr, r1
|
||||
+
|
||||
+ cmp r2, #96
|
||||
+ tstgt r0, #31
|
||||
+ ble 3f
|
||||
+
|
||||
+ and ip, r0, #31
|
||||
+ rsb ip, ip, #32
|
||||
+ sub r2, r2, ip
|
||||
+ movs ip, ip, lsl #(32 - 4)
|
||||
+ stmcsia r0!, {r4, r5, r6, r7}
|
||||
+ stmmiia r0!, {r4, r5}
|
||||
+ tst ip, #(1 << 30)
|
||||
+ mov ip, r1
|
||||
+ strne r1, [r0], #4
|
||||
+
|
||||
+3: subs r2, r2, #64
|
||||
+ stmgeia r0!, {r1, r3-r7, ip, lr}
|
||||
+ stmgeia r0!, {r1, r3-r7, ip, lr}
|
||||
+ bgt 3b
|
||||
+ ldmeqfd sp!, {r4-r7, pc}
|
||||
+
|
||||
+ tst r2, #32
|
||||
+ stmneia r0!, {r1, r3-r7, ip, lr}
|
||||
+ tst r2, #16
|
||||
+ stmneia r0!, {r4-r7}
|
||||
+ ldmfd sp!, {r4-r7, lr}
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
4: tst r2, #8
|
||||
stmneia r0!, {r1, r3}
|
||||
tst r2, #4
|
||||
Index: linux-2.6.26-rc5/arch/arm/lib/memzero.S
|
||||
===================================================================
|
||||
--- linux-2.6.26-rc5.orig/arch/arm/lib/memzero.S
|
||||
+++ linux-2.6.26-rc5/arch/arm/lib/memzero.S
|
||||
@@ -39,6 +39,9 @@ ENTRY(__memzero)
|
||||
*/
|
||||
cmp r1, #16 @ 1 we can skip this chunk if we
|
||||
blt 4f @ 1 have < 16 bytes
|
||||
+
|
||||
+#if ! CALGN(1)+0
|
||||
+
|
||||
/*
|
||||
* We need an extra register for this loop - save the return address and
|
||||
* use the LR
|
||||
@@ -64,6 +67,47 @@ ENTRY(__memzero)
|
||||
stmneia r0!, {r2, r3, ip, lr} @ 4
|
||||
ldr lr, [sp], #4 @ 1
|
||||
|
||||
+#else
|
||||
+
|
||||
+/*
|
||||
+ * This version aligns the destination pointer in order to write
|
||||
+ * whole cache lines at once.
|
||||
+ */
|
||||
+
|
||||
+ stmfd sp!, {r4-r7, lr}
|
||||
+ mov r4, r2
|
||||
+ mov r5, r2
|
||||
+ mov r6, r2
|
||||
+ mov r7, r2
|
||||
+ mov ip, r2
|
||||
+ mov lr, r2
|
||||
+
|
||||
+ cmp r1, #96
|
||||
+ andgts ip, r0, #31
|
||||
+ ble 3f
|
||||
+
|
||||
+ rsb ip, ip, #32
|
||||
+ sub r1, r1, ip
|
||||
+ movs ip, ip, lsl #(32 - 4)
|
||||
+ stmcsia r0!, {r4, r5, r6, r7}
|
||||
+ stmmiia r0!, {r4, r5}
|
||||
+ movs ip, ip, lsl #2
|
||||
+ strcs r2, [r0], #4
|
||||
+
|
||||
+3: subs r1, r1, #64
|
||||
+ stmgeia r0!, {r2-r7, ip, lr}
|
||||
+ stmgeia r0!, {r2-r7, ip, lr}
|
||||
+ bgt 3b
|
||||
+ ldmeqfd sp!, {r4-r7, pc}
|
||||
+
|
||||
+ tst r1, #32
|
||||
+ stmneia r0!, {r2-r7, ip, lr}
|
||||
+ tst r1, #16
|
||||
+ stmneia r0!, {r4-r7}
|
||||
+ ldmfd sp!, {r4-r7, lr}
|
||||
+
|
||||
+#endif
|
||||
+
|
||||
4: tst r1, #8 @ 1 8 bytes or more?
|
||||
stmneia r0!, {r2, r3} @ 2
|
||||
tst r1, #4 @ 1 4 bytes or more?
|
||||
|
||||
-------------------------------------------------------------------
|
||||
List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
|
||||
FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
|
||||
Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
|
|
@ -0,0 +1,34 @@
|
|||
From: Nicolas Pitre <nico@cam.org>
|
||||
|
||||
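Fix the cache alignment code in memmove: the "rsb ip, ip, #32" adjustment belongs in the first CALGN sequence, just before the computed jump (add pc, r4, ip), not in the later alignment sequence. This code is currently disabled, which explains why no one was affected.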
|
||||
|
||||
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
||||
---
|
||||
arch/arm/lib/memmove.S | 2 +-
|
||||
1 files changed, 1 insertions(+), 1 deletions(-)
|
||||
|
||||
Index: linux-2.6.26-rc5/arch/arm/lib/memmove.S
|
||||
===================================================================
|
||||
--- linux-2.6.26-rc5.orig/arch/arm/lib/memmove.S
|
||||
+++ linux-2.6.26-rc5/arch/arm/lib/memmove.S
|
||||
@@ -60,6 +60,7 @@ ENTRY(memmove)
|
||||
CALGN( bcs 2f )
|
||||
CALGN( adr r4, 6f )
|
||||
CALGN( subs r2, r2, ip ) @ C is set here
|
||||
+ CALGN( rsb ip, ip, #32 )
|
||||
CALGN( add pc, r4, ip )
|
||||
|
||||
PLD( pld [r1, #-4] )
|
||||
@@ -139,7 +140,6 @@ ENTRY(memmove)
|
||||
blt 14f
|
||||
|
||||
CALGN( ands ip, r1, #31 )
|
||||
- CALGN( rsb ip, ip, #32 )
|
||||
CALGN( sbcnes r4, ip, r2 ) @ C is always set here
|
||||
CALGN( subcc r2, r2, ip )
|
||||
CALGN( bcc 15f )
|
||||
|
||||
-------------------------------------------------------------------
|
||||
List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
|
||||
FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
|
||||
Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
|
|
@ -31,6 +31,9 @@
|
|||
+ bugfix/arm/disable-r6040.patch
|
||||
+ features/arm/speed_flush_cache.patch
|
||||
+ features/arm/5281d0.patch
|
||||
+ features/arm/fix_cache_alignment.patch
|
||||
+ features/arm/cache_align1.patch
|
||||
+ features/arm/cache_align2.patch
|
||||
+ features/arm/led-pca9532-generic.patch
|
||||
+ features/arm/led-pca9532-fix.patch
|
||||
+ features/arm/led-pca9532-n2100.patch
|
||||
|
|
Loading…
Reference in New Issue