143 lines
3.1 KiB
Diff
143 lines
3.1 KiB
Diff
From: Nicolas Pitre <nico@cam.org>
|
|
|
|
This is a natural extension following the previous patch.
|
|
Non-Feroceon-based targets are unchanged.
|
|
|
|
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
|
---
|
|
arch/arm/lib/memset.S | 46 ++++++++++++++++++++++++++++++++++++++++++++++
|
|
arch/arm/lib/memzero.S | 44 ++++++++++++++++++++++++++++++++++++++++++++
|
|
2 files changed, 90 insertions(+), 0 deletions(-)
|
|
|
|
Index: linux-2.6.26-rc5/arch/arm/lib/memset.S
|
|
===================================================================
|
|
--- linux-2.6.26-rc5.orig/arch/arm/lib/memset.S
|
|
+++ linux-2.6.26-rc5/arch/arm/lib/memset.S
|
|
@@ -39,6 +39,9 @@ ENTRY(memset)
|
|
mov r3, r1
|
|
cmp r2, #16
|
|
blt 4f
|
|
+
|
|
+#if ! CALGN(1)+0
|
|
+
|
|
/*
|
|
* We need an extra register for this loop - save the return address and
|
|
* use the LR
|
|
@@ -64,6 +67,49 @@ ENTRY(memset)
|
|
stmneia r0!, {r1, r3, ip, lr}
|
|
ldr lr, [sp], #4
|
|
|
|
+#else
|
|
+
|
|
+/*
|
|
+ * This version aligns the destination pointer in order to write
|
|
+ * whole cache lines at once.
|
|
+ */
|
|
+
|
|
+ stmfd sp!, {r4-r7, lr}
|
|
+ mov r4, r1
|
|
+ mov r5, r1
|
|
+ mov r6, r1
|
|
+ mov r7, r1
|
|
+ mov ip, r1
|
|
+ mov lr, r1
|
|
+
|
|
+ cmp r2, #96
|
|
+ tstgt r0, #31
|
|
+ ble 3f
|
|
+
|
|
+ and ip, r0, #31
|
|
+ rsb ip, ip, #32
|
|
+ sub r2, r2, ip
|
|
+ movs ip, ip, lsl #(32 - 4)
|
|
+ stmcsia r0!, {r4, r5, r6, r7}
|
|
+ stmmiia r0!, {r4, r5}
|
|
+ tst ip, #(1 << 30)
|
|
+ mov ip, r1
|
|
+ strne r1, [r0], #4
|
|
+
|
|
+3: subs r2, r2, #64
|
|
+ stmgeia r0!, {r1, r3-r7, ip, lr}
|
|
+ stmgeia r0!, {r1, r3-r7, ip, lr}
|
|
+ bgt 3b
|
|
+ ldmeqfd sp!, {r4-r7, pc}
|
|
+
|
|
+ tst r2, #32
|
|
+ stmneia r0!, {r1, r3-r7, ip, lr}
|
|
+ tst r2, #16
|
|
+ stmneia r0!, {r4-r7}
|
|
+ ldmfd sp!, {r4-r7, lr}
|
|
+
|
|
+#endif
|
|
+
|
|
4: tst r2, #8
|
|
stmneia r0!, {r1, r3}
|
|
tst r2, #4
|
|
Index: linux-2.6.26-rc5/arch/arm/lib/memzero.S
|
|
===================================================================
|
|
--- linux-2.6.26-rc5.orig/arch/arm/lib/memzero.S
|
|
+++ linux-2.6.26-rc5/arch/arm/lib/memzero.S
|
|
@@ -39,6 +39,9 @@ ENTRY(__memzero)
|
|
*/
|
|
cmp r1, #16 @ 1 we can skip this chunk if we
|
|
blt 4f @ 1 have < 16 bytes
|
|
+
|
|
+#if ! CALGN(1)+0
|
|
+
|
|
/*
|
|
* We need an extra register for this loop - save the return address and
|
|
* use the LR
|
|
@@ -64,6 +67,47 @@ ENTRY(__memzero)
|
|
stmneia r0!, {r2, r3, ip, lr} @ 4
|
|
ldr lr, [sp], #4 @ 1
|
|
|
|
+#else
|
|
+
|
|
+/*
|
|
+ * This version aligns the destination pointer in order to write
|
|
+ * whole cache lines at once.
|
|
+ */
|
|
+
|
|
+ stmfd sp!, {r4-r7, lr}
|
|
+ mov r4, r2
|
|
+ mov r5, r2
|
|
+ mov r6, r2
|
|
+ mov r7, r2
|
|
+ mov ip, r2
|
|
+ mov lr, r2
|
|
+
|
|
+ cmp r1, #96
|
|
+ andgts ip, r0, #31
|
|
+ ble 3f
|
|
+
|
|
+ rsb ip, ip, #32
|
|
+ sub r1, r1, ip
|
|
+ movs ip, ip, lsl #(32 - 4)
|
|
+ stmcsia r0!, {r4, r5, r6, r7}
|
|
+ stmmiia r0!, {r4, r5}
|
|
+ movs ip, ip, lsl #2
|
|
+ strcs r2, [r0], #4
|
|
+
|
|
+3: subs r1, r1, #64
|
|
+ stmgeia r0!, {r2-r7, ip, lr}
|
|
+ stmgeia r0!, {r2-r7, ip, lr}
|
|
+ bgt 3b
|
|
+ ldmeqfd sp!, {r4-r7, pc}
|
|
+
|
|
+ tst r1, #32
|
|
+ stmneia r0!, {r2-r7, ip, lr}
|
|
+ tst r1, #16
|
|
+ stmneia r0!, {r4-r7}
|
|
+ ldmfd sp!, {r4-r7, lr}
|
|
+
|
|
+#endif
|
|
+
|
|
4: tst r1, #8 @ 1 8 bytes or more?
|
|
stmneia r0!, {r2, r3} @ 2
|
|
tst r1, #4 @ 1 4 bytes or more?
|
|
|
|
-------------------------------------------------------------------
|
|
List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
|
|
FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
|
|
Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
|