linux/debian/patches/features/arm/cache-align2.patch

140 lines
2.8 KiB
Diff

From: Nicolas Pitre <nico@cam.org>
Date: Sat, 12 Apr 2008 01:04:28 +0000 (-0400)
Subject: [ARM] cache align memset and memzero
X-Git-Url: http://git.kernel.org/?p=linux%2Fkernel%2Fgit%2Fnico%2Forion.git;a=commitdiff_plain;h=74fa6238bc1602038532b548b954020f06b596cc
[ARM] cache align memset and memzero
This is a natural extension following the previous patch.
Non Feroceon based targets are unchanged.
Signed-off-by: Nicolas Pitre <nico@marvell.com>
---
diff --git a/arch/arm/lib/memset.S b/arch/arm/lib/memset.S
index 95b110b..cf75188 100644
--- a/arch/arm/lib/memset.S
+++ b/arch/arm/lib/memset.S
@@ -39,6 +39,9 @@ ENTRY(memset)
mov r3, r1
cmp r2, #16
blt 4f
+
+#if CALGN(1)-1 != 0
+
/*
* We need an extra register for this loop - save the return address and
* use the LR
@@ -64,6 +67,49 @@ ENTRY(memset)
stmneia r0!, {r1, r3, ip, lr}
ldr lr, [sp], #4
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r1
+ mov r5, r1
+ mov r6, r1
+ mov r7, r1
+ mov ip, r1
+ mov lr, r1
+
+ cmp r2, #96
+ tstgt r0, #31
+ ble 3f
+
+ and ip, r0, #31
+ rsb ip, ip, #32
+ sub r2, r2, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ tst ip, #(1 << 30)
+ mov ip, r1
+ strne r1, [r0], #4
+
+3: subs r2, r2, #64
+ stmgeia r0!, {r1, r3-r7, ip, lr}
+ stmgeia r0!, {r1, r3-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
+
+ tst r2, #32
+ stmneia r0!, {r1, r3-r7, ip, lr}
+ tst r2, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
+
+#endif
+
4: tst r2, #8
stmneia r0!, {r1, r3}
tst r2, #4
diff --git a/arch/arm/lib/memzero.S b/arch/arm/lib/memzero.S
index abf2508..a9bfef5 100644
--- a/arch/arm/lib/memzero.S
+++ b/arch/arm/lib/memzero.S
@@ -39,6 +39,9 @@ ENTRY(__memzero)
*/
cmp r1, #16 @ 1 we can skip this chunk if we
blt 4f @ 1 have < 16 bytes
+
+#if CALGN(1)-1 != 0
+
/*
* We need an extra register for this loop - save the return address and
* use the LR
@@ -64,6 +67,47 @@ ENTRY(__memzero)
stmneia r0!, {r2, r3, ip, lr} @ 4
ldr lr, [sp], #4 @ 1
+#else
+
+/*
+ * This version aligns the destination pointer in order to write
+ * whole cache lines at once.
+ */
+
+ stmfd sp!, {r4-r7, lr}
+ mov r4, r2
+ mov r5, r2
+ mov r6, r2
+ mov r7, r2
+ mov ip, r2
+ mov lr, r2
+
+ cmp r1, #96
+ andgts ip, r0, #31
+ ble 3f
+
+ rsb ip, ip, #32
+ sub r1, r1, ip
+ movs ip, ip, lsl #(32 - 4)
+ stmcsia r0!, {r4, r5, r6, r7}
+ stmmiia r0!, {r4, r5}
+ movs ip, ip, lsl #2
+ strcs r2, [r0], #4
+
+3: subs r1, r1, #64
+ stmgeia r0!, {r2-r7, ip, lr}
+ stmgeia r0!, {r2-r7, ip, lr}
+ bgt 3b
+ ldmeqfd sp!, {r4-r7, pc}
+
+ tst r1, #32
+ stmneia r0!, {r2-r7, ip, lr}
+ tst r1, #16
+ stmneia r0!, {r4-r7}
+ ldmfd sp!, {r4-r7, lr}
+
+#endif
+
4: tst r1, #8 @ 1 8 bytes or more?
stmneia r0!, {r2, r3} @ 2
tst r1, #4 @ 1 4 bytes or more?