diff --git a/debian/changelog b/debian/changelog
index 66686879d..6c27374af 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -46,6 +46,8 @@ linux-2.6 (2.6.26~rc6-1~experimental.1) UNRELEASED; urgency=low
   * [arm/orion5x] Add some patches from Marvell's Orion tree:
     - Feroceon: speed up flushing of the entire cache
     - support for 5281 D0 stepping
+    - cache align destination pointer when copying memory for some processors
+    - cache align memset and memzero
   * [arm/orion5x] Enable NETCONSOLE.
   * [arm/orion5x] Disable more SCSI drivers.
   * [arm/ixp4xx] Disable ATA and more SCSI and network drivers.
diff --git a/debian/patches/features/arm/cache_align1.patch b/debian/patches/features/arm/cache_align1.patch
new file mode 100644
index 000000000..4db1b21c8
--- /dev/null
+++ b/debian/patches/features/arm/cache_align1.patch
@@ -0,0 +1,124 @@
+From: Nicolas Pitre
+
+The implementation for memory copy functions on ARM had a (disabled)
+provision for aligning the source pointer before loading registers with
+data. Turns out that aligning the _destination_ pointer is much more
+useful, as the read side is already sufficiently helped with the use of
+preload.
+
+So this changes the definition of the CALGN() macro to target the
+destination pointer instead, and turns it on for Feroceon processors
+where the gain is very notable.
+
+Signed-off-by: Nicolas Pitre
+---
+ arch/arm/lib/copy_template.S | 12 ++----------
+ arch/arm/lib/memmove.S       | 12 ++----------
+ include/asm-arm/assembler.h  | 15 +++++++++++++++
+ 3 files changed, 19 insertions(+), 20 deletions(-)
+
+Index: linux-2.6.26-rc5/arch/arm/lib/copy_template.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/copy_template.S
++++ linux-2.6.26-rc5/arch/arm/lib/copy_template.S
+@@ -13,14 +13,6 @@
+  */
+
+ /*
+- * This can be used to enable code to cacheline align the source pointer.
+- * Experiments on tested architectures (StrongARM and XScale) didn't show
+- * this a worthwhile thing to do. That might be different in the future.
+- */
+-//#define CALGN(code...) code
+-#define CALGN(code...)
+-
+-/*
+  * Theory of operation
+  * -------------------
+  *
+@@ -82,7 +74,7 @@
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb r3, ip, #32 )
+ CALGN( sbcnes r4, r3, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+@@ -168,7 +160,7 @@
+ subs r2, r2, #28
+ blt 14f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+Index: linux-2.6.26-rc5/arch/arm/lib/memmove.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memmove.S
++++ linux-2.6.26-rc5/arch/arm/lib/memmove.S
+@@ -13,14 +13,6 @@
+ #include <linux/linkage.h>
+ #include <asm/assembler.h>
+
+-/*
+- * This can be used to enable code to cacheline align the source pointer.
+- * Experiments on tested architectures (StrongARM and XScale) didn't show
+- * this a worthwhile thing to do. That might be different in the future.
+- */
+-//#define CALGN(code...) code
+-#define CALGN(code...)
+-
+ .text
+
+ /*
+@@ -55,7 +47,7 @@ ENTRY(memmove)
+ stmfd sp!, {r5 - r8}
+ blt 5f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+@@ -139,7 +131,7 @@ ENTRY(memmove)
+ subs r2, r2, #28
+ blt 14f
+
+- CALGN( ands ip, r1, #31 )
++ CALGN( ands ip, r0, #31 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+Index: linux-2.6.26-rc5/include/asm-arm/assembler.h
+===================================================================
+--- linux-2.6.26-rc5.orig/include/asm-arm/assembler.h
++++ linux-2.6.26-rc5/include/asm-arm/assembler.h
+@@ -56,6 +56,21 @@
+ #endif
+
+ /*
++ * This can be used to enable code to cacheline align the destination
++ * pointer when bulk writing to memory. Experiments on StrongARM and
++ * XScale didn't show this a worthwhile thing to do when the cache is not
++ * set to write-allocate (this would need further testing on XScale when WA
++ * is used).
++ *
++ * On Feroceon there is much to gain however, regardless of cache mode.
++ */
++#ifdef CONFIG_CPU_FEROCEON
++#define CALGN(code...) code
++#else
++#define CALGN(code...)
++#endif
++
++/*
+  * Enable and disable interrupts
+  */
+ #if __LINUX_ARM_ARCH__ >= 6
+
+-------------------------------------------------------------------
+List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
+FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
+Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
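
An illustrative C sketch of what the enabled CALGN() blocks buy (not part of the
patch; the function name and the 32-byte line size, the length the assembly's
"#31" masks assume, are choices made for the example):

#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define CACHE_LINE 32 /* line size assumed by the "and ip, r0, #31" masks */

/*
 * Copy n bytes so that the bulk of the stores start on a cache line
 * boundary.  On a write-allocate cache such as Feroceon's, writing
 * whole lines from a line-aligned pointer avoids partially written
 * lines; the unaligned read side stays cheap because the assembly
 * versions already issue pld preloads.
 */
static void *copy_dst_aligned(void *dst, const void *src, size_t n)
{
	unsigned char *d = dst;
	const unsigned char *s = src;
	size_t head = (uintptr_t)d & (CACHE_LINE - 1);

	if (head && n >= CACHE_LINE) {
		head = CACHE_LINE - head;   /* the "rsb r3, ip, #32" step */
		memcpy(d, s, head);
		d += head; s += head; n -= head;
	}
	while (n >= CACHE_LINE) {           /* whole lines, as ldm/stm bursts */
		memcpy(d, s, CACHE_LINE);
		d += CACHE_LINE; s += CACHE_LINE; n -= CACHE_LINE;
	}
	memcpy(d, s, n);                    /* trailing bytes */
	return dst;
}
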
diff --git a/debian/patches/features/arm/cache_align2.patch b/debian/patches/features/arm/cache_align2.patch
new file mode 100644
index 000000000..79b0a359e
--- /dev/null
+++ b/debian/patches/features/arm/cache_align2.patch
@@ -0,0 +1,142 @@
+From: Nicolas Pitre
+
+This is a natural extension following the previous patch.
+Non-Feroceon-based targets are unchanged.
+
+Signed-off-by: Nicolas Pitre
+---
+ arch/arm/lib/memset.S  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
+ arch/arm/lib/memzero.S | 44 ++++++++++++++++++++++++++++++++++++++++
+ 2 files changed, 90 insertions(+), 0 deletions(-)
+
+Index: linux-2.6.26-rc5/arch/arm/lib/memset.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memset.S
++++ linux-2.6.26-rc5/arch/arm/lib/memset.S
+@@ -39,6 +39,9 @@ ENTRY(memset)
+ mov r3, r1
+ cmp r2, #16
+ blt 4f
++
++#if ! CALGN(1)+0
++
+ /*
+  * We need an extra register for this loop - save the return address and
+  * use the LR
+@@ -64,6 +67,49 @@ ENTRY(memset)
+ stmneia r0!, {r1, r3, ip, lr}
+ ldr lr, [sp], #4
+
++#else
++
++/*
++ * This version aligns the destination pointer in order to write
++ * whole cache lines at once.
++ */
++
++ stmfd sp!, {r4-r7, lr}
++ mov r4, r1
++ mov r5, r1
++ mov r6, r1
++ mov r7, r1
++ mov ip, r1
++ mov lr, r1
++
++ cmp r2, #96
++ tstgt r0, #31
++ ble 3f
++
++ and ip, r0, #31
++ rsb ip, ip, #32
++ sub r2, r2, ip
++ movs ip, ip, lsl #(32 - 4)
++ stmcsia r0!, {r4, r5, r6, r7}
++ stmmiia r0!, {r4, r5}
++ tst ip, #(1 << 30)
++ mov ip, r1
++ strne r1, [r0], #4
++
++3: subs r2, r2, #64
++ stmgeia r0!, {r1, r3-r7, ip, lr}
++ stmgeia r0!, {r1, r3-r7, ip, lr}
++ bgt 3b
++ ldmeqfd sp!, {r4-r7, pc}
++
++ tst r2, #32
++ stmneia r0!, {r1, r3-r7, ip, lr}
++ tst r2, #16
++ stmneia r0!, {r4-r7}
++ ldmfd sp!, {r4-r7, lr}
++
++#endif
++
+ 4: tst r2, #8
+ stmneia r0!, {r1, r3}
+ tst r2, #4
+Index: linux-2.6.26-rc5/arch/arm/lib/memzero.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memzero.S
++++ linux-2.6.26-rc5/arch/arm/lib/memzero.S
+@@ -39,6 +39,9 @@ ENTRY(__memzero)
+  */
+ cmp r1, #16 @ 1 we can skip this chunk if we
+ blt 4f @ 1 have < 16 bytes
++
++#if ! CALGN(1)+0
++
+ /*
+  * We need an extra register for this loop - save the return address and
+  * use the LR
+@@ -64,6 +67,47 @@ ENTRY(__memzero)
+ stmneia r0!, {r2, r3, ip, lr} @ 4
+ ldr lr, [sp], #4 @ 1
+
++#else
++
++/*
++ * This version aligns the destination pointer in order to write
++ * whole cache lines at once.
++ */
++
++ stmfd sp!, {r4-r7, lr}
++ mov r4, r2
++ mov r5, r2
++ mov r6, r2
++ mov r7, r2
++ mov ip, r2
++ mov lr, r2
++
++ cmp r1, #96
++ andgts ip, r0, #31
++ ble 3f
++
++ rsb ip, ip, #32
++ sub r1, r1, ip
++ movs ip, ip, lsl #(32 - 4)
++ stmcsia r0!, {r4, r5, r6, r7}
++ stmmiia r0!, {r4, r5}
++ movs ip, ip, lsl #2
++ strcs r2, [r0], #4
++
++3: subs r1, r1, #64
++ stmgeia r0!, {r2-r7, ip, lr}
++ stmgeia r0!, {r2-r7, ip, lr}
++ bgt 3b
++ ldmeqfd sp!, {r4-r7, pc}
++
++ tst r1, #32
++ stmneia r0!, {r2-r7, ip, lr}
++ tst r1, #16
++ stmneia r0!, {r4-r7}
++ ldmfd sp!, {r4-r7, lr}
++
++#endif
++
+ 4: tst r1, #8 @ 1 8 bytes or more?
+ stmneia r0!, {r2, r3} @ 2
+ tst r1, #4 @ 1 4 bytes or more?
+
+-------------------------------------------------------------------
+List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
+FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
+Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
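
A note on the odd-looking "#if ! CALGN(1)+0" guard: when CALGN(code...) expands
to its argument, the expression reads "! 1+0", and since unary "!" binds tighter
than "+" that is "(!1)+0 == 0", so the aligned variant under #else is assembled;
when the macro expands to nothing, what is left is "! +0", i.e. "!(+0) == 1",
and the original code is kept. The C sketch below mirrors the aligned memset
strategy (illustration only, names invented here, not the kernel routine):

#include <stddef.h>
#include <stdint.h>

#define CACHE_LINE 32

static void *memset_dst_aligned(void *dst, int c, size_t n)
{
	unsigned char *d = dst;

	/* Below 96 bytes the alignment preamble costs more than it saves,
	 * which is what "cmp r2, #96; tstgt r0, #31; ble 3f" checks. */
	if (n > 96 && ((uintptr_t)d & (CACHE_LINE - 1))) {
		size_t head = CACHE_LINE - ((uintptr_t)d & (CACHE_LINE - 1));
		n -= head;
		while (head--)
			*d++ = (unsigned char)c;
	}

	/* The assembly does this 64 bytes per iteration with two
	 * eight-register stm instructions. */
	while (n >= CACHE_LINE) {
		for (size_t i = 0; i < CACHE_LINE; i++)
			d[i] = (unsigned char)c;
		d += CACHE_LINE;
		n -= CACHE_LINE;
	}
	while (n--)
		*d++ = (unsigned char)c;
	return dst;
}
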
diff --git a/debian/patches/features/arm/fix_cache_alignment.patch b/debian/patches/features/arm/fix_cache_alignment.patch
new file mode 100644
index 000000000..1b1b8af86
--- /dev/null
+++ b/debian/patches/features/arm/fix_cache_alignment.patch
@@ -0,0 +1,34 @@
+From: Nicolas Pitre
+
+This code is currently disabled, which explains why no one was affected.
+
+Signed-off-by: Nicolas Pitre
+---
+ arch/arm/lib/memmove.S | 2 +-
+ 1 files changed, 1 insertions(+), 1 deletions(-)
+
+Index: linux-2.6.26-rc5/arch/arm/lib/memmove.S
+===================================================================
+--- linux-2.6.26-rc5.orig/arch/arm/lib/memmove.S
++++ linux-2.6.26-rc5/arch/arm/lib/memmove.S
+@@ -60,6 +60,7 @@ ENTRY(memmove)
+ CALGN( bcs 2f )
+ CALGN( adr r4, 6f )
+ CALGN( subs r2, r2, ip ) @ C is set here
++ CALGN( rsb ip, ip, #32 )
+ CALGN( add pc, r4, ip )
+
+ PLD( pld [r1, #-4] )
+@@ -139,7 +140,6 @@ ENTRY(memmove)
+ blt 14f
+
+ CALGN( ands ip, r1, #31 )
+- CALGN( rsb ip, ip, #32 )
+ CALGN( sbcnes r4, ip, r2 ) @ C is always set here
+ CALGN( subcc r2, r2, ip )
+ CALGN( bcc 15f )
+
+-------------------------------------------------------------------
+List admin: http://lists.arm.linux.org.uk/mailman/listinfo/linux-arm-kernel
+FAQ: http://www.arm.linux.org.uk/mailinglists/faq.php
+Etiquette: http://www.arm.linux.org.uk/mailinglists/etiquette.php
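
For context: the CALGN blocks touched above feed a computed branch. "adr r4, 6f"
takes the address of an unrolled ladder of load/store pairs and "add pc, r4, ip"
jumps ip bytes into it, so the "rsb ip, ip, #32" that this patch relocates is
what turns the destination's alignment offset into the correct branch distance
before the jump. A loose C analogue of that computed-jump idiom, in the style of
Duff's device (a sketch, not the actual memmove.S control flow):

#include <stddef.h>

/* Branch "into the middle" of an unrolled word-copy ladder: the switch
 * plays the role of "add pc, r4, ip".  Entering with the wrong index
 * copies the wrong number of head words, the kind of off-by-a-line bug
 * the patch above fixes. */
static void copy_head_words(unsigned int **dp, const unsigned int **sp,
                            unsigned int words)   /* 0..7 */
{
	unsigned int *d = *dp;
	const unsigned int *s = *sp;

	switch (words) {
	case 7: *d++ = *s++; /* fall through */
	case 6: *d++ = *s++; /* fall through */
	case 5: *d++ = *s++; /* fall through */
	case 4: *d++ = *s++; /* fall through */
	case 3: *d++ = *s++; /* fall through */
	case 2: *d++ = *s++; /* fall through */
	case 1: *d++ = *s++; /* fall through */
	default: break;
	}
	*dp = d;
	*sp = s;
}
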
diff --git a/debian/patches/series/1~experimental.1 b/debian/patches/series/1~experimental.1
index 2b511aa72..7ceb097e2 100644
--- a/debian/patches/series/1~experimental.1
+++ b/debian/patches/series/1~experimental.1
@@ -31,6 +31,9 @@
 + bugfix/arm/disable-r6040.patch
 + features/arm/speed_flush_cache.patch
 + features/arm/5281d0.patch
++ features/arm/fix_cache_alignment.patch
++ features/arm/cache_align1.patch
++ features/arm/cache_align2.patch
 + features/arm/led-pca9532-generic.patch
 + features/arm/led-pca9532-fix.patch
 + features/arm/led-pca9532-n2100.patch
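
Since the series swaps in new fast paths for hot memory primitives, a userspace
cross-check over all alignments and sizes is cheap insurance before shipping a
kernel. A minimal harness (a sketch; fill_under_test is a stand-in for whichever
fill routine is being validated, here simply libc memset):

#include <assert.h>
#include <stdio.h>
#include <string.h>

static void *fill_under_test(void *p, int c, size_t n)
{
	return memset(p, c, n);
}

int main(void)
{
	static unsigned char buf[4096], ref[4096];

	for (size_t align = 0; align < 32; align++) {
		for (size_t n = 0; n < 512; n++) {
			memset(buf, 0xaa, sizeof(buf));
			memset(ref, 0xaa, sizeof(ref));
			fill_under_test(buf + align, 0x5a, n);
			for (size_t i = 0; i < n; i++)
				ref[align + i] = 0x5a;
			/* Also catches writes past either end of the span. */
			assert(memcmp(buf, ref, sizeof(buf)) == 0);
		}
	}
	printf("all alignment/size combinations OK\n");
	return 0;
}
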