89 lines
3.1 KiB
Diff
89 lines
3.1 KiB
Diff
From: Nicolas Pitre <nico@cam.org>
|
|
Date: Fri, 22 May 2009 02:17:17 +0000 (-0400)
|
|
Subject: [ARM] lower overhead with alternative copy_to_user for small copies
|
|
X-Git-Url: http://git.marvell.com/?p=orion.git;a=commitdiff_plain;h=cb9dc92c0a1b76165c8c334402e27191084b2047
|
|
|
|
[ARM] lower overhead with alternative copy_to_user for small copies
|
|
|
|
Because the alternate copy_to_user implementation has a higher setup cost
|
|
than the standard implementation, the size of the memory area to copy
|
|
is tested and the standard implementation invoked instead when that size
|
|
is too small. Still, that test is made after the processor has preserved
|
|
a bunch of registers on the stack which have to be reloaded right away
|
|
needlessly in that case, causing a measurable performance regression
|
|
compared to plain usage of the standard implementation only.
|
|
|
|
To make the size test overhead negligible, let's factor it out of
|
|
the alternate copy_to_user function into a wrapper where it is clear to the
|
|
compiler that no stack frame is needed. Thanks to CONFIG_ARM_UNWIND allowing
|
|
for frame pointers to be disabled and tail call optimization to kick in,
|
|
the overhead in the small copy case becomes only 3 assembly instructions.
|
|
|
|
A similar trick is applied to clear_user as well.
|
|
|
|
Signed-off-by: Nicolas Pitre <nico@marvell.com>
|
|
---
|
|
|
|
diff --git a/arch/arm/lib/uaccess_with_memcpy.c b/arch/arm/lib/uaccess_with_memcpy.c
|
|
index bf987b4..92838e7 100644
|
|
--- a/arch/arm/lib/uaccess_with_memcpy.c
|
|
+++ b/arch/arm/lib/uaccess_with_memcpy.c
|
|
@@ -49,14 +49,11 @@ pin_page_for_write(const void __user *_addr, pte_t **ptep, spinlock_t **ptlp)
|
|
return 1;
|
|
}
|
|
|
|
-unsigned long
|
|
-__copy_to_user(void __user *to, const void *from, unsigned long n)
|
|
+static unsigned long noinline
|
|
+__copy_to_user_memcpy(void __user *to, const void *from, unsigned long n)
|
|
{
|
|
int atomic;
|
|
|
|
- if (n < 1024)
|
|
- return __copy_to_user_std(to, from, n);
|
|
-
|
|
if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
|
|
memcpy((void *)to, from, n);
|
|
return 0;
|
|
@@ -99,11 +96,24 @@ out:
|
|
return n;
|
|
}
|
|
|
|
-unsigned long __clear_user(void __user *addr, unsigned long n)
|
|
+unsigned long
|
|
+__copy_to_user(void __user *to, const void *from, unsigned long n)
|
|
+{
|
|
+ /*
|
|
+ * This test is stubbed out of the main function above to keep
|
|
+ * the overhead for small copies low by avoiding a large
|
|
+ * register dump on the stack just to reload them right away.
|
|
+ * With frame pointer disabled, tail call optimization kicks in
|
|
+ * as well making this test almost invisible.
|
|
+ */
|
|
+ if (n < 1024)
|
|
+ return __copy_to_user_std(to, from, n);
|
|
+ return __copy_to_user_memcpy(to, from, n);
|
|
+}
|
|
+
|
|
+static unsigned long noinline
|
|
+__clear_user_memset(void __user *addr, unsigned long n)
|
|
{
|
|
- if (n < 256)
|
|
- return __clear_user_std(addr, n);
|
|
-
|
|
if (unlikely(segment_eq(get_fs(), KERNEL_DS))) {
|
|
memset((void *)addr, 0, n);
|
|
return 0;
|
|
@@ -137,3 +147,11 @@ unsigned long __clear_user(void __user *addr, unsigned long n)
|
|
out:
|
|
return n;
|
|
}
|
|
+
|
|
+unsigned long __clear_user(void __user *addr, unsigned long n)
|
|
+{
|
|
+ /* See rationale for this in __copy_to_user() above. */
|
|
+ if (n < 256)
|
|
+ return __clear_user_std(addr, n);
|
|
+ return __clear_user_memset(addr, n);
|
|
+}
|