Ref:
https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=05a910f7b420c2b831f35ba90e61c80f001c0606


From 05a910f7b420c2b831f35ba90e61c80f001c0606 Mon Sep 17 00:00:00 2001
From: Wilco Dijkstra <wdijkstr@arm.com>
Date: Wed, 5 Aug 2015 15:58:15 +0100
Subject: [PATCH] Improve performance of mempcpy by inlining and using memcpy.
 Enable this for all targets except sparc which has an optimized mempcpy
 implementation.

---
 ChangeLog                   |  6 ++++++
 string/string.h             | 19 +++++++++++++++++++
 sysdeps/sparc/bits/string.h |  3 +++
 3 files changed, 28 insertions(+)

diff --git a/string/string.h b/string/string.h
index 54a4d39..3ab7103 100644
--- a/string/string.h
+++ b/string/string.h
@@ -636,6 +636,25 @@ extern char *basename (const char *__filename) __THROW __nonnull ((1));
 # endif
 #endif
 
+#if defined __USE_GNU && defined __OPTIMIZE__ \
+    && defined __extern_always_inline && __GNUC_PREREQ (3,2)
+# if !defined _FORCE_INLINES && !defined _HAVE_STRING_ARCH_mempcpy
+
+#undef mempcpy
+#undef __mempcpy
+#define mempcpy(dest, src, n) __mempcpy_inline (dest, src, n)
+#define __mempcpy(dest, src, n) __mempcpy_inline (dest, src, n)
+
+__extern_always_inline void *
+__mempcpy_inline (void *__restrict __dest,
+		  const void *__restrict __src, size_t __n)
+{
+  return (char *) memcpy (__dest, __src, __n) + __n;
+}
+
+# endif
+#endif
+
 __END_DECLS
 
 #endif /* string.h  */
diff --git a/sysdeps/sparc/bits/string.h b/sysdeps/sparc/bits/string.h
index 36fbb4c..4eb9447 100644
--- a/sysdeps/sparc/bits/string.h
+++ b/sysdeps/sparc/bits/string.h
@@ -26,3 +26,6 @@
 /* sparc32 and sparc64 strchr(x, '\0') perform better than
    __rawmemchr(x, '\0').  */
 #define _HAVE_STRING_ARCH_strchr 1
+
+/* Don't inline mempcpy into memcpy as sparc has an optimized mempcpy.  */
+#define _HAVE_STRING_ARCH_mempcpy 1
-- 
1.9.1

