diff -Naur glibc-2.11.orig/ports/sysdeps/mips/bits/atomic.h glibc-2.11/ports/sysdeps/mips/bits/atomic.h
--- glibc-2.11.orig/ports/sysdeps/mips/bits/atomic.h	2011-04-13 20:05:08.334348541 -0400
+++ glibc-2.11/ports/sysdeps/mips/bits/atomic.h	2011-04-13 20:37:54.024345224 -0400
@@ -154,25 +154,47 @@
 
 /* Compare and exchange with "acquire" semantics, ie barrier after.  */
 
+#if defined(_MIPS_ARCH_XLP)
+#define atomic_compare_and_exchange_bool_acq(mem, new, old)	\
+  __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
+		        mem, new, old, "", "")
+#else
 #define atomic_compare_and_exchange_bool_acq(mem, new, old)	\
   __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
 		        mem, new, old, "", MIPS_SYNC_STR)
+#endif
 
+#if defined(_MIPS_ARCH_XLP)
+#define atomic_compare_and_exchange_val_acq(mem, new, old)	\
+  __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
+		       mem, new, old, "", "")
+#else
 #define atomic_compare_and_exchange_val_acq(mem, new, old)	\
   __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
 		       mem, new, old, "", MIPS_SYNC_STR)
+#endif
 
 /* Compare and exchange with "release" semantics, ie barrier before.  */
 
+#if defined(_MIPS_ARCH_XLP)
+#define atomic_compare_and_exchange_bool_rel(mem, new, old)	\
+  __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
+		        mem, new, old, "", "")
+#else
 #define atomic_compare_and_exchange_bool_rel(mem, new, old)	\
   __atomic_bool_bysize (__arch_compare_and_exchange_bool, int,	\
 		        mem, new, old, MIPS_SYNC_STR, "")
+#endif
 
+#if defined(_MIPS_ARCH_XLP)
+#define atomic_compare_and_exchange_val_rel(mem, new, old)	\
+  __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
+		       mem, new, old, "", "")
+#else
 #define atomic_compare_and_exchange_val_rel(mem, new, old)	\
   __atomic_val_bysize (__arch_compare_and_exchange_val, int,	\
 		       mem, new, old, MIPS_SYNC_STR, "")
-
-
+#endif
 
 /* Atomic exchange (without compare).  */
 
@@ -182,6 +204,37 @@
 #define __arch_exchange_xxx_16_int(mem, newval, rel, acq) \
   (abort (), 0)
 
+#if defined(_MIPS_ARCH_XLP)
+static int __always_inline __arch_exchange_xxx_32_int(int *mem,
+		typeof (*mem) newval, const char *rel, const char *acq) {
+
+	__asm__ __volatile__
+		(".set  push\n\t"
+		 "swapw %0,%2\n\t"
+		 ".set  pop\n"
+		 : "=&r" (newval), "=m" (*mem)
+		 : "r" (mem), "0"
+		 (newval));
+
+	return newval;
+}
+#elif defined(_MIPS_ARCH_XLR)
+static int __always_inline __arch_exchange_xxx_32_int(int *mem,
+		typeof (*mem) newval, const char *rel, const char *acq) {
+
+	__asm__ __volatile__
+		(".set  push\n\t"
+		 "sync	\n\t"
+		 "swapw %0,%2\n\t"
+		 "sync	\n\t"
+		 ".set  pop\n"
+		 : "=&r" (newval), "=m" (*mem)
+		 : "r" (mem), "0"
+		 (newval));
+
+	return newval;
+}
+#else
 #define __arch_exchange_xxx_32_int(mem, newval, rel, acq) \
 ({ typeof (*mem) __prev; int __cmp;					      \
      __asm__ __volatile__ ("\n"						      \
@@ -200,12 +253,45 @@
 	      : "r" (newval), "m" (*mem)				      \
 	      : "memory");						      \
   __prev; })
+#endif
 
 #if _MIPS_SIM == _ABIO32
 /* We can't do an atomic 64-bit operation in O32.  */
 #define __arch_exchange_xxx_64_int(mem, newval, rel, acq) \
   (abort (), 0)
 #else
+
+#if defined(_MIPS_ARCH_XLP)
+static long __always_inline __arch_exchange_xxx_64_int(long *mem,
+		typeof (*mem) newval, const char *rel, const char *acq) {
+
+	__asm__ __volatile__
+		(".set  push\n\t"
+		 "swapd %0,%2\n\t"
+		 ".set  pop\n"
+		 : "=&r" (newval), "=m" (*mem)
+		 : "r" (mem), "0"
+		 (newval));
+
+	return newval;
+}
+#elif defined(_MIPS_ARCH_XLR)
+static long __always_inline __arch_exchange_xxx_64_int(long *mem,
+		typeof (*mem) newval, const char *rel, const char *acq) {
+
+	__asm__ __volatile__
+		(".set  push\n\t"
+		 "sync	\n\t"
+		 "swapd %0,%2\n\t"
+		 "sync	\n\t"
+		 ".set  pop\n"
+		 : "=&r" (newval), "=m" (*mem)
+		 : "r" (mem), "0"
+		 (newval));
+
+	return newval;
+}
+#else
 #define __arch_exchange_xxx_64_int(mem, newval, rel, acq) \
 ({ typeof (*mem) __prev; int __cmp;					      \
      __asm__ __volatile__ ("\n"						      \
@@ -225,6 +311,7 @@
 	      : "memory");						      \
   __prev; })
 #endif
+#endif
 
 #define atomic_exchange_acq(mem, value) \
   __atomic_val_bysize (__arch_exchange_xxx, int, mem, value, "", MIPS_SYNC_STR)
@@ -241,6 +328,33 @@
 #define __arch_exchange_and_add_16_int(mem, newval, rel, acq) \
   (abort (), (typeof(*mem)) 0)
 
+#if defined (_MIPS_ARCH_XLP)
+static int __always_inline __arch_exchange_and_add_32_int(unsigned int *mem,
+		typeof (*mem) value, const char *rel, const char *acq) {
+
+	__asm__ __volatile__ (
+			".set       push\n\t"
+			"ldaddw     %0,%2\n\t"
+			".set       pop\n"
+			: "=&r" (value), "=m" (*mem)
+			: "r"(mem), "0" (value));
+	return value;
+}
+#elif defined(_MIPS_ARCH_XLR)
+static int __always_inline __arch_exchange_and_add_32_int(unsigned int *mem,
+		typeof (*mem) value, const char *rel, const char *acq) {
+
+	__asm__ __volatile__ (
+			".set       push\n\t"
+			"sync       \n\t"
+			"ldaddw     %0,%2\n\t"
+			"sync       \n\t"
+			".set       pop\n"
+			: "=&r" (value), "=m" (*mem)
+			: "r"(mem), "0" (value));
+	return value;
+}
+#else
 #define __arch_exchange_and_add_32_int(mem, value, rel, acq) \
 ({ typeof (*mem) __prev; int __cmp;					      \
      __asm__ __volatile__ ("\n"						      \
@@ -259,12 +373,40 @@
 	      : "r" (value), "m" (*mem)					      \
 	      : "memory");						      \
   __prev; })
+#endif
 
 #if _MIPS_SIM == _ABIO32
 /* We can't do an atomic 64-bit operation in O32.  */
 #define __arch_exchange_and_add_64_int(mem, value, rel, acq) \
   (abort (), (typeof(*mem)) 0)
 #else
+#if defined (_MIPS_ARCH_XLP)
+static long __always_inline __arch_exchange_and_add_64_int(unsigned long *mem,
+		typeof (*mem) value, const char *rel, const char *acq) {
+
+	__asm__ __volatile__ (
+			".set       push\n\t"
+			"ldaddd     %0,%2\n\t"
+			".set       pop\n"
+			: "=&r" (value), "=m" (*mem)
+			: "r"(mem), "0" (value));
+	return value;
+}
+#elif defined(_MIPS_ARCH_XLR)
+static long __always_inline __arch_exchange_and_add_64_int(unsigned long *mem,
+		typeof (*mem) value, const char *rel, const char *acq) {
+
+	__asm__ __volatile__ (
+			".set       push\n\t"
+			"sync       \n\t"
+			"ldaddd     %0,%2\n\t"
+			"sync       \n\t"
+			".set       pop\n"
+			: "=&r" (value), "=m" (*mem)
+			: "r"(mem), "0" (value));
+	return value;
+}
+#else
 #define __arch_exchange_and_add_64_int(mem, value, rel, acq) \
 ({ typeof (*mem) __prev; int __cmp;					      \
      __asm__ __volatile__ (						      \
@@ -284,6 +426,7 @@
 	      : "memory");						      \
   __prev; })
 #endif
+#endif
 
 /* ??? Barrier semantics for atomic_exchange_and_add appear to be 
    undefined.  Use full barrier for now, as that's safe.  */
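The XLP/XLR sequences above depend on swapw and ldaddw returning the value the
word held before the update -- the same contract the LL/SC macros they replace
provide. A minimal host-side check of that contract (not part of the patch; it
models the semantics with the GCC __atomic builtins purely as a reference):

#include <assert.h>

int
main (void)
{
  int counter = 40;

  /* Exchange-and-add, as ldaddw does: yields the pre-update value.  */
  int prev = __atomic_fetch_add (&counter, 2, __ATOMIC_SEQ_CST);
  assert (prev == 40 && counter == 42);

  /* Plain exchange, as swapw does: yields the pre-update value.  */
  int old = __atomic_exchange_n (&counter, 7, __ATOMIC_SEQ_CST);
  assert (old == 42 && counter == 7);

  return 0;
}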
diff -Naur glibc-2.11.orig/ports/sysdeps/mips/memset.S glibc-2.11/ports/sysdeps/mips/memset.S
--- glibc-2.11.orig/ports/sysdeps/mips/memset.S	2011-04-13 20:05:08.334348541 -0400
+++ glibc-2.11/ports/sysdeps/mips/memset.S	2011-04-13 20:37:54.024345224 -0400
@@ -1,86 +1,192 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+/* -------------------------------------------------------------
+ * Copyright 2003-2011 Netlogic Microsystems Inc. (“Netlogic”).
+ * -------------------------------------------------------------
+ * This is a derived work from software originally provided by
+ * the external entity identified below. The licensing terms and
+ * warranties specified in the header of the original work apply
+ * to this derived work.
+ * --------------------------#NETL_1#---------------------------
+ */
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
 
 #include <sysdep.h>
 #include <endian.h>
+#include <sys/asm.h>
 
+#if defined(__mips64)
+#define LONGSIZE    8
+#define LONGMASK    7
+#define LONGLOG     3
+#else
+#define LONGSIZE    4
+#define LONGMASK    3
+#define LONGLOG     2
+#endif
 
-/* void *memset(void *s, int c, size_t n).  */
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define SWHI	swl		/* high part is left in big-endian	*/
+#if LONGSIZE == 4
+#define LONG_S_L swl
+#define LONG_S_R swr
 #else
-# define SWHI	swr		/* high part is right in little-endian	*/
+#define LONG_S_L sdl
+#define LONG_S_R sdr
 #endif
 
-ENTRY (memset)
+/* Thread-info offsets taken from the Linux kernel; only the __ex_table fixup stubs below use them.  */
+#define TI_TASK 0
+#define THREAD_BUADDR 1312
+
+#define EX(insn,reg,addr,handler)			\
+9:	insn	reg, addr;				\
+	.section __ex_table,"a"; 			\
+	PTR	9b, handler; 				\
+	.previous
+
+	.macro	f_fill64 dst, offset, val, fixup
+	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
+#if LONGSIZE == 4
+	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
+#endif
+	.endm
+
+/*
+ * memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to clear
+ * a1: char to fill with
+ * a2: size of area to clear
+ */
 	.set	noreorder
+	.align	5
+ENTRY (memset)
+	beqz		a1, 1f
+	 move		v0, a0			/* result */
+
+	andi		a1, 0xff		/* spread fillword */
+	LONG_SLL	t1, a1, 8
+	or		a1, t1
+	LONG_SLL	t1, a1, 16
+#if LONGSIZE == 8
+	or		a1, t1
+	LONG_SLL	t1, a1, 32
+#endif
+	or		a1, t1
+1:
+	sltiu		t0, a2, LONGSIZE	/* very small region? */
+	bnez		t0, .Lsmall_memset
+	 andi		t0, a0, LONGMASK	/* aligned? */
+
+	beqz		t0, 1f
+	PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
+
+#ifdef __MIPSEB__
+	EX(LONG_S_L, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+#endif
+#ifdef __MIPSEL__
+	EX(LONG_S_R, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+#endif
+	PTR_SUBU	a0, t0			/* long align ptr */
+	PTR_ADDU	a2, t0			/* correct size */
 
-	slti	t1, a2, 8		# Less than 8?
-	bne	t1, zero, L(last8)
-	move	v0, a0			# Setup exit value before too late
-
-	beq	a1, zero, L(ueven)	# If zero pattern, no need to extend
-	andi	a1, 0xff		# Avoid problems with bogus arguments
-	sll	t0, a1, 8
-	or	a1, t0
-	sll	t0, a1, 16
-	or	a1, t0			# a1 is now pattern in full word
-
-L(ueven):	
-	subu	t0, zero, a0		# Unaligned address?
-	andi	t0, 0x3
-	beq	t0, zero, L(chkw)
-	subu	a2, t0
-	SWHI	a1, 0(a0)		# Yes, handle first unaligned part
-	addu	a0, t0			# Now both a0 and a2 are updated
-
-L(chkw):	
-	andi	t0, a2, 0x7		# Enough left for one loop iteration?
-	beq	t0, a2, L(chkl)
-	subu	a3, a2, t0
-	addu	a3, a0			# a3 is last loop address +1
-	move	a2, t0			# a2 is now # of bytes left after loop
-L(loopw):	
-	addiu	a0, 8			# Handle 2 words pr. iteration
-	sw	a1, -8(a0)
-	bne	a0, a3, L(loopw)
-	sw	a1, -4(a0)
-
-L(chkl):	
-	andi	t0, a2, 0x4		# Check if there is at least a full
-	beq	t0, zero, L(last8)	#  word remaining after the loop
-	subu	a2, t0
-	sw	a1, 0(a0)		# Yes...
-	addiu	a0, 4
-
-L(last8):	
-	blez	a2, L(exit)		# Handle last 8 bytes (if cnt>0)
-	addu	a3, a2, a0		# a3 is last address +1
-L(lst8l):	
-	addiu	a0, 1
-	bne	a0, a3, L(lst8l)
-	sb	a1, -1(a0)
-L(exit):	
-	j	ra			# Bye, bye
-	nop
+1:	ori		t1, a2, 0x3f		/* # of full blocks */
+	xori		t1, 0x3f
+	beqz		t1, .Lmemset_partial	/* no block to fill */
+	 andi		t0, a2, 0x40-LONGSIZE
+
+	PTR_ADDU	t1, a0			/* end address */
+	.set		reorder
+1:	PTR_ADDIU	a0, 64
+	f_fill64 a0, -64, a1, .Lfwd_fixup
+	bne		t1, a0, 1b
+	.set		noreorder
+
+.Lmemset_partial:
+	PTR_LA		t1, 2f			/* where to start */
+#if LONGSIZE == 4
+	PTR_SUBU	t1, t0
+#else
+	.set		noat
+	LONG_SRL	AT, t0, 1
+	PTR_SUBU	t1, AT
+	.set		at
+#endif
+	jr		t1
+	 PTR_ADDU	a0, t0			/* dest ptr */
 
-	.set	reorder
-END (memset)
+	.set		push
+	.set		noreorder
+	.set		nomacro
+	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */
+2:	.set		pop
+	andi		a2, LONGMASK		/* At most one long to go */
+
+	beqz		a2, 1f
+	 PTR_ADDU	a0, a2			/* What's left */
+#ifdef __MIPSEB__
+	EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
+#endif
+#ifdef __MIPSEL__
+	EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
+#endif
+1:	jr		ra
+	 move		a2, zero
+
+.Lsmall_memset:
+	beqz		a2, 2f
+	 PTR_ADDU	t1, a0, a2
+
+1:	PTR_ADDIU	a0, 1			/* fill bytewise */
+	bne		t1, a0, 1b
+	 sb		a1, -1(a0)
+
+2:	jr		ra			/* done */
+	 move		a2, zero
+
+END(memset)
 libc_hidden_builtin_def (memset)
+
+.Lfirst_fixup:
+	jr	ra
+	 nop
+
+.Lfwd_fixup:
+	PTR_L		t0, TI_TASK($28)
+	LONG_L		t0, THREAD_BUADDR(t0)
+	andi		a2, 0x3f
+	LONG_ADDU	a2, t1
+	jr		ra
+	 LONG_SUBU	a2, t0
+
+.Lpartial_fixup:
+	PTR_L		t0, TI_TASK($28)
+	LONG_L		t0, THREAD_BUADDR(t0)
+	andi		a2, LONGMASK
+	LONG_ADDU	a2, t1
+	jr		ra
+	 LONG_SUBU	a2, t0
+
+.Llast_fixup:
+	jr		ra
+	 andi		v1, a2, LONGMASK
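The .Lmemset_partial path above is the subtle piece: it computes an address
inside the unrolled f_fill64 block and jumps there, so only the stores covering
the remaining full words execute. A portable-C sketch of the same technique
(not part of the patch; switch fallthrough stands in for the computed jr, and
three words stand in for the up to 15 stores of the real block):

#include <assert.h>
#include <stddef.h>

/* dst points one past the end of the region; words is how many trailing
   word-sized stores remain.  Falling through the switch mimics entering
   the unrolled block part-way through.  */
static void
fill_tail (unsigned long *dst, unsigned long val, size_t words)
{
  switch (words)
    {
    case 3: dst[-3] = val; /* fall through */
    case 2: dst[-2] = val; /* fall through */
    case 1: dst[-1] = val; /* fall through */
    case 0: break;
    }
}

int
main (void)
{
  unsigned long buf[4] = { 0, 0, 0, 0 };
  fill_tail (buf + 4, ~0UL, 3);
  assert (buf[0] == 0 && buf[1] == ~0UL && buf[2] == ~0UL && buf[3] == ~0UL);
  return 0;
}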
diff -Naur glibc-2.11.orig/ports/sysdeps/mips/mips64/memset.S glibc-2.11/ports/sysdeps/mips/mips64/memset.S
--- glibc-2.11.orig/ports/sysdeps/mips/mips64/memset.S	2011-04-13 20:05:08.334348541 -0400
+++ glibc-2.11/ports/sysdeps/mips/mips64/memset.S	2011-04-13 20:37:54.024345224 -0400
@@ -1,92 +1,192 @@
-/* Copyright (C) 2002, 2003 Free Software Foundation, Inc.
-   This file is part of the GNU C Library.
-   Contributed by Hartvig Ekner <hartvige@mips.com>, 2002.
-   Ported to mips3 n32/n64 by Alexandre Oliva <aoliva@redhat.com>
-
-   The GNU C Library is free software; you can redistribute it and/or
-   modify it under the terms of the GNU Lesser General Public
-   License as published by the Free Software Foundation; either
-   version 2.1 of the License, or (at your option) any later version.
-
-   The GNU C Library is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-   Lesser General Public License for more details.
-
-   You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
-   02111-1307 USA.  */
+/* -------------------------------------------------------------
+ * Copyright 2003-2011 Netlogic Microsystems Inc. (“Netlogic”).
+ * -------------------------------------------------------------
+ * This is a derived work from software originally provided by
+ * the external entity identified below. The licensing terms and
+ * warranties specified in the header of the original work apply
+ * to this derived work.
+ * --------------------------#NETL_1#---------------------------
+ */
+
+/*
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file "COPYING" in the main directory of this archive
+ * for more details.
+ *
+ * Copyright (C) 1998, 1999, 2000 by Ralf Baechle
+ * Copyright (C) 1999, 2000 Silicon Graphics, Inc.
+ * Copyright (C) 2007  Maciej W. Rozycki
+ */
 
 #include <sysdep.h>
 #include <endian.h>
 #include <sys/asm.h>
 
+#if defined(__mips64)
+#define LONGSIZE    8
+#define LONGMASK    7
+#define LONGLOG     3
+#else
+#define LONGSIZE    4
+#define LONGMASK    3
+#define LONGLOG     2
+#endif
 
-/* void *memset(void *s, int c, size_t n);
-	
-   This could probably be optimized further.  */
-
-#if __BYTE_ORDER == __BIG_ENDIAN
-# define SDHI	sdl		/* high part is left in big-endian	*/
+#if LONGSIZE == 4
+#define LONG_S_L swl
+#define LONG_S_R swr
 #else
-# define SDHI	sdr		/* high part is right in little-endian	*/
+#define LONG_S_L sdl
+#define LONG_S_R sdr
 #endif
 
-ENTRY (memset)
+/* Thread-info offsets taken from the Linux kernel; only the __ex_table fixup stubs below use them.  */
+#define TI_TASK 0
+#define THREAD_BUADDR 1312
+
+#define EX(insn,reg,addr,handler)			\
+9:	insn	reg, addr;				\
+	.section __ex_table,"a"; 			\
+	PTR	9b, handler; 				\
+	.previous
+
+	.macro	f_fill64 dst, offset, val, fixup
+	EX(LONG_S, \val, (\offset +  0 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  1 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  2 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  3 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  4 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  5 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  6 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  7 * LONGSIZE)(\dst), \fixup)
+#if LONGSIZE == 4
+	EX(LONG_S, \val, (\offset +  8 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset +  9 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 10 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 11 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 12 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 13 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 14 * LONGSIZE)(\dst), \fixup)
+	EX(LONG_S, \val, (\offset + 15 * LONGSIZE)(\dst), \fixup)
+#endif
+	.endm
+
+/*
+ * memset(void *s, int c, size_t n)
+ *
+ * a0: start of area to clear
+ * a1: char to fill with
+ * a2: size of area to clear
+ */
 	.set	noreorder
+	.align	5
+ENTRY (memset)
+	beqz		a1, 1f
+	 move		v0, a0			/* result */
+
+	andi		a1, 0xff		/* spread fillword */
+	LONG_SLL	t1, a1, 8
+	or		a1, t1
+	LONG_SLL	t1, a1, 16
+#if LONGSIZE == 8
+	or		a1, t1
+	LONG_SLL	t1, a1, 32
+#endif
+	or		a1, t1
+1:
+	sltiu		t0, a2, LONGSIZE	/* very small region? */
+	bnez		t0, .Lsmall_memset
+	 andi		t0, a0, LONGMASK	/* aligned? */
+
+	beqz		t0, 1f
+	PTR_SUBU	t0, LONGSIZE		/* alignment in bytes */
+
+#ifdef __MIPSEB__
+	EX(LONG_S_L, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+#endif
+#ifdef __MIPSEL__
+	EX(LONG_S_R, a1, (a0), .Lfirst_fixup)	/* make word/dword aligned */
+#endif
+	PTR_SUBU	a0, t0			/* long align ptr */
+	PTR_ADDU	a2, t0			/* correct size */
 
-	slti	ta1, a2, 16		# Less than 16?
-	bne	ta1, zero, L(last16)
-	move	v0, a0			# Setup exit value before too late
-
-	beq	a1, zero, L(ueven)	# If zero pattern, no need to extend
-	andi	a1, 0xff		# Avoid problems with bogus arguments
-	dsll	ta0, a1, 8
-	or	a1, ta0
-	dsll	ta0, a1, 16
-	or	a1, ta0			# a1 is now pattern in full word
-	dsll	ta0, a1, 32
-	or	a1, ta0			# a1 is now pattern in double word
-
-L(ueven):
-	PTR_SUBU ta0, zero, a0		# Unaligned address?
-	andi	ta0, 0x7
-	beq	ta0, zero, L(chkw)
-	PTR_SUBU a2, ta0
-	SDHI	a1, 0(a0)		# Yes, handle first unaligned part
-	PTR_ADDU a0, ta0		# Now both a0 and a2 are updated
-
-L(chkw):
-	andi	ta0, a2, 0xf		# Enough left for one loop iteration?
-	beq	ta0, a2, L(chkl)
-	PTR_SUBU a3, a2, ta0
-	PTR_ADDU a3, a0			# a3 is last loop address +1
-	move	a2, ta0			# a2 is now # of bytes left after loop
-L(loopw):
-	PTR_ADDIU a0, 16		# Handle 2 dwords pr. iteration
-	sd	a1, -16(a0)
-	bne	a0, a3, L(loopw)
-	sd	a1,  -8(a0)
-
-L(chkl):
-	andi	ta0, a2, 0x8		# Check if there is at least a double
-	beq	ta0, zero, L(last16)	#  word remaining after the loop
-	PTR_SUBU a2, ta0
-	sd	a1, 0(a0)		# Yes...
-	PTR_ADDIU a0, 8
-
-L(last16):
-	blez	a2, L(exit)		# Handle last 16 bytes (if cnt>0)
-	PTR_ADDU a3, a2, a0		# a3 is last address +1
-L(lst16l):
-	PTR_ADDIU a0, 1
-	bne	a0, a3, L(lst16l)
-	sb	a1, -1(a0)
-L(exit):
-	j	ra			# Bye, bye
-	nop
+1:	ori		t1, a2, 0x3f		/* # of full blocks */
+	xori		t1, 0x3f
+	beqz		t1, .Lmemset_partial	/* no block to fill */
+	 andi		t0, a2, 0x40-LONGSIZE
+
+	PTR_ADDU	t1, a0			/* end address */
+	.set		reorder
+1:	PTR_ADDIU	a0, 64
+	f_fill64 a0, -64, a1, .Lfwd_fixup
+	bne		t1, a0, 1b
+	.set		noreorder
+
+.Lmemset_partial:
+	PTR_LA		t1, 2f			/* where to start */
+#if LONGSIZE == 4
+	PTR_SUBU	t1, t0
+#else
+	.set		noat
+	LONG_SRL	AT, t0, 1
+	PTR_SUBU	t1, AT
+	.set		at
+#endif
+	jr		t1
+	 PTR_ADDU	a0, t0			/* dest ptr */
 
-	.set	reorder
-END (memset)
+	.set		push
+	.set		noreorder
+	.set		nomacro
+	f_fill64 a0, -64, a1, .Lpartial_fixup	/* ... but first do longs ... */
+2:	.set		pop
+	andi		a2, LONGMASK		/* At most one long to go */
+
+	beqz		a2, 1f
+	 PTR_ADDU	a0, a2			/* What's left */
+#ifdef __MIPSEB__
+	EX(LONG_S_R, a1, -1(a0), .Llast_fixup)
+#endif
+#ifdef __MIPSEL__
+	EX(LONG_S_L, a1, -1(a0), .Llast_fixup)
+#endif
+1:	jr		ra
+	 move		a2, zero
+
+.Lsmall_memset:
+	beqz		a2, 2f
+	 PTR_ADDU	t1, a0, a2
+
+1:	PTR_ADDIU	a0, 1			/* fill bytewise */
+	bne		t1, a0, 1b
+	 sb		a1, -1(a0)
+
+2:	jr		ra			/* done */
+	 move		a2, zero
+
+END(memset)
 libc_hidden_builtin_def (memset)
+
+.Lfirst_fixup:
+	jr	ra
+	 nop
+
+.Lfwd_fixup:
+	PTR_L		t0, TI_TASK($28)
+	LONG_L		t0, THREAD_BUADDR(t0)
+	andi		a2, 0x3f
+	LONG_ADDU	a2, t1
+	jr		ra
+	 LONG_SUBU	a2, t0
+
+.Lpartial_fixup:
+	PTR_L		t0, TI_TASK($28)
+	LONG_L		t0, THREAD_BUADDR(t0)
+	andi		a2, LONGMASK
+	LONG_ADDU	a2, t1
+	jr		ra
+	 LONG_SUBU	a2, t0
+
+.Llast_fixup:
+	jr		ra
+	 andi		v1, a2, LONGMASK
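Both memset variants begin by replicating the fill byte across a full register
(the "spread fillword" LONG_SLL/or sequence). The same computation in C (not
part of the patch):

#include <assert.h>
#include <stdint.h>

static uint64_t
spread_byte (unsigned char c)
{
  uint64_t v = c;
  v |= v << 8;	/* byte doubled into 16 bits */
  v |= v << 16;	/* -> 32 bits */
  v |= v << 32;	/* -> full 64-bit word */
  return v;
}

int
main (void)
{
  assert (spread_byte (0xAB) == 0xABABABABABABABABULL);
  return 0;
}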
diff -Naur glibc-2.11.orig/ports/sysdeps/mips/nptl/pthread_spin_lock.S glibc-2.11/ports/sysdeps/mips/nptl/pthread_spin_lock.S
--- glibc-2.11.orig/ports/sysdeps/mips/nptl/pthread_spin_lock.S	2011-04-13 20:05:08.334348541 -0400
+++ glibc-2.11/ports/sysdeps/mips/nptl/pthread_spin_lock.S	2011-04-13 20:37:54.024345224 -0400
@@ -21,6 +21,31 @@
 #include <sgidefs.h>
 
 ENTRY (pthread_spin_lock)
+
+#if defined(_MIPS_ARCH_XLP)
+	.set    push
+	.set    noreorder
+	li      a1, 1
+1:  swapw   a1, a0
+	bnez    a1, 1b
+	li      a1, 1
+	.set    reorder
+	.set    pop
+	li      v0, 0
+	ret
+#elif defined(_MIPS_ARCH_XLR)
+	.set    push
+	.set    noreorder
+	li      a1, 1
+1:  swapw   a1, a0
+	bnez    a1, 1b
+	li      a1, 1
+	MIPS_SYNC
+	.set    reorder
+	.set    pop
+	li      v0, 0
+	ret
+#else
 	.set	push
 #if _MIPS_SIM == _ABIO32
 	.set	mips2
@@ -34,4 +59,6 @@
 	.set	pop
 	li	v0, 0
 	ret
+#endif
+
 PSEUDO_END (pthread_spin_lock)
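The XLP/XLR paths above implement a plain swap lock: exchange 1 into the lock
word, and a returned 0 means the lock was free; XLR additionally issues sync to
order the acquire. A C11 model of the same protocol, covering the trylock
variant in the next file as well (not part of the patch; the spinlock_t name
and helpers are illustrative):

#include <assert.h>
#include <errno.h>
#include <stdatomic.h>

typedef atomic_int spinlock_t;

static void
spin_lock (spinlock_t *lock)
{
  /* swapw analogue: swap 1 in; an old value of 0 means we own the lock.  */
  while (atomic_exchange_explicit (lock, 1, memory_order_acquire) != 0)
    ;
}

static int
spin_trylock (spinlock_t *lock)
{
  return atomic_exchange_explicit (lock, 1, memory_order_acquire) == 0
	 ? 0 : EBUSY;
}

static void
spin_unlock (spinlock_t *lock)
{
  atomic_store_explicit (lock, 0, memory_order_release);
}

int
main (void)
{
  static spinlock_t l;		/* zero-initialized: unlocked */
  spin_lock (&l);
  assert (spin_trylock (&l) == EBUSY);
  spin_unlock (&l);
  assert (spin_trylock (&l) == 0);
  return 0;
}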
diff -Naur glibc-2.11.orig/ports/sysdeps/mips/nptl/pthread_spin_trylock.S glibc-2.11/ports/sysdeps/mips/nptl/pthread_spin_trylock.S
--- glibc-2.11.orig/ports/sysdeps/mips/nptl/pthread_spin_trylock.S	2011-04-13 20:05:08.334348541 -0400
+++ glibc-2.11/ports/sysdeps/mips/nptl/pthread_spin_trylock.S	2011-04-13 20:37:54.024345224 -0400
@@ -23,6 +23,33 @@
 #include <sgidefs.h>
 
 ENTRY (pthread_spin_trylock)
+
+#if defined(_MIPS_ARCH_XLP)
+	.set    push
+	.set    noreorder
+	li      a1, 1
+	swapw   a1, a0
+	bnez    a1, 1f
+	.set    reorder
+	.set    pop
+	li      v0, 0
+	ret
+1:  li      v0, EBUSY
+	ret
+#elif defined(_MIPS_ARCH_XLR)
+	.set    push
+	.set    noreorder
+	li      a1, 1
+	swapw   a1, a0
+	bnez    a1, 1f
+	MIPS_SYNC
+	.set    reorder
+	.set    pop
+	li      v0, 0
+	ret
+1:  li      v0, EBUSY
+	ret
+#else
 	.set	push
 #if _MIPS_SIM == _ABIO32
 	.set	mips2
@@ -38,4 +65,5 @@
 	ret
 1:	li	v0, EBUSY
 	ret
+#endif
 PSEUDO_END (pthread_spin_trylock)
diff -Naur glibc-2.11.orig/ports/sysdeps/unix/sysv/linux/mips/sys/tas.h glibc-2.11/ports/sysdeps/unix/sysv/linux/mips/sys/tas.h
--- glibc-2.11.orig/ports/sysdeps/unix/sysv/linux/mips/sys/tas.h	2011-04-13 20:05:08.274349865 -0400
+++ glibc-2.11/ports/sysdeps/unix/sysv/linux/mips/sys/tas.h	2011-04-13 20:37:54.024345224 -0400
@@ -37,6 +37,30 @@
 _EXTERN_INLINE int
 __NTH (_test_and_set (int *__p, int __v))
 {
+#if defined(_MIPS_ARCH_XLP)
+	int newval = __v;
+	__asm__ __volatile__
+		("/* Inline test and set */\n"
+		 ".set  push\n\t"
+		 "swapw %0,%2\n\t"
+		 ".set  pop\n\t"
+		 : "=&r" (__v), "=m" (*__p)
+		 : "r" (__p), "0" (__v));
+	return (__v == newval);
+#elif defined (_MIPS_ARCH_XLR)
+	int newval = __v;
+	__asm__ __volatile__
+		("/* Inline test and set */\n"
+		 ".set  push\n\t"
+     	 "sync\n\t"
+		 "swapw %0,%2\n\t"
+     	 "sync\n\t"
+		 ".set  pop\n\t"
+		 : "=&r" (__v), "=m" (*__p)
+		 : "r" (__p), "0" (__v));
+	return (__v == newval);
+#else
+  /* Generic LL/SC implementation.  */
   int __r, __t;
 
   __asm__ __volatile__
@@ -61,6 +85,7 @@
      : "memory");
 
   return __r;
+#endif
 }
 
 #endif /* __USE_EXTERN_INLINES */
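One subtlety in the hunk above: the swapw variants return (old == __v), whereas
the LL/SC fallback returns the old value itself. For the common
_test_and_set (p, 1) call the two conventions agree when tested for zero. A
quick check (not part of the patch; the helper names are illustrative, and the
LL/SC model is deliberately sketched as a non-atomic load/store pair):

#include <assert.h>
#include <stdatomic.h>

static int
tas_swap_style (atomic_int *p, int v)	/* XLP/XLR convention */
{
  return atomic_exchange (p, v) == v;
}

static int
tas_llsc_style (atomic_int *p, int v)	/* generic convention */
{
  int old = atomic_load (p);
  if (old != v)
    atomic_store (p, v);	/* sketch only: not atomic as a pair */
  return old;
}

int
main (void)
{
  atomic_int a = 0, b = 0;
  assert (!tas_swap_style (&a, 1) && !tas_llsc_style (&b, 1)); /* was free */
  assert (tas_swap_style (&a, 1) && tas_llsc_style (&b, 1));   /* was set */
  return 0;
}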
diff -Naur glibc-2.11.orig/string/strstr.c glibc-2.11/string/strstr.c
--- glibc-2.11.orig/string/strstr.c	2011-04-13 20:05:08.044398202 -0400
+++ glibc-2.11/string/strstr.c	2011-04-13 20:26:19.184347926 -0400
@@ -1,6 +1,5 @@
 /* Return the offset of one string within another.
-   Copyright (C) 1994,1996,1997,2000,2001,2003,2008,2009
-   Free Software Foundation, Inc.
+   Copyright (C) 1994,1996,1997,2000,2001,2003 Free Software Foundation, Inc.
    This file is part of the GNU C Library.
 
    The GNU C Library is free software; you can redistribute it and/or
@@ -18,75 +17,107 @@
    Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
    02111-1307 USA.  */
 
-/* This particular implementation was written by Eric Blake, 2008.  */
+/*
+ * My personal strstr() implementation that beats most other algorithms.
+ * Until someone tells me otherwise, I assume that this is the
+ * fastest implementation of strstr() in C.
+ * I deliberately chose not to comment it.  You should have at least
+ * as much fun trying to understand it, as I had to write it :-).
+ *
+ * Stephen R. van den Berg, berg@pool.informatik.rwth-aachen.de	*/
 
-#ifndef _LIBC
+#if HAVE_CONFIG_H
 # include <config.h>
 #endif
 
-/* Specification of strstr.  */
-#include <string.h>
-
-#include <stdbool.h>
-
-#ifndef _LIBC
-# define __builtin_expect(expr, val)   (expr)
+#if defined _LIBC || defined HAVE_STRING_H
+# include <string.h>
 #endif
 
-#define RETURN_TYPE char *
-#define AVAILABLE(h, h_l, j, n_l)			\
-  (!memchr ((h) + (h_l), '\0', (j) + (n_l) - (h_l))	\
-   && ((h_l) = (j) + (n_l)))
-#include "str-two-way.h"
+typedef unsigned chartype;
 
 #undef strstr
 
-#ifndef STRSTR
-#define STRSTR strstr
-#endif
-
-/* Return the first occurrence of NEEDLE in HAYSTACK.  Return HAYSTACK
-   if NEEDLE is empty, otherwise NULL if NEEDLE is not found in
-   HAYSTACK.  */
 char *
-STRSTR (const char *haystack_start, const char *needle_start)
+strstr (phaystack, pneedle)
+     const char *phaystack;
+     const char *pneedle;
 {
-  const char *haystack = haystack_start;
-  const char *needle = needle_start;
-  size_t needle_len; /* Length of NEEDLE.  */
-  size_t haystack_len; /* Known minimum length of HAYSTACK.  */
-  bool ok = true; /* True if NEEDLE is prefix of HAYSTACK.  */
-
-  /* Determine length of NEEDLE, and in the process, make sure
-     HAYSTACK is at least as long (no point processing all of a long
-     NEEDLE if HAYSTACK is too short).  */
-  while (*haystack && *needle)
-    ok &= *haystack++ == *needle++;
-  if (*needle)
-    return NULL;
-  if (ok)
-    return (char *) haystack_start;
-
-  /* Reduce the size of haystack using strchr, since it has a smaller
-     linear coefficient than the Two-Way algorithm.  */
-  needle_len = needle - needle_start;
-  haystack = strchr (haystack_start + 1, *needle_start);
-  if (!haystack || __builtin_expect (needle_len == 1, 0))
-    return (char *) haystack;
-  needle -= needle_len;
-  haystack_len = (haystack > haystack_start + needle_len ? 1
-		  : needle_len + haystack_start - haystack);
-
-  /* Perform the search.  Abstract memory is considered to be an array
-     of 'unsigned char' values, not an array of 'char' values.  See
-     ISO C 99 section 6.2.6.1.  */
-  if (needle_len < LONG_NEEDLE_THRESHOLD)
-    return two_way_short_needle ((const unsigned char *) haystack,
-				 haystack_len,
-				 (const unsigned char *) needle, needle_len);
-  return two_way_long_needle ((const unsigned char *) haystack, haystack_len,
-			      (const unsigned char *) needle, needle_len);
+  const unsigned char *haystack, *needle;
+  chartype b;
+  const unsigned char *rneedle;
+
+  haystack = (const unsigned char *) phaystack;
+
+  if ((b = *(needle = (const unsigned char *) pneedle)))
+    {
+      chartype c;
+      haystack--;		/* possible ANSI violation */
+
+      {
+	chartype a;
+	do
+	  if (!(a = *++haystack))
+	    goto ret0;
+	while (a != b);
+      }
+
+      if (!(c = *++needle))
+	goto foundneedle;
+      ++needle;
+      goto jin;
+
+      for (;;)
+	{
+	  {
+	    chartype a;
+	    if (0)
+	    jin:{
+		if ((a = *++haystack) == c)
+		  goto crest;
+	      }
+	    else
+	      a = *++haystack;
+	    do
+	      {
+		for (; a != b; a = *++haystack)
+		  {
+		    if (!a)
+		      goto ret0;
+		    if ((a = *++haystack) == b)
+		      break;
+		    if (!a)
+		      goto ret0;
+		  }
+	      }
+	    while ((a = *++haystack) != c);
+	  }
+	crest:
+	  {
+	    chartype a;
+	    {
+	      const unsigned char *rhaystack;
+	      if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
+		do
+		  {
+		    if (!a)
+		      goto foundneedle;
+		    if (*++rhaystack != (a = *++needle))
+		      break;
+		    if (!a)
+		      goto foundneedle;
+		  }
+		while (*++rhaystack == (a = *++needle));
+	      needle = rneedle;	/* took the register-poor approach */
+	    }
+	    if (!a)
+	      break;
+	  }
+	}
+    }
+foundneedle:
+  return (char *) haystack;
+ret0:
+  return 0;
 }
 libc_hidden_builtin_def (strstr)
-
-#undef LONG_NEEDLE_THRESHOLD

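Since this hunk swaps the two-way implementation for the van den Berg scanner,
the edge cases worth re-verifying are the empty needle, a needle at the very
end of the haystack, and repetitive prefixes. A minimal harness (not part of
the patch; nothing here is specific to either algorithm):

#include <assert.h>
#include <string.h>

int
main (void)
{
  const char *h = "aaabaaabaaaab";

  assert (strstr (h, "") == h);		/* empty needle -> haystack */
  assert (strstr (h, "aaaab") == h + 8);	/* repetitive prefix, at end */
  assert (strstr (h, "ab") == h + 2);	/* first occurrence wins */
  assert (strstr (h, "b") == h + 3);
  assert (strstr (h, "abc") == NULL);	/* absent needle */
  assert (strstr ("", "x") == NULL);	/* empty haystack */
  return 0;
}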