libc: more math functions and string functions

remove -lc and -lm from nanohub compilation and add -nostdlib

Bug: 29003259
Change-Id: If73a539ddec303b61d36113a483f42bf5b2d7cf2
diff --git a/lib/libc/acle-compat.h b/lib/libc/acle-compat.h
new file mode 100644
index 0000000..888ae2e
--- /dev/null
+++ b/lib/libc/acle-compat.h
@@ -0,0 +1,182 @@
+/*
+ * Copyright (c) 2014 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef __ARM_ARCH
+
+/* ACLE standardises a set of pre-defines that describe the ARM architecture.
+   These were mostly implemented in GCC around GCC-4.8; older versions
+   have no, or only partial support.  To provide a level of backwards
+   compatibility we try to work out what the definitions should be, given
+   the older pre-defines that GCC did produce.  This isn't complete, but
+   it should be enough for use by routines that depend on this header.  */
+
+/* No need to handle ARMv8, GCC had ACLE support before that.  */
+
+# ifdef __ARM_ARCH_7__
+/* The common subset of ARMv7 in all profiles.  */
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_LDREX 7
+#  define __ARM_FEATURE_UNALIGNED
+# endif
+
+# if defined (__ARM_ARCH_7A__) || defined (__ARM_ARCH_7R__)
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 15
+#  define __ARM_FEATURE_UNALIGNED
+#  ifdef __ARM_ARCH_7A__
+#   define __ARM_ARCH_PROFILE 'A'
+#  else
+#   define __ARM_ARCH_PROFILE 'R'
+#  endif
+# endif
+
+# ifdef __ARM_ARCH_7EM__
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 7
+#  define __ARM_FEATURE_UNALIGNED
+#  define __ARM_ARCH_PROFILE 'M'
+# endif
+
+# ifdef __ARM_ARCH_7M__
+#  define __ARM_ARCH 7
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 7
+#  define __ARM_FEATURE_UNALIGNED
+#  define __ARM_ARCH_PROFILE 'M'
+# endif
+
+# ifdef __ARM_ARCH_6T2__
+#  define __ARM_ARCH 6
+#  define __ARM_ARCH_ISA_THUMB 2
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_LDREX 4
+#  define __ARM_FEATURE_UNALIGNED
+# endif
+
+# ifdef __ARM_ARCH_6M__
+#  define __ARM_ARCH 6
+#  define __ARM_ARCH_ISA_THUMB 1
+#  define __ARM_ARCH_PROFILE 'M'
+# endif
+
+# if defined (__ARM_ARCH_6__) || defined (__ARM_ARCH_6J__) \
+  || defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6Z__) \
+  || defined (__ARM_ARCH_6ZK__)
+#  define __ARM_ARCH 6
+#  define __ARM_ARCH_ISA_THUMB 1
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_SIMD32
+#  define __ARM_FEATURE_DSP
+#  define __ARM_FEATURE_QBIT
+#  define __ARM_FEATURE_SAT
+#  define __ARM_FEATURE_UNALIGNED
+#  ifndef __thumb__
+#   if defined (__ARM_ARCH_6K__) || defined (__ARM_ARCH_6ZK__)
+#    define __ARM_FEATURE_LDREX 15
+#   else
+#    define __ARM_FEATURE_LDREX 4
+#   endif
+#  endif
+# endif
+
+# if defined (__ARM_ARCH_5TE__) || defined (__ARM_ARCH_5E__)
+#  define __ARM_ARCH 5
+#  define __ARM_ARCH_ISA_ARM
+#  ifdef __ARM_ARCH_5TE__
+#   define __ARM_ARCH_ISA_THUMB 1
+#  endif
+#  define __ARM_FEATURE_CLZ
+#  define __ARM_FEATURE_DSP
+# endif
+
+# if defined (__ARM_ARCH_5T__) || defined (__ARM_ARCH_5__)	/* ARMv5 without the E (DSP) extension */
+#  define __ARM_ARCH 5
+#  define __ARM_ARCH_ISA_ARM
+#  ifdef __ARM_ARCH_5T__	/* was __ARM_ARCH_5TE__, which is never defined in this branch */
+#   define __ARM_ARCH_ISA_THUMB 1
+#  endif
+#  define __ARM_FEATURE_CLZ
+# endif
+
+# ifdef __ARM_ARCH_4T__
+#  define __ARM_ARCH 4
+#  define __ARM_ARCH_ISA_ARM
+#  define __ARM_ARCH_ISA_THUMB 1
+# endif
+
+# ifdef __ARM_ARCH_4__
+#  define __ARM_ARCH 4
+#  define __ARM_ARCH_ISA_ARM
+# endif
+
+# if defined (__ARM_ARCH_3__) || defined (__ARM_ARCH_3M__)
+#  define __ARM_ARCH 3
+#  define __ARM_ARCH_ISA_ARM
+# endif
+
+# ifdef __ARM_ARCH_2__
+#  define __ARM_ARCH 2
+#  define __ARM_ARCH_ISA_ARM
+# endif
+
+# ifdef __ARMEB__
+#  define __ARM_BIG_ENDIAN
+# endif
+
+/* If we still don't know what the target architecture is, then we're
+   probably not using GCC.  */
+# ifndef __ARM_ARCH
+#  error Unable to determine architecture version.
+# endif
+
+#endif /* __ARM_ARCH  */
+
diff --git a/lib/libc/arm_asm.h b/lib/libc/arm_asm.h
new file mode 100644
index 0000000..1bb5edb
--- /dev/null
+++ b/lib/libc/arm_asm.h
@@ -0,0 +1,98 @@
+/*
+ * Copyright (c) 2009 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef ARM_ASM__H
+#define ARM_ASM__H
+
+#include "acle-compat.h"
+
+#if __ARM_ARCH >= 7 && defined (__ARM_ARCH_ISA_ARM)
+# define _ISA_ARM_7
+#endif
+
+#if __ARM_ARCH >= 6 && defined (__ARM_ARCH_ISA_ARM)
+# define _ISA_ARM_6
+#endif
+
+#if __ARM_ARCH >= 5
+# define _ISA_ARM_5
+#endif
+
+#if __ARM_ARCH >= 4 && __ARM_ARCH_ISA_THUMB >= 1
+# define _ISA_ARM_4T
+#endif
+
+#if __ARM_ARCH >= 4 && __ARM_ARCH_ISA_THUMB == 0
+# define _ISA_ARM_4
+#endif
+
+
+#if __ARM_ARCH_ISA_THUMB >= 2
+# define _ISA_THUMB_2
+#endif
+
+#if __ARM_ARCH_ISA_THUMB >= 1
+# define _ISA_THUMB_1
+#endif
+
+
+/* Now some macros for common instruction sequences.  */
+#ifdef __ASSEMBLER__
+.macro  RETURN     cond=
+#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1)
+	bx\cond	lr
+#else
+	mov\cond pc, lr
+#endif
+.endm
+
+.macro optpld	base, offset=#0
+#if defined (_ISA_ARM_7)
+	pld	[\base, \offset]
+#endif
+.endm
+
+#else
+asm(".macro  RETURN	cond=\n\t"
+#if defined (_ISA_ARM_4T) || defined (_ISA_THUMB_1)
+    "bx\\cond	lr\n\t"
+#else
+    "mov\\cond	pc, lr\n\t"
+#endif
+    ".endm"
+    );
+
+asm(".macro optpld	base, offset=#0\n\t"
+#if defined (_ISA_ARM_7)
+    "pld	[\\base, \\offset]\n\t"
+#endif
+    ".endm"
+    );
+#endif
+
+#endif /* ARM_ASM__H */
diff --git a/lib/libc/bcopy.c b/lib/libc/bcopy.c
new file mode 100644
index 0000000..854e964
--- /dev/null
+++ b/lib/libc/bcopy.c
@@ -0,0 +1,135 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stdint.h>
+#include <sys/types.h>
+
+/*
+ * sizeof(word) MUST BE A POWER OF TWO
+ * SO THAT wmask BELOW IS ALL ONES
+ */
+typedef	int word;		/* "word" used for optimal copy speed */
+
+#define	wsize	sizeof(word)
+#define	wmask	(wsize - 1)
+
+/*
+ * Copy length bytes from src0 to dst0, handling overlap.  This one body is
+ * compiled as memcpy (MEMCOPY defined), memmove (MEMMOVE defined) or bcopy
+ * (neither); the mem* forms return dst0, bcopy returns nothing.
+ */
+#if defined(MEMCOPY) || defined(MEMMOVE)
+#include <string.h>
+
+void *
+#ifdef MEMCOPY
+memcpy
+#else
+memmove
+#endif
+(void *dst0, const void *src0, size_t length)
+#else
+#include <strings.h>
+
+void
+bcopy(const void *src0, void *dst0, size_t length)
+#endif
+{
+	char *dst = dst0;
+	const char *src = src0;
+	size_t t;
+
+	if (length == 0 || dst == src)		/* nothing to do */
+		goto done;
+
+	/*
+	 * Macros: TLOOP runs s t times (possibly zero); TLOOP1 needs t > 0.
+	 */
+#define	TLOOP(s) if (t) TLOOP1(s)
+#define	TLOOP1(s) do { s; } while (--t)
+
+	if ((unsigned long)dst < (unsigned long)src) {
+		/*
+		 * Copy forward (dst starts below src, so copy in ascending order).
+		 */
+		t = (uintptr_t)src;	/* only need low bits */
+		if ((t | (uintptr_t)dst) & wmask) {
+			/*
+			 * Try to align operands.  This cannot be done unless the
+			 * low bits match; otherwise (or if short) copy it all bytewise.
+			 */
+			if ((t ^ (uintptr_t)dst) & wmask || length < wsize)
+				t = length;
+			else
+				t = wsize - (t & wmask);
+			length -= t;
+			TLOOP1(*dst++ = *src++);
+		}
+		/*
+		 * Copy whole words, then mop up any trailing bytes.
+		 */
+		t = length / wsize;
+		TLOOP(*(word *)dst = *(word *)src; src += wsize; dst += wsize);
+		t = length & wmask;
+		TLOOP(*dst++ = *src++);
+	} else {
+		/*
+		 * Copy backwards.  Otherwise essentially the same.
+		 * Alignment works as before, except that it takes
+		 * (t&wmask) bytes to align, not wsize-(t&wmask).
+		 */
+		src += length;
+		dst += length;
+		t = (uintptr_t)src;
+		if ((t | (uintptr_t)dst) & wmask) {
+			if ((t ^ (uintptr_t)dst) & wmask || length <= wsize)
+				t = length;
+			else
+				t &= wmask;
+			length -= t;
+			TLOOP1(*--dst = *--src);
+		}
+		t = length / wsize;
+		TLOOP(src -= wsize; dst -= wsize; *(word *)dst = *(word *)src);
+		t = length & wmask;
+		TLOOP(*--dst = *--src);
+	}
+done:
+#if defined(MEMCOPY) || defined(MEMMOVE)
+	return (dst0);
+#else
+	return;
+#endif
+}
diff --git a/lib/libc/memcmp.c b/lib/libc/memcmp.c
new file mode 100644
index 0000000..9e6a575
--- /dev/null
+++ b/lib/libc/memcmp.c
@@ -0,0 +1,53 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <string.h>
+
+/* Compare the first n bytes of s1 and s2 as unsigned chars.  Returns 0 when
+ * they all match (or n == 0), otherwise the difference (s1 byte - s2 byte)
+ * at the first mismatch. */
+int
+memcmp(const void *s1, const void *s2, size_t n)
+{
+	if (n != 0) {
+		const unsigned char *p1 = s1, *p2 = s2;
+
+		do {
+			if (*p1++ != *p2++)
+				return (*--p1 - *--p2);	/* step back onto the mismatching bytes */
+		} while (--n != 0);
+	}
+	return (0);
+}
diff --git a/lib/libc/memcpy-armv7m.S b/lib/libc/memcpy-armv7m.S
new file mode 100644
index 0000000..8a70c7d
--- /dev/null
+++ b/lib/libc/memcpy-armv7m.S
@@ -0,0 +1,321 @@
+/*
+ * Copyright (c) 2013 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/* This memcpy routine is optimised for Cortex-M3/M4 cores with/without
+   unaligned access.
+
+   If compiled with GCC, this file should be enclosed within following
+   pre-processing check:
+   if defined (__ARM_ARCH_7M__) || defined (__ARM_ARCH_7EM__)
+
+   Prototype: void *memcpy (void *dst, const void *src, size_t count);
+
+   The job will be done in 5 steps.
+   Step 1: Align src/dest pointers; do a misaligned copy if both cannot be aligned
+   Step 2: Repeatedly copy big blocks of __OPT_BIG_BLOCK_SIZE bytes
+   Step 3: Repeatedly copy mid blocks of __OPT_MID_BLOCK_SIZE bytes
+   Step 4: Copy word by word
+   Step 5: Copy byte-to-byte
+
+   Tunable options:
+     __OPT_BIG_BLOCK_SIZE: Size of big block in bytes.  Default to 64.
+     __OPT_MID_BLOCK_SIZE: Size of mid block in bytes.  Default to 16.
+ */
+#ifndef __OPT_BIG_BLOCK_SIZE
+#define __OPT_BIG_BLOCK_SIZE (4 * 16)
+#endif
+
+#ifndef __OPT_MID_BLOCK_SIZE
+#define __OPT_MID_BLOCK_SIZE (4 * 4)
+#endif
+
+#if __OPT_BIG_BLOCK_SIZE == 16
+#define BEGIN_UNROLL_BIG_BLOCK \
+  .irp offset, 0,4,8,12
+#elif __OPT_BIG_BLOCK_SIZE == 32
+#define BEGIN_UNROLL_BIG_BLOCK \
+  .irp offset, 0,4,8,12,16,20,24,28
+#elif __OPT_BIG_BLOCK_SIZE == 64
+#define BEGIN_UNROLL_BIG_BLOCK \
+  .irp offset, 0,4,8,12,16,20,24,28,32,36,40,44,48,52,56,60
+#else
+#error "Illegal __OPT_BIG_BLOCK_SIZE"
+#endif
+
+#if __OPT_MID_BLOCK_SIZE == 8
+#define BEGIN_UNROLL_MID_BLOCK \
+  .irp offset, 0,4
+#elif __OPT_MID_BLOCK_SIZE == 16
+#define BEGIN_UNROLL_MID_BLOCK \
+  .irp offset, 0,4,8,12
+#else
+#error "Illegal __OPT_MID_BLOCK_SIZE"
+#endif
+
+#define END_UNROLL .endr
+
+	.syntax unified
+	.text
+	.align	2
+	.global	memcpy
+	.thumb
+	.thumb_func
+	.type	memcpy, %function
+memcpy:
+	@ r0: dst
+	@ r1: src
+	@ r2: len
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* In case of UNALIGNED access supported, ip is not used in
+	   function body.  */
+	mov	ip, r0
+#else
+	push	{r0}
+#endif
+	orr	r3, r1, r0
+	ands	r3, r3, #3
+	bne	.Lmisaligned_copy
+
+.Lbig_block:
+	subs	r2, __OPT_BIG_BLOCK_SIZE
+	blo	.Lmid_block
+
+	/* Kernel loop for big block copy */
+	.align 2
+.Lbig_block_loop:
+	BEGIN_UNROLL_BIG_BLOCK
+#ifdef __ARM_ARCH_7EM__
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	END_UNROLL
+#else /* __ARM_ARCH_7M__ */
+	ldr	r3, [r1, \offset]
+	str	r3, [r0, \offset]
+	END_UNROLL
+	adds	r0, __OPT_BIG_BLOCK_SIZE
+	adds	r1, __OPT_BIG_BLOCK_SIZE
+#endif
+	subs	r2, __OPT_BIG_BLOCK_SIZE
+	bhs .Lbig_block_loop
+
+.Lmid_block:
+	adds	r2, __OPT_BIG_BLOCK_SIZE - __OPT_MID_BLOCK_SIZE
+	blo	.Lcopy_word_by_word
+
+	/* Kernel loop for mid-block copy */
+	.align 2
+.Lmid_block_loop:
+	BEGIN_UNROLL_MID_BLOCK
+#ifdef __ARM_ARCH_7EM__
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	END_UNROLL
+#else /* __ARM_ARCH_7M__ */
+	ldr	r3, [r1, \offset]
+	str	r3, [r0, \offset]
+	END_UNROLL
+	adds    r0, __OPT_MID_BLOCK_SIZE
+	adds    r1, __OPT_MID_BLOCK_SIZE
+#endif
+	subs	r2, __OPT_MID_BLOCK_SIZE
+	bhs	.Lmid_block_loop
+
+.Lcopy_word_by_word:
+	adds	r2, __OPT_MID_BLOCK_SIZE - 4
+	blo	.Lcopy_less_than_4
+
+	/* Kernel loop for small block copy */
+	.align 2
+.Lcopy_word_by_word_loop:
+	ldr	r3, [r1], #4
+	str	r3, [r0], #4
+	subs	r2, #4
+	bhs	.Lcopy_word_by_word_loop
+
+.Lcopy_less_than_4:
+	adds	r2, #4
+	beq	.Ldone
+
+	lsls	r2, r2, #31
+	itt ne
+	ldrbne  r3, [r1], #1
+	strbne  r3, [r0], #1
+
+	bcc	.Ldone
+#ifdef __ARM_FEATURE_UNALIGNED
+	ldrh	r3, [r1]
+	strh	r3, [r0]
+#else
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	ldrb	r3, [r1, #1]
+	strb	r3, [r0, #1]
+#endif /* __ARM_FEATURE_UNALIGNED */
+
+.Ldone:
+#ifdef __ARM_FEATURE_UNALIGNED
+	mov	r0, ip
+#else
+	pop	{r0}
+#endif
+	bx	lr
+
+	.align 2
+.Lmisaligned_copy:
+#ifdef __ARM_FEATURE_UNALIGNED
+	/* Define label DST_ALIGNED to BIG_BLOCK.  It will go to aligned copy
+	   once destination is adjusted to aligned.  */
+#define Ldst_aligned Lbig_block
+
+	/* Copy word by word using LDR when alignment can be done in hardware,
+	i.e., unaligned LDR and STR accesses are supported and do not trap.  */
+
+	cmp	r2, #8
+	blo	.Lbyte_copy
+
+	/* if src is aligned, just go to the big block loop.  */
+	lsls	r3, r1, #30
+	beq	.Ldst_aligned
+#else
+	/* If len < 12, misalignment adjustment has more overhead than
+	a plain byte-by-byte copy.  Also, len must be >= 8 to guarantee
+	that the code afterward works correctly.  */
+	cmp	r2, #12
+	blo	.Lbyte_copy
+#endif /* __ARM_FEATURE_UNALIGNED */
+
+	/* Align dst only, without trying to align src.  That is because
+	handling an aligned src with a misaligned dst needs more overhead than
+	the reverse.  The worst case is when the initial src is aligned: up to
+	4 additional bytes will then be copied individually, which is
+	acceptable.  */
+
+	ands	r3, r0, #3
+	beq	.Ldst_aligned
+
+	rsb	r3, #4
+	subs	r2, r3
+
+	lsls    r3, r3, #31
+	itt ne
+	ldrbne  r3, [r1], #1
+	strbne  r3, [r0], #1
+
+	bcc .Ldst_aligned
+
+#ifdef __ARM_FEATURE_UNALIGNED
+	ldrh    r3, [r1], #2
+	strh    r3, [r0], #2
+	b	.Ldst_aligned
+#else
+	ldrb    r3, [r1], #1
+	strb    r3, [r0], #1
+	ldrb    r3, [r1], #1
+	strb    r3, [r0], #1
+	/* Now that dst is aligned */
+.Ldst_aligned:
+	/* if r1 is aligned now, it means r0/r1 has the same misalignment,
+	and they are both aligned now.  Go aligned copy.  */
+	ands	r3, r1, #3
+	beq	.Lbig_block
+
+	/* dst is aligned, but src isn't.  Misaligned copy.  */
+
+	push	{r4, r5}
+	subs	r2, #4
+
+	/* Move r1 back by the misaligned byte count to make it aligned.
+	Since r1 must be restored to an unaligned address after the loop,
+	keep the offset bytes in ip and subtract them from r1 afterward.  */
+	subs	r1, r3
+	rsb	ip, r3, #4
+
+	/* Pre-load one word */
+	ldr	r4, [r1], #4
+
+	cmp	r3, #2
+	beq	.Lmisaligned_copy_2_2
+	cmp	r3, #3
+	beq	.Lmisaligned_copy_3_1
+
+	.macro mis_src_copy shift
+1:
+	lsrs	r4, r4, \shift
+	ldr	r3, [r1], #4
+	lsls	r5, r3, 32-\shift
+	orr	r4, r4, r5
+	str	r4, [r0], #4
+	mov	r4, r3
+	subs	r2, #4
+	bhs	1b
+	.endm
+
+.Lmisaligned_copy_1_3:
+	mis_src_copy shift=8
+	b	.Lsrc_misaligned_tail
+
+.Lmisaligned_copy_3_1:
+	mis_src_copy shift=24
+	b	.Lsrc_misaligned_tail
+
+.Lmisaligned_copy_2_2:
+	/* For 2_2 misalignment, ldr is still faster than 2 x ldrh.  */
+	mis_src_copy shift=16
+
+.Lsrc_misaligned_tail:
+	adds	r2, #4
+	subs	r1, ip
+	pop	{r4, r5}
+
+#endif /* __ARM_FEATURE_UNALIGNED */
+
+.Lbyte_copy:
+	subs	r2, #4
+	blo	.Lcopy_less_than_4
+
+.Lbyte_copy_loop:
+	subs    r2, #1
+	ldrb    r3, [r1], #1
+	strb    r3, [r0], #1
+	bhs	.Lbyte_copy_loop
+
+	ldrb	r3, [r1]
+	strb	r3, [r0]
+	ldrb	r3, [r1, #1]
+	strb	r3, [r0, #1]
+	ldrb	r3, [r1, #2]
+	strb	r3, [r0, #2]
+
+#ifdef __ARM_FEATURE_UNALIGNED
+	mov	r0, ip
+#else
+	pop	{r0}
+#endif
+	bx	lr
+
+	.size	memcpy, .-memcpy
diff --git a/lib/libc/memmove.c b/lib/libc/memmove.c
new file mode 100644
index 0000000..05cf75a
--- /dev/null
+++ b/lib/libc/memmove.c
@@ -0,0 +1,5 @@
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#define	MEMMOVE
+#include "bcopy.c"
diff --git a/lib/libc/memset.c b/lib/libc/memset.c
new file mode 100644
index 0000000..0f3ba01
--- /dev/null
+++ b/lib/libc/memset.c
@@ -0,0 +1,125 @@
+/*-
+ * Copyright (c) 1990, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * This code is derived from software contributed to Berkeley by
+ * Mike Hibler and Chris Torek.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/types.h>
+
+#include <limits.h>
+
+#define	wsize	sizeof(u_int)	/* bytes stored by one word write */
+#define	wmask	(wsize - 1)	/* low address bits that must be zero for alignment */
+
+#ifdef BZERO
+#include <strings.h>
+
+#define	RETURN	return	/* bzero returns nothing */
+#define	VAL	0	/* byte value used for byte stores */
+#define	WIDEVAL	0	/* word value used for word stores */
+
+void
+bzero(void *dst0, size_t length)
+#else
+#include <string.h>
+
+#define	RETURN	return (dst0)	/* memset returns its destination argument */
+#define	VAL	c0	/* byte value used for byte stores */
+#define	WIDEVAL	c	/* word value used for word stores (c0's low byte replicated) */
+
+void *
+memset(void *dst0, int c0, size_t length)
+#endif
+{
+	size_t t;
+#ifndef BZERO
+	u_int c;	/* fill byte replicated across a u_int */
+#endif
+	u_char *dst;
+
+	dst = dst0;
+	/*
+	 * If not enough words, just fill bytes.  A length >= 2 words
+	 * guarantees that at least one of them is `complete' after
+	 * any necessary alignment.  For instance:
+	 *
+	 *	|-----------|-----------|-----------|
+	 *	|00|01|02|03|04|05|06|07|08|09|0A|00|
+	 *	          ^---------------------^
+	 *		 dst		 dst+length-1
+	 *
+	 * but we use a minimum of 3 here since the overhead of the code
+	 * to do word writes is substantial.
+	 */
+	if (length < 3 * wsize) {
+		while (length != 0) {
+			*dst++ = VAL;
+			--length;
+		}
+		RETURN;
+	}
+
+#ifndef BZERO
+	if ((c = (u_char)c0) != 0) {	/* Fill the word. */
+		c = (c << 8) | c;	/* u_int is 16 bits. */
+#if UINT_MAX > 0xffff
+		c = (c << 16) | c;	/* u_int is 32 bits. */
+#endif
+#if UINT_MAX > 0xffffffff
+		c = (c << 32) | c;	/* u_int is 64 bits. */
+#endif
+	}
+#endif
+	/* Align destination by filling in bytes. */
+	if ((t = (long)dst & wmask) != 0) {
+		t = wsize - t;	/* bytes needed to reach the next word boundary */
+		length -= t;
+		do {
+			*dst++ = VAL;
+		} while (--t != 0);
+	}
+
+	/* Fill words.  Length was >= 2*words so we know t >= 1 here. */
+	t = length / wsize;
+	do {
+		*(u_int *)dst = WIDEVAL;
+		dst += wsize;
+	} while (--t != 0);
+
+	/* Mop up trailing bytes, if any. */
+	t = length & wmask;
+	if (t != 0)
+		do {
+			*dst++ = VAL;
+		} while (--t != 0);
+	RETURN;
+}
diff --git a/lib/libc/strcasecmp.c b/lib/libc/strcasecmp.c
new file mode 100644
index 0000000..2be0913
--- /dev/null
+++ b/lib/libc/strcasecmp.c
@@ -0,0 +1,105 @@
+/*	$OpenBSD: strcasecmp.c,v 1.6 2005/08/08 08:05:37 espie Exp $	*/
+
+/*
+ * Copyright (c) 1987, 1993
+ *	The Regents of the University of California.  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the University nor the names of its contributors
+ *    may be used to endorse or promote products derived from this software
+ *    without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <string.h>
+
+typedef unsigned char u_char;
+
+/*
+ * This array is designed for mapping upper and lower case letter
+ * together for a case independent comparison.  The mappings are
+ * based upon ascii character sequences.
+ */
+static const u_char charmap[] = {
+	'\000', '\001', '\002', '\003', '\004', '\005', '\006', '\007',
+	'\010', '\011', '\012', '\013', '\014', '\015', '\016', '\017',
+	'\020', '\021', '\022', '\023', '\024', '\025', '\026', '\027',
+	'\030', '\031', '\032', '\033', '\034', '\035', '\036', '\037',
+	'\040', '\041', '\042', '\043', '\044', '\045', '\046', '\047',
+	'\050', '\051', '\052', '\053', '\054', '\055', '\056', '\057',
+	'\060', '\061', '\062', '\063', '\064', '\065', '\066', '\067',
+	'\070', '\071', '\072', '\073', '\074', '\075', '\076', '\077',
+	'\100', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+	'\170', '\171', '\172', '\133', '\134', '\135', '\136', '\137',
+	'\140', '\141', '\142', '\143', '\144', '\145', '\146', '\147',
+	'\150', '\151', '\152', '\153', '\154', '\155', '\156', '\157',
+	'\160', '\161', '\162', '\163', '\164', '\165', '\166', '\167',
+	'\170', '\171', '\172', '\173', '\174', '\175', '\176', '\177',
+	'\200', '\201', '\202', '\203', '\204', '\205', '\206', '\207',
+	'\210', '\211', '\212', '\213', '\214', '\215', '\216', '\217',
+	'\220', '\221', '\222', '\223', '\224', '\225', '\226', '\227',
+	'\230', '\231', '\232', '\233', '\234', '\235', '\236', '\237',
+	'\240', '\241', '\242', '\243', '\244', '\245', '\246', '\247',
+	'\250', '\251', '\252', '\253', '\254', '\255', '\256', '\257',
+	'\260', '\261', '\262', '\263', '\264', '\265', '\266', '\267',
+	'\270', '\271', '\272', '\273', '\274', '\275', '\276', '\277',
+	'\300', '\301', '\302', '\303', '\304', '\305', '\306', '\307',
+	'\310', '\311', '\312', '\313', '\314', '\315', '\316', '\317',
+	'\320', '\321', '\322', '\323', '\324', '\325', '\326', '\327',
+	'\330', '\331', '\332', '\333', '\334', '\335', '\336', '\337',
+	'\340', '\341', '\342', '\343', '\344', '\345', '\346', '\347',
+	'\350', '\351', '\352', '\353', '\354', '\355', '\356', '\357',
+	'\360', '\361', '\362', '\363', '\364', '\365', '\366', '\367',
+	'\370', '\371', '\372', '\373', '\374', '\375', '\376', '\377',
+};
+
+int
+strcasecmp(const char *s1, const char *s2)	/* compare two strings after folding each byte through charmap */
+{
+	const u_char *cm = charmap;
+	const u_char *us1 = (const u_char *)s1;
+	const u_char *us2 = (const u_char *)s2;
+
+	while (cm[*us1] == cm[*us2++])
+		if (*us1++ == '\0')	/* reached the terminator with every byte matching */
+			return (0);
+	return (cm[*us1] - cm[*--us2]);	/* undo the post-increment to reach the mismatching byte of s2 */
+}
+
+int
+strncasecmp(const char *s1, const char *s2, size_t n)	/* as strcasecmp, but examines at most n bytes */
+{
+	if (n != 0) {	/* n == 0 compares nothing and yields 0 */
+		const u_char *cm = charmap;
+		const u_char *us1 = (const u_char *)s1;
+		const u_char *us2 = (const u_char *)s2;
+
+		do {
+			if (cm[*us1] != cm[*us2++])
+				return (cm[*us1] - cm[*--us2]);	/* difference of the folded mismatching bytes */
+			if (*us1++ == '\0')	/* both strings ended together */
+				break;
+		} while (--n != 0);
+	}
+	return (0);
+}
diff --git a/lib/libc/strlen.c b/lib/libc/strlen.c
new file mode 100644
index 0000000..7e59e75
--- /dev/null
+++ b/lib/libc/strlen.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright (c) 2008 ARM Ltd
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ * 3. The name of the company may not be used to endorse or promote
+ *    products derived from this software without specific prior written
+ *    permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
+ * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "arm_asm.h"
+#include <_ansi.h>
+#include <string.h>
+#include <limits.h>
+
+#if defined (__OPTIMIZE_SIZE__) || defined (PREFER_SIZE_OVER_SPEED) || \
+  (defined (__thumb__) && !defined (__thumb2__))
+
+# if !defined (PREFER_SIZE_OVER_SPEED) && !defined (__OPTIMIZE_SIZE__)
+/* Thumb1 only variant.
+   If speed is preferred, the strlen() function in ../../string/strlen.c
+   will be used.
+
+   Leave this field blank, so that strlen() is not defined here; this will
+   automatically pull in the default C definition of strlen() from
+   ../../string/strlen.c.  No need to include this file explicitly.
+   The lib_a-strlen.o will not be generated, so it won't replace the default
+   lib_a-strlen.o which is generated by ../../string/strlen.c.  See the
+   commands in configure.in and Makefile.am for more details.
+
+   However, if we need to rewrite this function to be more efficient,
+   we can add the corresponding assembly code into this field and change
+   the commands in configure.in and Makefile.am to allow the corresponding
+   lib_a-strlen.o to be generated.
+*/
+# else
+size_t	/* number of bytes in str before the terminating NUL */
+strlen (const char* str)
+{
+  int scratch;	/* receives each byte loaded by the asm loop */
+#if defined (__thumb__) && !defined (__thumb2__)
+  size_t len;	/* byte index, advanced once per loop iteration */
+  asm ("mov	%0, #0\n"	/* len = 0 */
+       "1:\n\t"
+       "ldrb	%1, [%2, %0]\n\t"	/* scratch = str[len] */
+       "add	%0, %0, #1\n\t"	/* len++ */
+       "cmp	%1, #0\n\t"
+       "bne	1b"	/* repeat until a zero byte was loaded */
+       : "=&r" (len), "=&r" (scratch) : "r" (str) : "memory", "cc");
+  return len - 1;	/* len was also advanced for the NUL itself */
+#else
+  const char* end;	/* starts as str (matching constraint "0"), post-incremented per byte */
+  asm ("1:\n\t"
+       "ldrb	%1, [%0], #1\n\t"	/* scratch = *end++ */
+       "cmp	%1, #0\n\t"
+       "bne	1b"	/* repeat until a zero byte was loaded */
+       : "=&r" (end), "=&r" (scratch) : "0" (str) : "memory", "cc");
+  return end - str - 1;	/* end points one past the NUL */
+#endif
+}
+#endif
+#else
+
+#if !(defined(_ISA_ARM_7) || defined(__ARM_ARCH_6T2__))
+
+size_t __attribute__((naked))	/* word-at-a-time strlen; the body is one hand-written asm statement */
+strlen (const char* str)
+{
+  asm ("len .req r0\n\t"	/* r0: read as the string address, then reused as the running length */
+       "data .req r3\n\t"	/* r3: word currently being examined */
+       "addr .req r1\n\t"	/* r1: word-aligned read pointer */
+
+       "optpld r0\n\t"	/* optpld is not defined in this file; presumably a preload macro from arm_asm.h -- confirm */
+       /* Word-align address */
+       "bic	addr, r0, #3\n\t"
+       /* Get adjustment for start ... */
+       "ands	len, r0, #3\n\t"	/* flags set here drive the ne-conditional ops below */
+       "neg	len, len\n\t"	/* len = -(str & 3) */
+       /* First word of data */
+       "ldr	data, [addr], #4\n\t"
+       /* Ensure bytes preceding start ... */
+       "add	ip, len, #4\n\t"
+       "mov	ip, ip, asl #3\n\t"	/* ip = (4 - (str & 3)) * 8, a shift count in bits */
+       "mvn	r2, #0\n\t"	/* r2 = all ones */
+       /* ... are masked out */
+#ifdef __thumb__
+       "itt	ne\n\t"
+# ifdef __ARMEB__
+       "lslne	r2, ip\n\t"
+# else
+       "lsrne	r2, ip\n\t"
+# endif
+       "orrne	data, data, r2\n\t"	/* unaligned start only: force the bytes before str non-zero */
+#else
+       "it	ne\n\t"
+# ifdef __ARMEB__
+       "orrne	data, data, r2, lsl ip\n\t"
+# else
+       "orrne	data, data, r2, lsr ip\n\t"
+# endif
+#endif
+       /* Magic const 0x01010101 */
+#ifdef _ISA_ARM_7	/* NOTE(review): the enclosing #if excludes _ISA_ARM_7, so this arm looks unreachable -- confirm */
+       "movw	ip, #0x101\n\t"
+#else
+       "mov	ip, #0x1\n\t"
+       "orr	ip, ip, ip, lsl #8\n\t"	/* ip = 0x00000101 */
+#endif
+       "orr	ip, ip, ip, lsl #16\n"	/* ip = 0x01010101 */
+
+	/* This is the main loop.  We subtract one from each byte in
+	   the word: the sign bit changes iff the byte was zero or
+	   0x80 -- we eliminate the latter case by anding the result
+	   with the 1-s complement of the data.  */
+       "1:\n\t"
+       /* test (data - 0x01010101)  */
+       "sub	r2, data, ip\n\t"
+       /* ... & ~data */
+       "bic	r2, r2, data\n\t"
+       /* ... & 0x80808080 == 0? */
+       "ands	r2, r2, ip, lsl #7\n\t"	/* ip << 7 = 0x80808080; Z set when the word has no zero byte */
+#ifdef _ISA_ARM_7	/* NOTE(review): same as above, this unrolled arm looks unreachable here -- confirm */
+       /* yes, get more data... */
+       "itt	eq\n\t"
+       "ldreq	data, [addr], #4\n\t"
+       /* and 4 more bytes  */
+       "addeq	len, len, #4\n\t"
+	/* If we have PLD, then unroll the loop a bit.  */
+       "optpld addr, #8\n\t"
+       /*  test (data - 0x01010101)  */
+       "ittt	eq\n\t"
+       "subeq	r2, data, ip\n\t"
+       /* ... & ~data */
+       "biceq	r2, r2, data\n\t"
+       /* ... & 0x80808080 == 0? */
+       "andeqs	r2, r2, ip, lsl #7\n\t"
+#endif
+       "itt	eq\n\t"
+       /* yes, get more data... */
+       "ldreq	data, [addr], #4\n\t"
+       /* and 4 more bytes  */
+       "addeq	len, len, #4\n\t"
+       "beq	1b\n\t"	/* fall through once the current word contains a zero byte */
+#ifdef __ARMEB__
+       "tst	data, #0xff000000\n\t"	/* big-endian: count non-zero bytes from the top byte down */
+       "itttt	ne\n\t"
+       "addne	len, len, #1\n\t"
+       "tstne	data, #0xff0000\n\t"
+       "addne	len, len, #1\n\t"
+       "tstne	data, #0xff00\n\t"
+       "it	ne\n\t"
+       "addne	len, len, #1\n\t"
+#else
+# ifdef _ISA_ARM_5
+	/* R2 is the residual sign bits from the above test.  All we
+	need to do now is establish the position of the first zero
+	byte... */
+	/* Little-endian is harder, we need the number of trailing
+	zeros / 8 */
+#  ifdef _ISA_ARM_7	/* NOTE(review): looks unreachable for the same reason as the arms above -- confirm */
+       "rbit	r2, r2\n\t"
+       "clz	r2, r2\n\t"
+#  else
+       "rsb	r1, r2, #0\n\t"	/* r1 = -r2; addr (r1) is no longer needed */
+       "and	r2, r2, r1\n\t"	/* keep only the lowest set bit */
+       "clz	r2, r2\n\t"
+       "rsb	r2, r2, #31\n\t"	/* 31 - clz = index of that bit */
+#  endif
+       "add	len, len, r2, lsr #3\n\t"	/* bit index / 8 = byte offset of the first zero byte */
+# else  /* No CLZ instruction */
+       "tst	data, #0xff\n\t"	/* little-endian: count non-zero bytes from the low byte up */
+       "itttt	ne\n\t"
+       "addne	len, len, #1\n\t"
+       "tstne	data, #0xff00\n\t"
+       "addne	len, len, #1\n\t"
+       "tstne	data, #0xff0000\n\t"
+       "it	ne\n\t"
+       "addne	len, len, #1\n\t"
+# endif
+#endif
+       "RETURN");	/* RETURN is not defined in this file; presumably the return-sequence macro from arm_asm.h -- confirm */
+}
+#endif
+#endif