Updating libutf to a newer version.

This brings us in sync with the google3 //third_party/utf
implementation.

Change-Id: I0f3e013304bab6d609ddf0b2619f3b5b8ec6f047
diff --git a/plan9.h b/plan9.h
deleted file mode 100644
index e40e33e..0000000
--- a/plan9.h
+++ /dev/null
@@ -1,29 +0,0 @@
-/*
- * compiler directive on Plan 9
- */
-#ifndef USED
-#define USED(x) if(x);else
-#endif
-
-/*
- * easiest way to make sure these are defined
- */
-#define uchar	_utfuchar
-#define ushort	_utfushort
-#define uint	_utfuint
-#define ulong	_utfulong
-typedef unsigned char		uchar;
-typedef unsigned short		ushort;
-typedef unsigned int		uint;
-typedef unsigned long		ulong;
-
-/*
- * nil cannot be ((void*)0) on ANSI C,
- * because it is used for function pointers
- */
-#undef	nil
-#define	nil	0
-
-#undef	nelem
-#define	nelem(x)	(sizeof (x)/sizeof (x)[0])
-
diff --git a/rune.c b/rune.c
index f594480..65df3d3 100644
--- a/rune.c
+++ b/rune.c
@@ -7,14 +7,14 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 enum
 {
@@ -23,7 +23,7 @@
 	Bit2	= 5,
 	Bit3	= 4,
 	Bit4	= 3,
-	Bit5	= 2,
+	Bit5	= 2, 
 
 	T1	= ((1<<(Bit1+1))-1) ^ 0xFF,	/* 0000 0000 */
 	Tx	= ((1<<(Bitx+1))-1) ^ 0xFF,	/* 1000 0000 */
@@ -32,19 +32,139 @@
 	T4	= ((1<<(Bit4+1))-1) ^ 0xFF,	/* 1111 0000 */
 	T5	= ((1<<(Bit5+1))-1) ^ 0xFF,	/* 1111 1000 */
 
-	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0000 0000 0111 1111 */
-	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0000 0000 0111 1111 1111 */
-	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 0000 0000 1111 1111 1111 1111 */
-	Rune4	= (1<<(Bit4+3*Bitx))-1,		/* 0011 1111 1111 1111 1111 1111 */
+	Rune1	= (1<<(Bit1+0*Bitx))-1,		/* 0000 0000 0111 1111 */
+	Rune2	= (1<<(Bit2+1*Bitx))-1,		/* 0000 0111 1111 1111 */
+	Rune3	= (1<<(Bit3+2*Bitx))-1,		/* 1111 1111 1111 1111 */
+	Rune4	= (1<<(Bit4+3*Bitx))-1,
+                                        /* 0001 1111 1111 1111 1111 1111 */
 
 	Maskx	= (1<<Bitx)-1,			/* 0011 1111 */
 	Testx	= Maskx ^ 0xFF,			/* 1100 0000 */
 
-	Bad	= Runeerror
+	Bad	= Runeerror,
 };
 
+/*
+ * Modified by Wei-Hwa Huang, Google Inc., on 2004-09-24
+ * This is a slower but "safe" version of the old chartorune 
+ * that works on strings that are not necessarily null-terminated.
+ * 
+ * If you know for sure that your string is null-terminated,
+ * chartorune will be a bit faster.
+ *
+ * It is guaranteed not to read more than "length" bytes
+ * starting at the incoming pointer.  This is to avoid
+ * possible access violations.  If the string appears to be
+ * well-formed but incomplete (i.e., to get the whole Rune
+ * we'd need to read past str+length) then we'll set the Rune
+ * to Bad and return 0.
+ *
+ * Note that if we have decoding problems for other
+ * reasons, we set the Rune to Bad and return 1 instead of 0.
+ */
 int
-chartorune(Rune *rune, char *str)
+charntorune(Rune *rune, const char *str, int length)
+{
+	int c, c1, c2, c3;
+	long l;
+
+	/* When we're not allowed to read anything */
+	if(length <= 0) {
+		goto badlen;
+	}
+
+	/*
+	 * one character sequence (7-bit value)
+	 *	0000-007F => T1
+	 */
+	c = *(uchar*)str;
+	if(c < Tx) {
+		*rune = c;
+		return 1;
+	}
+
+	// If we can't read more than one character we must stop
+	if(length <= 1) {
+		goto badlen;
+	}
+
+	/*
+	 * two character sequence (11-bit value)
+	 *	0080-07FF => T2 Tx
+	 */
+	c1 = *(uchar*)(str+1) ^ Tx;
+	if(c1 & Testx)
+		goto bad;
+	if(c < T3) {
+		if(c < T2)
+			goto bad;
+		l = ((c << Bitx) | c1) & Rune2;
+		if(l <= Rune1)
+			goto bad;
+		*rune = l;
+		return 2;
+	}
+
+	// If we can't read more than two characters we must stop
+	if(length <= 2) {
+		goto badlen;
+	}
+
+	/*
+	 * three character sequence (16-bit value)
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	c2 = *(uchar*)(str+2) ^ Tx;
+	if(c2 & Testx)
+		goto bad;
+	if(c < T4) {
+		l = ((((c << Bitx) | c1) << Bitx) | c2) & Rune3;
+		if(l <= Rune2)
+			goto bad;
+		*rune = l;
+		return 3;
+	}
+
+	if (length <= 3)
+		goto badlen;
+
+	/*
+	 * four character sequence (21-bit value)
+	 *	10000-1FFFFF => T4 Tx Tx Tx
+	 */
+	c3 = *(uchar*)(str+3) ^ Tx;
+	if (c3 & Testx)
+		goto bad;
+	if (c < T5) {
+		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+		if (l <= Rune3)
+			goto bad;
+		*rune = l;
+		return 4;
+	}
+
+	// Support for 5-byte or longer UTF-8 would go here, but
+	// since we don't have that, we'll just fall through to bad.
+
+	/*
+	 * bad decoding
+	 */
+bad:
+	*rune = Bad;
+	return 1;
+badlen:
+	*rune = Bad;
+	return 0;
+
+}
+
+
+/*
+ * This is the older "unsafe" version, which works fine on 
+ * null-terminated strings.
+ */
+int
+chartorune(Rune *rune, const char *str)
 {
 	int c, c1, c2, c3;
 	long l;
@@ -92,25 +212,26 @@
 	}
 
 	/*
-	 * four character sequence
-	 *	10000-10FFFF => T4 Tx Tx Tx
+	 * four character sequence (21-bit value)
+	 *	10000-1FFFFF => T4 Tx Tx Tx
 	 */
-	if(UTFmax >= 4) {
-		c3 = *(uchar*)(str+3) ^ Tx;
-		if(c3 & Testx)
+	c3 = *(uchar*)(str+3) ^ Tx;
+	if (c3 & Testx)
+		goto bad;
+	if (c < T5) {
+		l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
+		if (l <= Rune3)
 			goto bad;
-		if(c < T5) {
-			l = ((((((c << Bitx) | c1) << Bitx) | c2) << Bitx) | c3) & Rune4;
-			if(l <= Rune3)
-				goto bad;
-			if(l > Runemax)
-				goto bad;
-			*rune = l;
-			return 4;
-		}
+		*rune = l;
+		return 4;
 	}
 
 	/*
+	 * Support for 5-byte or longer UTF-8 would go here, but
+	 * since we don't have that, we'll just fall through to bad.
+	 */
+
+	/*
 	 * bad decoding
 	 */
 bad:
@@ -119,9 +240,16 @@
 }
 
 int
-runetochar(char *str, Rune *rune)
+isvalidcharntorune(const char* str, int length, Rune* rune, int* consumed) {
+	*consumed = charntorune(rune, str, length);
+	return *rune != Runeerror || *consumed == 3;
+}
+    
+int
+runetochar(char *str, const Rune *rune)
 {
-	long c;
+	/* Runes are signed, so convert to unsigned for range check. */
+	unsigned long c;
 
 	/*
 	 * one character sequence
@@ -135,7 +263,7 @@
 
 	/*
 	 * two character sequence
-	 *	00080-007FF => T2 Tx
+	 *	0080-07FF => T2 Tx
 	 */
 	if(c <= Rune2) {
 		str[0] = T2 | (c >> 1*Bitx);
@@ -144,74 +272,79 @@
 	}
 
 	/*
-	 * three character sequence
-	 *	00800-0FFFF => T3 Tx Tx
+	 * If the Rune is out of range, convert it to the error rune.
+	 * Do this test here because the error rune encodes to three bytes.
+	 * Doing it earlier would duplicate work, since an out of range
+	 * Rune wouldn't have fit in one or two bytes.
 	 */
-	if(c > Runemax)
+	if (c > Runemax)
 		c = Runeerror;
-	if(c <= Rune3) {
+
+	/*
+	 * three character sequence
+	 *	0800-FFFF => T3 Tx Tx
+	 */
+	if (c <= Rune3) {
 		str[0] = T3 |  (c >> 2*Bitx);
 		str[1] = Tx | ((c >> 1*Bitx) & Maskx);
 		str[2] = Tx |  (c & Maskx);
 		return 3;
 	}
-	
+
 	/*
-	 * four character sequence
-	 *	010000-1FFFFF => T4 Tx Tx Tx
+	 * four character sequence (21-bit value)
+	 *     10000-1FFFFF => T4 Tx Tx Tx
 	 */
-	str[0] = T4 |  (c >> 3*Bitx);
+	str[0] = T4 | (c >> 3*Bitx);
 	str[1] = Tx | ((c >> 2*Bitx) & Maskx);
 	str[2] = Tx | ((c >> 1*Bitx) & Maskx);
-	str[3] = Tx |  (c & Maskx);
+	str[3] = Tx | (c & Maskx);
 	return 4;
 }
 
 int
-runelen(long c)
+runelen(Rune rune)
 {
-	Rune rune;
 	char str[10];
 
-	rune = c;
 	return runetochar(str, &rune);
 }
 
 int
-runenlen(Rune *r, int nrune)
+runenlen(const Rune *r, int nrune)
 {
 	int nb, c;
 
 	nb = 0;
 	while(nrune--) {
 		c = *r++;
-		if(c <= Rune1)
+		if (c <= Rune1)
 			nb++;
-		else
-		if(c <= Rune2)
+		else if (c <= Rune2)
 			nb += 2;
-		else
-		if(c <= Rune3 || c > Runemax)
+		else if (c <= Rune3)
 			nb += 3;
-		else
+		else /* assert(c <= Rune4) */ 
 			nb += 4;
 	}
 	return nb;
 }
 
 int
-fullrune(char *str, int n)
+fullrune(const char *str, int n)
 {
-	int c;
-
-	if(n <= 0)
-		return 0;
-	c = *(uchar*)str;
-	if(c < Tx)
-		return 1;
-	if(c < T3)
-		return n >= 2;
-	if(UTFmax == 3 || c < T4)
-		return n >= 3;
-	return n >= 4;
+	if (n > 0) {
+		int c = *(uchar*)str;
+		if (c < Tx)
+			return 1;
+		if (n > 1) {
+			if (c < T3)
+				return 1;
+			if (n > 2) {
+				if (c < T4 || n > 3)
+					return 1;
+			}
+		}
+	}
+	return 0;
 }
diff --git a/runestrcat.c b/runestrcat.c
index 65d4c0f..ccb7cde 100644
--- a/runestrcat.c
+++ b/runestrcat.c
@@ -7,19 +7,19 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 Rune*
-runestrcat(Rune *s1, Rune *s2)
+runestrcat(Rune *s1, const Rune *s2)
 {
 
-	runestrcpy(runestrchr(s1, 0), s2);
+	runestrcpy((Rune*)runestrchr(s1, 0), s2);
 	return s1;
 }
diff --git a/runestrchr.c b/runestrchr.c
index 21fbeeb..7acca84 100644
--- a/runestrchr.c
+++ b/runestrchr.c
@@ -7,17 +7,18 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
+const
 Rune*
-runestrchr(Rune *s, Rune c)
+runestrchr(const Rune *s, Rune c)
 {
 	Rune c0 = c;
 	Rune c1;
@@ -28,7 +29,7 @@
 		return s-1;
 	}
 
-	while(c1 = *s++)
+	while((c1 = *s++) != 0)
 		if(c1 == c0)
 			return s-1;
 	return 0;
diff --git a/runestrcmp.c b/runestrcmp.c
index a368613..c274625 100644
--- a/runestrcmp.c
+++ b/runestrcmp.c
@@ -7,17 +7,17 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 int
-runestrcmp(Rune *s1, Rune *s2)
+runestrcmp(const Rune *s1, const Rune *s2)
 {
 	Rune c1, c2;
 
diff --git a/runestrcpy.c b/runestrcpy.c
index 0659fc3..99c463d 100644
--- a/runestrcpy.c
+++ b/runestrcpy.c
@@ -7,22 +7,22 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 Rune*
-runestrcpy(Rune *s1, Rune *s2)
+runestrcpy(Rune *s1, const Rune *s2)
 {
 	Rune *os1;
 
 	os1 = s1;
-	while(*s1++ = *s2++)
+	while((*s1++ = *s2++) != 0)
 		;
 	return os1;
 }
diff --git a/runestrdup.c b/runestrdup.c
index 8170e7b..345f2b0 100644
--- a/runestrdup.c
+++ b/runestrdup.c
@@ -7,22 +7,22 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
 #include <stdlib.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 Rune*
-runestrdup(Rune *s) 
+runestrdup(const Rune *s) 
 {  
 	Rune *ns;
 
-	ns = malloc(sizeof(Rune)*(runestrlen(s) + 1));
+	ns = (Rune*)malloc(sizeof(Rune)*(runestrlen(s) + 1));
 	if(ns == 0)
 		return 0;
 
diff --git a/runestrecpy.c b/runestrecpy.c
index c543e22..d095e3a 100644
--- a/runestrecpy.c
+++ b/runestrecpy.c
@@ -7,22 +7,22 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 Rune*
-runestrecpy(Rune *s1, Rune *es1, Rune *s2)
+runestrecpy(Rune *s1, Rune *es1, const Rune *s2)
 {
 	if(s1 >= es1)
 		return s1;
 
-	while(*s1++ = *s2++){
+	while((*s1++ = *s2++) != 0){
 		if(s1 == es1){
 			*--s1 = '\0';
 			break;
diff --git a/runestrlen.c b/runestrlen.c
index 0a13ecd..ebf76da 100644
--- a/runestrlen.c
+++ b/runestrlen.c
@@ -7,17 +7,17 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 long
-runestrlen(Rune *s)
+runestrlen(const Rune *s)
 {
 
 	return runestrchr(s, 0) - s;
diff --git a/runestrncat.c b/runestrncat.c
index 9653637..3ad827e 100644
--- a/runestrncat.c
+++ b/runestrncat.c
@@ -7,23 +7,23 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 Rune*
-runestrncat(Rune *s1, Rune *s2, long n)
+runestrncat(Rune *s1, const Rune *s2, long n)
 {
 	Rune *os1;
 
 	os1 = s1;
-	s1 = runestrchr(s1, 0);
-	while(*s1++ = *s2++)
+	s1 = (Rune*)runestrchr(s1, 0);
+	while((*s1++ = *s2++) != 0)
 		if(--n < 0) {
 			s1[-1] = 0;
 			break;
diff --git a/runestrncmp.c b/runestrncmp.c
index 5e9a3b6..060a425 100644
--- a/runestrncmp.c
+++ b/runestrncmp.c
@@ -7,17 +7,17 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 int
-runestrncmp(Rune *s1, Rune *s2, long n)
+runestrncmp(const Rune *s1, const Rune *s2, long n)
 {
 	Rune c1, c2;
 
diff --git a/runestrncpy.c b/runestrncpy.c
index ffcb3e1..4deeaaf 100644
--- a/runestrncpy.c
+++ b/runestrncpy.c
@@ -7,17 +7,17 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 Rune*
-runestrncpy(Rune *s1, Rune *s2, long n)
+runestrncpy(Rune *s1, const Rune *s2, long n)
 {
 	int i;
 	Rune *os1;
diff --git a/runestrrchr.c b/runestrrchr.c
index 1b0edbb..c7fb3e1 100644
--- a/runestrrchr.c
+++ b/runestrrchr.c
@@ -7,24 +7,25 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
+const
 Rune*
-runestrrchr(Rune *s, Rune c)
+runestrrchr(const Rune *s, Rune c)
 {
-	Rune *r;
+	const Rune *r;
 
 	if(c == 0)
 		return runestrchr(s, 0);
 	r = 0;
-	while(s = runestrchr(s, c))
+	while((s = runestrchr(s, c)) != 0)
 		r = s++;
 	return r;
 }
diff --git a/runestrstr.c b/runestrstr.c
index f5fa997..fc5fd96 100644
--- a/runestrstr.c
+++ b/runestrstr.c
@@ -7,23 +7,24 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 /*
  * Return pointer to first occurrence of s2 in s1,
  * 0 if none
  */
+const
 Rune*
-runestrstr(Rune *s1, Rune *s2)
+runestrstr(const Rune *s1, const Rune *s2)
 {
-	Rune *p, *pa, *pb;
+	const Rune *p, *pa, *pb;
 	int c0, c;
 
 	c0 = *s2;
diff --git a/runetype.c b/runetype.c
index ac6d7b5..64ba1dc 100644
--- a/runetype.c
+++ b/runetype.c
@@ -7,1037 +7,22 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
-#include <stdarg.h>
-#include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
-/*
- * alpha ranges -
- *	only covers ranges not in lower||upper
- */
 static
-Rune	__alpha2[] =
-{
-	0x00d8,	0x00f6,	/* Ø - ö */
-	0x00f8,	0x01f5,	/* ø - ǵ */
-	0x0250,	0x02a8,	/* ɐ - ʨ */
-	0x038e,	0x03a1,	/* Ύ - Ρ */
-	0x03a3,	0x03ce,	/* Σ - ώ */
-	0x03d0,	0x03d6,	/* ϐ - ϖ */
-	0x03e2,	0x03f3,	/* Ϣ - ϳ */
-	0x0490,	0x04c4,	/* Ґ - ӄ */
-	0x0561,	0x0587,	/* ա - և */
-	0x05d0,	0x05ea,	/* א - ת */
-	0x05f0,	0x05f2,	/* װ - ײ */
-	0x0621,	0x063a,	/* ء - غ */
-	0x0640,	0x064a,	/* ـ - ي */
-	0x0671,	0x06b7,	/* ٱ - ڷ */
-	0x06ba,	0x06be,	/* ں - ھ */
-	0x06c0,	0x06ce,	/* ۀ - ێ */
-	0x06d0,	0x06d3,	/* ې - ۓ */
-	0x0905,	0x0939,	/* अ - ह */
-	0x0958,	0x0961,	/* क़ - ॡ */
-	0x0985,	0x098c,	/* অ - ঌ */
-	0x098f,	0x0990,	/* এ - ঐ */
-	0x0993,	0x09a8,	/* ও - ন */
-	0x09aa,	0x09b0,	/* প - র */
-	0x09b6,	0x09b9,	/* শ - হ */
-	0x09dc,	0x09dd,	/* ড় - ঢ় */
-	0x09df,	0x09e1,	/* য় - ৡ */
-	0x09f0,	0x09f1,	/* ৰ - ৱ */
-	0x0a05,	0x0a0a,	/* ਅ - ਊ */
-	0x0a0f,	0x0a10,	/* ਏ - ਐ */
-	0x0a13,	0x0a28,	/* ਓ - ਨ */
-	0x0a2a,	0x0a30,	/* ਪ - ਰ */
-	0x0a32,	0x0a33,	/* ਲ - ਲ਼ */
-	0x0a35,	0x0a36,	/* ਵ - ਸ਼ */
-	0x0a38,	0x0a39,	/* ਸ - ਹ */
-	0x0a59,	0x0a5c,	/* ਖ਼ - ੜ */
-	0x0a85,	0x0a8b,	/* અ - ઋ */
-	0x0a8f,	0x0a91,	/* એ - ઑ */
-	0x0a93,	0x0aa8,	/* ઓ - ન */
-	0x0aaa,	0x0ab0,	/* પ - ર */
-	0x0ab2,	0x0ab3,	/* લ - ળ */
-	0x0ab5,	0x0ab9,	/* વ - હ */
-	0x0b05,	0x0b0c,	/* ଅ - ଌ */
-	0x0b0f,	0x0b10,	/* ଏ - ଐ */
-	0x0b13,	0x0b28,	/* ଓ - ନ */
-	0x0b2a,	0x0b30,	/* ପ - ର */
-	0x0b32,	0x0b33,	/* ଲ - ଳ */
-	0x0b36,	0x0b39,	/* ଶ - ହ */
-	0x0b5c,	0x0b5d,	/* ଡ଼ - ଢ଼ */
-	0x0b5f,	0x0b61,	/* ୟ - ୡ */
-	0x0b85,	0x0b8a,	/* அ - ஊ */
-	0x0b8e,	0x0b90,	/* எ - ஐ */
-	0x0b92,	0x0b95,	/* ஒ - க */
-	0x0b99,	0x0b9a,	/* ங - ச */
-	0x0b9e,	0x0b9f,	/* ஞ - ட */
-	0x0ba3,	0x0ba4,	/* ண - த */
-	0x0ba8,	0x0baa,	/* ந - ப */
-	0x0bae,	0x0bb5,	/* ம - வ */
-	0x0bb7,	0x0bb9,	/* ஷ - ஹ */
-	0x0c05,	0x0c0c,	/* అ - ఌ */
-	0x0c0e,	0x0c10,	/* ఎ - ఐ */
-	0x0c12,	0x0c28,	/* ఒ - న */
-	0x0c2a,	0x0c33,	/* ప - ళ */
-	0x0c35,	0x0c39,	/* వ - హ */
-	0x0c60,	0x0c61,	/* ౠ - ౡ */
-	0x0c85,	0x0c8c,	/* ಅ - ಌ */
-	0x0c8e,	0x0c90,	/* ಎ - ಐ */
-	0x0c92,	0x0ca8,	/* ಒ - ನ */
-	0x0caa,	0x0cb3,	/* ಪ - ಳ */
-	0x0cb5,	0x0cb9,	/* ವ - ಹ */
-	0x0ce0,	0x0ce1,	/* ೠ - ೡ */
-	0x0d05,	0x0d0c,	/* അ - ഌ */
-	0x0d0e,	0x0d10,	/* എ - ഐ */
-	0x0d12,	0x0d28,	/* ഒ - ന */
-	0x0d2a,	0x0d39,	/* പ - ഹ */
-	0x0d60,	0x0d61,	/* ൠ - ൡ */
-	0x0e01,	0x0e30,	/* ก - ะ */
-	0x0e32,	0x0e33,	/* า - ำ */
-	0x0e40,	0x0e46,	/* เ - ๆ */
-	0x0e5a,	0x0e5b,	/* ๚ - ๛ */
-	0x0e81,	0x0e82,	/* ກ - ຂ */
-	0x0e87,	0x0e88,	/* ງ - ຈ */
-	0x0e94,	0x0e97,	/* ດ - ທ */
-	0x0e99,	0x0e9f,	/* ນ - ຟ */
-	0x0ea1,	0x0ea3,	/* ມ - ຣ */
-	0x0eaa,	0x0eab,	/* ສ - ຫ */
-	0x0ead,	0x0eae,	/* ອ - ຮ */
-	0x0eb2,	0x0eb3,	/* າ - ຳ */
-	0x0ec0,	0x0ec4,	/* ເ - ໄ */
-	0x0edc,	0x0edd,	/* ໜ - ໝ */
-	0x0f18,	0x0f19,	/* ༘ - ༙ */
-	0x0f40,	0x0f47,	/* ཀ - ཇ */
-	0x0f49,	0x0f69,	/* ཉ - ཀྵ */
-	0x10d0,	0x10f6,	/* ა - ჶ */
-	0x1100,	0x1159,	/* ᄀ - ᅙ */
-	0x115f,	0x11a2,	/* ᅟ - ᆢ */
-	0x11a8,	0x11f9,	/* ᆨ - ᇹ */
-	0x1e00,	0x1e9b,	/* Ḁ - ẛ */
-	0x1f50,	0x1f57,	/* ὐ - ὗ */
-	0x1f80,	0x1fb4,	/* ᾀ - ᾴ */
-	0x1fb6,	0x1fbc,	/* ᾶ - ᾼ */
-	0x1fc2,	0x1fc4,	/* ῂ - ῄ */
-	0x1fc6,	0x1fcc,	/* ῆ - ῌ */
-	0x1fd0,	0x1fd3,	/* ῐ - ΐ */
-	0x1fd6,	0x1fdb,	/* ῖ - Ί */
-	0x1fe0,	0x1fec,	/* ῠ - Ῥ */
-	0x1ff2,	0x1ff4,	/* ῲ - ῴ */
-	0x1ff6,	0x1ffc,	/* ῶ - ῼ */
-	0x210a,	0x2113,	/* ℊ - ℓ */
-	0x2115,	0x211d,	/* ℕ - ℝ */
-	0x2120,	0x2122,	/* ℠ - ™ */
-	0x212a,	0x2131,	/* K - ℱ */
-	0x2133,	0x2138,	/* ℳ - ℸ */
-	0x3041,	0x3094,	/* ぁ - ゔ */
-	0x30a1,	0x30fa,	/* ァ - ヺ */
-	0x3105,	0x312c,	/* ㄅ - ㄬ */
-	0x3131,	0x318e,	/* ㄱ - ㆎ */
-	0x3192,	0x319f,	/* ㆒ - ㆟ */
-	0x3260,	0x327b,	/* ㉠ - ㉻ */
-	0x328a,	0x32b0,	/* ㊊ - ㊰ */
-	0x32d0,	0x32fe,	/* ㋐ - ㋾ */
-	0x3300,	0x3357,	/* ㌀ - ㍗ */
-	0x3371,	0x3376,	/* ㍱ - ㍶ */
-	0x337b,	0x3394,	/* ㍻ - ㎔ */
-	0x3399,	0x339e,	/* ㎙ - ㎞ */
-	0x33a9,	0x33ad,	/* ㎩ - ㎭ */
-	0x33b0,	0x33c1,	/* ㎰ - ㏁ */
-	0x33c3,	0x33c5,	/* ㏃ - ㏅ */
-	0x33c7,	0x33d7,	/* ㏇ - ㏗ */
-	0x33d9,	0x33dd,	/* ㏙ - ㏝ */
-	0x4e00,	0x9fff,	/* 一 - 鿿 */
-	0xac00,	0xd7a3,	/* 가 - 힣 */
-	0xf900,	0xfb06,	/* 豈 - st */
-	0xfb13,	0xfb17,	/* ﬓ - ﬗ */
-	0xfb1f,	0xfb28,	/* ײַ - ﬨ */
-	0xfb2a,	0xfb36,	/* שׁ - זּ */
-	0xfb38,	0xfb3c,	/* טּ - לּ */
-	0xfb40,	0xfb41,	/* נּ - סּ */
-	0xfb43,	0xfb44,	/* ףּ - פּ */
-	0xfb46,	0xfbb1,	/* צּ - ﮱ */
-	0xfbd3,	0xfd3d,	/* ﯓ - ﴽ */
-	0xfd50,	0xfd8f,	/* ﵐ - ﶏ */
-	0xfd92,	0xfdc7,	/* ﶒ - ﷇ */
-	0xfdf0,	0xfdf9,	/* ﷰ - ﷹ */
-	0xfe70,	0xfe72,	/* ﹰ - ﹲ */
-	0xfe76,	0xfefc,	/* ﹶ - ﻼ */
-	0xff66,	0xff6f,	/* ヲ - ッ */
-	0xff71,	0xff9d,	/* ア - ン */
-	0xffa0,	0xffbe,	/* ᅠ - ᄒ */
-	0xffc2,	0xffc7,	/* ᅡ - ᅦ */
-	0xffca,	0xffcf,	/* ᅧ - ᅬ */
-	0xffd2,	0xffd7,	/* ᅭ - ᅲ */
-	0xffda,	0xffdc,	/* ᅳ - ᅵ */
-};
-
-/*
- * alpha singlets -
- *	only covers ranges not in lower||upper
- */
-static
-Rune	__alpha1[] =
-{
-	0x00aa,	/* ª */
-	0x00b5,	/* µ */
-	0x00ba,	/* º */
-	0x03da,	/* Ϛ */
-	0x03dc,	/* Ϝ */
-	0x03de,	/* Ϟ */
-	0x03e0,	/* Ϡ */
-	0x06d5,	/* ە */
-	0x09b2,	/* ল */
-	0x0a5e,	/* ਫ਼ */
-	0x0a8d,	/* ઍ */
-	0x0ae0,	/* ૠ */
-	0x0b9c,	/* ஜ */
-	0x0cde,	/* ೞ */
-	0x0e4f,	/* ๏ */
-	0x0e84,	/* ຄ */
-	0x0e8a,	/* ຊ */
-	0x0e8d,	/* ຍ */
-	0x0ea5,	/* ລ */
-	0x0ea7,	/* ວ */
-	0x0eb0,	/* ະ */
-	0x0ebd,	/* ຽ */
-	0x1fbe,	/* ι */
-	0x207f,	/* ⁿ */
-	0x20a8,	/* ₨ */
-	0x2102,	/* ℂ */
-	0x2107,	/* ℇ */
-	0x2124,	/* ℤ */
-	0x2126,	/* Ω */
-	0x2128,	/* ℨ */
-	0xfb3e,	/* מּ */
-	0xfe74,	/* ﹴ */
-};
-
-/*
- * space ranges
- */
-static
-Rune	__space2[] =
-{
-	0x0009,	0x000a,	/* tab and newline */
-	0x0020,	0x0020,	/* space */
-	0x00a0,	0x00a0,	/*   */
-	0x2000,	0x200b,	/*   - ​ */
-	0x2028,	0x2029,	/* 
 - 
 */
-	0x3000,	0x3000,	/*   */
-	0xfeff,	0xfeff,	/*  */
-};
-
-/*
- * lower case ranges
- *	3rd col is conversion excess 500
- */
-static
-Rune	__toupper2[] =
-{
-	0x0061,	0x007a, 468,	/* a-z A-Z */
-	0x00e0,	0x00f6, 468,	/* à-ö À-Ö */
-	0x00f8,	0x00fe, 468,	/* ø-þ Ø-Þ */
-	0x0256,	0x0257, 295,	/* ɖ-ɗ Ɖ-Ɗ */
-	0x0258,	0x0259, 298,	/* ɘ-ə Ǝ-Ə */
-	0x028a,	0x028b, 283,	/* ʊ-ʋ Ʊ-Ʋ */
-	0x03ad,	0x03af, 463,	/* έ-ί Έ-Ί */
-	0x03b1,	0x03c1, 468,	/* α-ρ Α-Ρ */
-	0x03c3,	0x03cb, 468,	/* σ-ϋ Σ-Ϋ */
-	0x03cd,	0x03ce, 437,	/* ύ-ώ Ύ-Ώ */
-	0x0430,	0x044f, 468,	/* а-я А-Я */
-	0x0451,	0x045c, 420,	/* ё-ќ Ё-Ќ */
-	0x045e,	0x045f, 420,	/* ў-џ Ў-Џ */
-	0x0561,	0x0586, 452,	/* ա-ֆ Ա-Ֆ */
-	0x1f00,	0x1f07, 508,	/* ἀ-ἇ Ἀ-Ἇ */
-	0x1f10,	0x1f15, 508,	/* ἐ-ἕ Ἐ-Ἕ */
-	0x1f20,	0x1f27, 508,	/* ἠ-ἧ Ἠ-Ἧ */
-	0x1f30,	0x1f37, 508,	/* ἰ-ἷ Ἰ-Ἷ */
-	0x1f40,	0x1f45, 508,	/* ὀ-ὅ Ὀ-Ὅ */
-	0x1f60,	0x1f67, 508,	/* ὠ-ὧ Ὠ-Ὧ */
-	0x1f70,	0x1f71, 574,	/* ὰ-ά Ὰ-Ά */
-	0x1f72,	0x1f75, 586,	/* ὲ-ή Ὲ-Ή */
-	0x1f76,	0x1f77, 600,	/* ὶ-ί Ὶ-Ί */
-	0x1f78,	0x1f79, 628,	/* ὸ-ό Ὸ-Ό */
-	0x1f7a,	0x1f7b, 612,	/* ὺ-ύ Ὺ-Ύ */
-	0x1f7c,	0x1f7d, 626,	/* ὼ-ώ Ὼ-Ώ */
-	0x1f80,	0x1f87, 508,	/* ᾀ-ᾇ ᾈ-ᾏ */
-	0x1f90,	0x1f97, 508,	/* ᾐ-ᾗ ᾘ-ᾟ */
-	0x1fa0,	0x1fa7, 508,	/* ᾠ-ᾧ ᾨ-ᾯ */
-	0x1fb0,	0x1fb1, 508,	/* ᾰ-ᾱ Ᾰ-Ᾱ */
-	0x1fd0,	0x1fd1, 508,	/* ῐ-ῑ Ῐ-Ῑ */
-	0x1fe0,	0x1fe1, 508,	/* ῠ-ῡ Ῠ-Ῡ */
-	0x2170,	0x217f, 484,	/* ⅰ-ⅿ Ⅰ-Ⅿ */
-	0x24d0,	0x24e9, 474,	/* ⓐ-ⓩ Ⓐ-Ⓩ */
-	0xff41,	0xff5a, 468,	/* a-z A-Z */
-};
-
-/*
- * lower case singlets
- *	2nd col is conversion excess 500
- */
-static
-Rune	__toupper1[] =
-{
-	0x00ff, 621,	/* ÿ Ÿ */
-	0x0101, 499,	/* ā Ā */
-	0x0103, 499,	/* ă Ă */
-	0x0105, 499,	/* ą Ą */
-	0x0107, 499,	/* ć Ć */
-	0x0109, 499,	/* ĉ Ĉ */
-	0x010b, 499,	/* ċ Ċ */
-	0x010d, 499,	/* č Č */
-	0x010f, 499,	/* ď Ď */
-	0x0111, 499,	/* đ Đ */
-	0x0113, 499,	/* ē Ē */
-	0x0115, 499,	/* ĕ Ĕ */
-	0x0117, 499,	/* ė Ė */
-	0x0119, 499,	/* ę Ę */
-	0x011b, 499,	/* ě Ě */
-	0x011d, 499,	/* ĝ Ĝ */
-	0x011f, 499,	/* ğ Ğ */
-	0x0121, 499,	/* ġ Ġ */
-	0x0123, 499,	/* ģ Ģ */
-	0x0125, 499,	/* ĥ Ĥ */
-	0x0127, 499,	/* ħ Ħ */
-	0x0129, 499,	/* ĩ Ĩ */
-	0x012b, 499,	/* ī Ī */
-	0x012d, 499,	/* ĭ Ĭ */
-	0x012f, 499,	/* į Į */
-	0x0131, 268,	/* ı I */
-	0x0133, 499,	/* ij IJ */
-	0x0135, 499,	/* ĵ Ĵ */
-	0x0137, 499,	/* ķ Ķ */
-	0x013a, 499,	/* ĺ Ĺ */
-	0x013c, 499,	/* ļ Ļ */
-	0x013e, 499,	/* ľ Ľ */
-	0x0140, 499,	/* ŀ Ŀ */
-	0x0142, 499,	/* ł Ł */
-	0x0144, 499,	/* ń Ń */
-	0x0146, 499,	/* ņ Ņ */
-	0x0148, 499,	/* ň Ň */
-	0x014b, 499,	/* ŋ Ŋ */
-	0x014d, 499,	/* ō Ō */
-	0x014f, 499,	/* ŏ Ŏ */
-	0x0151, 499,	/* ő Ő */
-	0x0153, 499,	/* œ Œ */
-	0x0155, 499,	/* ŕ Ŕ */
-	0x0157, 499,	/* ŗ Ŗ */
-	0x0159, 499,	/* ř Ř */
-	0x015b, 499,	/* ś Ś */
-	0x015d, 499,	/* ŝ Ŝ */
-	0x015f, 499,	/* ş Ş */
-	0x0161, 499,	/* š Š */
-	0x0163, 499,	/* ţ Ţ */
-	0x0165, 499,	/* ť Ť */
-	0x0167, 499,	/* ŧ Ŧ */
-	0x0169, 499,	/* ũ Ũ */
-	0x016b, 499,	/* ū Ū */
-	0x016d, 499,	/* ŭ Ŭ */
-	0x016f, 499,	/* ů Ů */
-	0x0171, 499,	/* ű Ű */
-	0x0173, 499,	/* ų Ų */
-	0x0175, 499,	/* ŵ Ŵ */
-	0x0177, 499,	/* ŷ Ŷ */
-	0x017a, 499,	/* ź Ź */
-	0x017c, 499,	/* ż Ż */
-	0x017e, 499,	/* ž Ž */
-	0x017f, 200,	/* ſ S */
-	0x0183, 499,	/* ƃ Ƃ */
-	0x0185, 499,	/* ƅ Ƅ */
-	0x0188, 499,	/* ƈ Ƈ */
-	0x018c, 499,	/* ƌ Ƌ */
-	0x0192, 499,	/* ƒ Ƒ */
-	0x0199, 499,	/* ƙ Ƙ */
-	0x01a1, 499,	/* ơ Ơ */
-	0x01a3, 499,	/* ƣ Ƣ */
-	0x01a5, 499,	/* ƥ Ƥ */
-	0x01a8, 499,	/* ƨ Ƨ */
-	0x01ad, 499,	/* ƭ Ƭ */
-	0x01b0, 499,	/* ư Ư */
-	0x01b4, 499,	/* ƴ Ƴ */
-	0x01b6, 499,	/* ƶ Ƶ */
-	0x01b9, 499,	/* ƹ Ƹ */
-	0x01bd, 499,	/* ƽ Ƽ */
-	0x01c5, 499,	/* Dž DŽ */
-	0x01c6, 498,	/* dž DŽ */
-	0x01c8, 499,	/* Lj LJ */
-	0x01c9, 498,	/* lj LJ */
-	0x01cb, 499,	/* Nj NJ */
-	0x01cc, 498,	/* nj NJ */
-	0x01ce, 499,	/* ǎ Ǎ */
-	0x01d0, 499,	/* ǐ Ǐ */
-	0x01d2, 499,	/* ǒ Ǒ */
-	0x01d4, 499,	/* ǔ Ǔ */
-	0x01d6, 499,	/* ǖ Ǖ */
-	0x01d8, 499,	/* ǘ Ǘ */
-	0x01da, 499,	/* ǚ Ǚ */
-	0x01dc, 499,	/* ǜ Ǜ */
-	0x01df, 499,	/* ǟ Ǟ */
-	0x01e1, 499,	/* ǡ Ǡ */
-	0x01e3, 499,	/* ǣ Ǣ */
-	0x01e5, 499,	/* ǥ Ǥ */
-	0x01e7, 499,	/* ǧ Ǧ */
-	0x01e9, 499,	/* ǩ Ǩ */
-	0x01eb, 499,	/* ǫ Ǫ */
-	0x01ed, 499,	/* ǭ Ǭ */
-	0x01ef, 499,	/* ǯ Ǯ */
-	0x01f2, 499,	/* Dz DZ */
-	0x01f3, 498,	/* dz DZ */
-	0x01f5, 499,	/* ǵ Ǵ */
-	0x01fb, 499,	/* ǻ Ǻ */
-	0x01fd, 499,	/* ǽ Ǽ */
-	0x01ff, 499,	/* ǿ Ǿ */
-	0x0201, 499,	/* ȁ Ȁ */
-	0x0203, 499,	/* ȃ Ȃ */
-	0x0205, 499,	/* ȅ Ȅ */
-	0x0207, 499,	/* ȇ Ȇ */
-	0x0209, 499,	/* ȉ Ȉ */
-	0x020b, 499,	/* ȋ Ȋ */
-	0x020d, 499,	/* ȍ Ȍ */
-	0x020f, 499,	/* ȏ Ȏ */
-	0x0211, 499,	/* ȑ Ȑ */
-	0x0213, 499,	/* ȓ Ȓ */
-	0x0215, 499,	/* ȕ Ȕ */
-	0x0217, 499,	/* ȗ Ȗ */
-	0x0253, 290,	/* ɓ Ɓ */
-	0x0254, 294,	/* ɔ Ɔ */
-	0x025b, 297,	/* ɛ Ɛ */
-	0x0260, 295,	/* ɠ Ɠ */
-	0x0263, 293,	/* ɣ Ɣ */
-	0x0268, 291,	/* ɨ Ɨ */
-	0x0269, 289,	/* ɩ Ɩ */
-	0x026f, 289,	/* ɯ Ɯ */
-	0x0272, 287,	/* ɲ Ɲ */
-	0x0283, 282,	/* ʃ Ʃ */
-	0x0288, 282,	/* ʈ Ʈ */
-	0x0292, 281,	/* ʒ Ʒ */
-	0x03ac, 462,	/* ά Ά */
-	0x03cc, 436,	/* ό Ό */
-	0x03d0, 438,	/* ϐ Β */
-	0x03d1, 443,	/* ϑ Θ */
-	0x03d5, 453,	/* ϕ Φ */
-	0x03d6, 446,	/* ϖ Π */
-	0x03e3, 499,	/* ϣ Ϣ */
-	0x03e5, 499,	/* ϥ Ϥ */
-	0x03e7, 499,	/* ϧ Ϧ */
-	0x03e9, 499,	/* ϩ Ϩ */
-	0x03eb, 499,	/* ϫ Ϫ */
-	0x03ed, 499,	/* ϭ Ϭ */
-	0x03ef, 499,	/* ϯ Ϯ */
-	0x03f0, 414,	/* ϰ Κ */
-	0x03f1, 420,	/* ϱ Ρ */
-	0x0461, 499,	/* ѡ Ѡ */
-	0x0463, 499,	/* ѣ Ѣ */
-	0x0465, 499,	/* ѥ Ѥ */
-	0x0467, 499,	/* ѧ Ѧ */
-	0x0469, 499,	/* ѩ Ѩ */
-	0x046b, 499,	/* ѫ Ѫ */
-	0x046d, 499,	/* ѭ Ѭ */
-	0x046f, 499,	/* ѯ Ѯ */
-	0x0471, 499,	/* ѱ Ѱ */
-	0x0473, 499,	/* ѳ Ѳ */
-	0x0475, 499,	/* ѵ Ѵ */
-	0x0477, 499,	/* ѷ Ѷ */
-	0x0479, 499,	/* ѹ Ѹ */
-	0x047b, 499,	/* ѻ Ѻ */
-	0x047d, 499,	/* ѽ Ѽ */
-	0x047f, 499,	/* ѿ Ѿ */
-	0x0481, 499,	/* ҁ Ҁ */
-	0x0491, 499,	/* ґ Ґ */
-	0x0493, 499,	/* ғ Ғ */
-	0x0495, 499,	/* ҕ Ҕ */
-	0x0497, 499,	/* җ Җ */
-	0x0499, 499,	/* ҙ Ҙ */
-	0x049b, 499,	/* қ Қ */
-	0x049d, 499,	/* ҝ Ҝ */
-	0x049f, 499,	/* ҟ Ҟ */
-	0x04a1, 499,	/* ҡ Ҡ */
-	0x04a3, 499,	/* ң Ң */
-	0x04a5, 499,	/* ҥ Ҥ */
-	0x04a7, 499,	/* ҧ Ҧ */
-	0x04a9, 499,	/* ҩ Ҩ */
-	0x04ab, 499,	/* ҫ Ҫ */
-	0x04ad, 499,	/* ҭ Ҭ */
-	0x04af, 499,	/* ү Ү */
-	0x04b1, 499,	/* ұ Ұ */
-	0x04b3, 499,	/* ҳ Ҳ */
-	0x04b5, 499,	/* ҵ Ҵ */
-	0x04b7, 499,	/* ҷ Ҷ */
-	0x04b9, 499,	/* ҹ Ҹ */
-	0x04bb, 499,	/* һ Һ */
-	0x04bd, 499,	/* ҽ Ҽ */
-	0x04bf, 499,	/* ҿ Ҿ */
-	0x04c2, 499,	/* ӂ Ӂ */
-	0x04c4, 499,	/* ӄ Ӄ */
-	0x04c8, 499,	/* ӈ Ӈ */
-	0x04cc, 499,	/* ӌ Ӌ */
-	0x04d1, 499,	/* ӑ Ӑ */
-	0x04d3, 499,	/* ӓ Ӓ */
-	0x04d5, 499,	/* ӕ Ӕ */
-	0x04d7, 499,	/* ӗ Ӗ */
-	0x04d9, 499,	/* ә Ә */
-	0x04db, 499,	/* ӛ Ӛ */
-	0x04dd, 499,	/* ӝ Ӝ */
-	0x04df, 499,	/* ӟ Ӟ */
-	0x04e1, 499,	/* ӡ Ӡ */
-	0x04e3, 499,	/* ӣ Ӣ */
-	0x04e5, 499,	/* ӥ Ӥ */
-	0x04e7, 499,	/* ӧ Ӧ */
-	0x04e9, 499,	/* ө Ө */
-	0x04eb, 499,	/* ӫ Ӫ */
-	0x04ef, 499,	/* ӯ Ӯ */
-	0x04f1, 499,	/* ӱ Ӱ */
-	0x04f3, 499,	/* ӳ Ӳ */
-	0x04f5, 499,	/* ӵ Ӵ */
-	0x04f9, 499,	/* ӹ Ӹ */
-	0x1e01, 499,	/* ḁ Ḁ */
-	0x1e03, 499,	/* ḃ Ḃ */
-	0x1e05, 499,	/* ḅ Ḅ */
-	0x1e07, 499,	/* ḇ Ḇ */
-	0x1e09, 499,	/* ḉ Ḉ */
-	0x1e0b, 499,	/* ḋ Ḋ */
-	0x1e0d, 499,	/* ḍ Ḍ */
-	0x1e0f, 499,	/* ḏ Ḏ */
-	0x1e11, 499,	/* ḑ Ḑ */
-	0x1e13, 499,	/* ḓ Ḓ */
-	0x1e15, 499,	/* ḕ Ḕ */
-	0x1e17, 499,	/* ḗ Ḗ */
-	0x1e19, 499,	/* ḙ Ḙ */
-	0x1e1b, 499,	/* ḛ Ḛ */
-	0x1e1d, 499,	/* ḝ Ḝ */
-	0x1e1f, 499,	/* ḟ Ḟ */
-	0x1e21, 499,	/* ḡ Ḡ */
-	0x1e23, 499,	/* ḣ Ḣ */
-	0x1e25, 499,	/* ḥ Ḥ */
-	0x1e27, 499,	/* ḧ Ḧ */
-	0x1e29, 499,	/* ḩ Ḩ */
-	0x1e2b, 499,	/* ḫ Ḫ */
-	0x1e2d, 499,	/* ḭ Ḭ */
-	0x1e2f, 499,	/* ḯ Ḯ */
-	0x1e31, 499,	/* ḱ Ḱ */
-	0x1e33, 499,	/* ḳ Ḳ */
-	0x1e35, 499,	/* ḵ Ḵ */
-	0x1e37, 499,	/* ḷ Ḷ */
-	0x1e39, 499,	/* ḹ Ḹ */
-	0x1e3b, 499,	/* ḻ Ḻ */
-	0x1e3d, 499,	/* ḽ Ḽ */
-	0x1e3f, 499,	/* ḿ Ḿ */
-	0x1e41, 499,	/* ṁ Ṁ */
-	0x1e43, 499,	/* ṃ Ṃ */
-	0x1e45, 499,	/* ṅ Ṅ */
-	0x1e47, 499,	/* ṇ Ṇ */
-	0x1e49, 499,	/* ṉ Ṉ */
-	0x1e4b, 499,	/* ṋ Ṋ */
-	0x1e4d, 499,	/* ṍ Ṍ */
-	0x1e4f, 499,	/* ṏ Ṏ */
-	0x1e51, 499,	/* ṑ Ṑ */
-	0x1e53, 499,	/* ṓ Ṓ */
-	0x1e55, 499,	/* ṕ Ṕ */
-	0x1e57, 499,	/* ṗ Ṗ */
-	0x1e59, 499,	/* ṙ Ṙ */
-	0x1e5b, 499,	/* ṛ Ṛ */
-	0x1e5d, 499,	/* ṝ Ṝ */
-	0x1e5f, 499,	/* ṟ Ṟ */
-	0x1e61, 499,	/* ṡ Ṡ */
-	0x1e63, 499,	/* ṣ Ṣ */
-	0x1e65, 499,	/* ṥ Ṥ */
-	0x1e67, 499,	/* ṧ Ṧ */
-	0x1e69, 499,	/* ṩ Ṩ */
-	0x1e6b, 499,	/* ṫ Ṫ */
-	0x1e6d, 499,	/* ṭ Ṭ */
-	0x1e6f, 499,	/* ṯ Ṯ */
-	0x1e71, 499,	/* ṱ Ṱ */
-	0x1e73, 499,	/* ṳ Ṳ */
-	0x1e75, 499,	/* ṵ Ṵ */
-	0x1e77, 499,	/* ṷ Ṷ */
-	0x1e79, 499,	/* ṹ Ṹ */
-	0x1e7b, 499,	/* ṻ Ṻ */
-	0x1e7d, 499,	/* ṽ Ṽ */
-	0x1e7f, 499,	/* ṿ Ṿ */
-	0x1e81, 499,	/* ẁ Ẁ */
-	0x1e83, 499,	/* ẃ Ẃ */
-	0x1e85, 499,	/* ẅ Ẅ */
-	0x1e87, 499,	/* ẇ Ẇ */
-	0x1e89, 499,	/* ẉ Ẉ */
-	0x1e8b, 499,	/* ẋ Ẋ */
-	0x1e8d, 499,	/* ẍ Ẍ */
-	0x1e8f, 499,	/* ẏ Ẏ */
-	0x1e91, 499,	/* ẑ Ẑ */
-	0x1e93, 499,	/* ẓ Ẓ */
-	0x1e95, 499,	/* ẕ Ẕ */
-	0x1ea1, 499,	/* ạ Ạ */
-	0x1ea3, 499,	/* ả Ả */
-	0x1ea5, 499,	/* ấ Ấ */
-	0x1ea7, 499,	/* ầ Ầ */
-	0x1ea9, 499,	/* ẩ Ẩ */
-	0x1eab, 499,	/* ẫ Ẫ */
-	0x1ead, 499,	/* ậ Ậ */
-	0x1eaf, 499,	/* ắ Ắ */
-	0x1eb1, 499,	/* ằ Ằ */
-	0x1eb3, 499,	/* ẳ Ẳ */
-	0x1eb5, 499,	/* ẵ Ẵ */
-	0x1eb7, 499,	/* ặ Ặ */
-	0x1eb9, 499,	/* ẹ Ẹ */
-	0x1ebb, 499,	/* ẻ Ẻ */
-	0x1ebd, 499,	/* ẽ Ẽ */
-	0x1ebf, 499,	/* ế Ế */
-	0x1ec1, 499,	/* ề Ề */
-	0x1ec3, 499,	/* ể Ể */
-	0x1ec5, 499,	/* ễ Ễ */
-	0x1ec7, 499,	/* ệ Ệ */
-	0x1ec9, 499,	/* ỉ Ỉ */
-	0x1ecb, 499,	/* ị Ị */
-	0x1ecd, 499,	/* ọ Ọ */
-	0x1ecf, 499,	/* ỏ Ỏ */
-	0x1ed1, 499,	/* ố Ố */
-	0x1ed3, 499,	/* ồ Ồ */
-	0x1ed5, 499,	/* ổ Ổ */
-	0x1ed7, 499,	/* ỗ Ỗ */
-	0x1ed9, 499,	/* ộ Ộ */
-	0x1edb, 499,	/* ớ Ớ */
-	0x1edd, 499,	/* ờ Ờ */
-	0x1edf, 499,	/* ở Ở */
-	0x1ee1, 499,	/* ỡ Ỡ */
-	0x1ee3, 499,	/* ợ Ợ */
-	0x1ee5, 499,	/* ụ Ụ */
-	0x1ee7, 499,	/* ủ Ủ */
-	0x1ee9, 499,	/* ứ Ứ */
-	0x1eeb, 499,	/* ừ Ừ */
-	0x1eed, 499,	/* ử Ử */
-	0x1eef, 499,	/* ữ Ữ */
-	0x1ef1, 499,	/* ự Ự */
-	0x1ef3, 499,	/* ỳ Ỳ */
-	0x1ef5, 499,	/* ỵ Ỵ */
-	0x1ef7, 499,	/* ỷ Ỷ */
-	0x1ef9, 499,	/* ỹ Ỹ */
-	0x1f51, 508,	/* ὑ Ὑ */
-	0x1f53, 508,	/* ὓ Ὓ */
-	0x1f55, 508,	/* ὕ Ὕ */
-	0x1f57, 508,	/* ὗ Ὗ */
-	0x1fb3, 509,	/* ᾳ ᾼ */
-	0x1fc3, 509,	/* ῃ ῌ */
-	0x1fe5, 507,	/* ῥ Ῥ */
-	0x1ff3, 509,	/* ῳ ῼ */
-};
-
-/*
- * upper case ranges
- *	3rd col is conversion excess 500
- */
-static
-Rune	__tolower2[] =
-{
-	0x0041,	0x005a, 532,	/* A-Z a-z */
-	0x00c0,	0x00d6, 532,	/* À-Ö à-ö */
-	0x00d8,	0x00de, 532,	/* Ø-Þ ø-þ */
-	0x0189,	0x018a, 705,	/* Ɖ-Ɗ ɖ-ɗ */
-	0x018e,	0x018f, 702,	/* Ǝ-Ə ɘ-ə */
-	0x01b1,	0x01b2, 717,	/* Ʊ-Ʋ ʊ-ʋ */
-	0x0388,	0x038a, 537,	/* Έ-Ί έ-ί */
-	0x038e,	0x038f, 563,	/* Ύ-Ώ ύ-ώ */
-	0x0391,	0x03a1, 532,	/* Α-Ρ α-ρ */
-	0x03a3,	0x03ab, 532,	/* Σ-Ϋ σ-ϋ */
-	0x0401,	0x040c, 580,	/* Ё-Ќ ё-ќ */
-	0x040e,	0x040f, 580,	/* Ў-Џ ў-џ */
-	0x0410,	0x042f, 532,	/* А-Я а-я */
-	0x0531,	0x0556, 548,	/* Ա-Ֆ ա-ֆ */
-	0x10a0,	0x10c5, 548,	/* Ⴀ-Ⴥ ა-ჵ */
-	0x1f08,	0x1f0f, 492,	/* Ἀ-Ἇ ἀ-ἇ */
-	0x1f18,	0x1f1d, 492,	/* Ἐ-Ἕ ἐ-ἕ */
-	0x1f28,	0x1f2f, 492,	/* Ἠ-Ἧ ἠ-ἧ */
-	0x1f38,	0x1f3f, 492,	/* Ἰ-Ἷ ἰ-ἷ */
-	0x1f48,	0x1f4d, 492,	/* Ὀ-Ὅ ὀ-ὅ */
-	0x1f68,	0x1f6f, 492,	/* Ὠ-Ὧ ὠ-ὧ */
-	0x1f88,	0x1f8f, 492,	/* ᾈ-ᾏ ᾀ-ᾇ */
-	0x1f98,	0x1f9f, 492,	/* ᾘ-ᾟ ᾐ-ᾗ */
-	0x1fa8,	0x1faf, 492,	/* ᾨ-ᾯ ᾠ-ᾧ */
-	0x1fb8,	0x1fb9, 492,	/* Ᾰ-Ᾱ ᾰ-ᾱ */
-	0x1fba,	0x1fbb, 426,	/* Ὰ-Ά ὰ-ά */
-	0x1fc8,	0x1fcb, 414,	/* Ὲ-Ή ὲ-ή */
-	0x1fd8,	0x1fd9, 492,	/* Ῐ-Ῑ ῐ-ῑ */
-	0x1fda,	0x1fdb, 400,	/* Ὶ-Ί ὶ-ί */
-	0x1fe8,	0x1fe9, 492,	/* Ῠ-Ῡ ῠ-ῡ */
-	0x1fea,	0x1feb, 388,	/* Ὺ-Ύ ὺ-ύ */
-	0x1ff8,	0x1ff9, 372,	/* Ὸ-Ό ὸ-ό */
-	0x1ffa,	0x1ffb, 374,	/* Ὼ-Ώ ὼ-ώ */
-	0x2160,	0x216f, 516,	/* Ⅰ-Ⅿ ⅰ-ⅿ */
-	0x24b6,	0x24cf, 526,	/* Ⓐ-Ⓩ ⓐ-ⓩ */
-	0xff21,	0xff3a, 532,	/* A-Z a-z */
-};
-
-/*
- * upper case singlets
- *	2nd col is conversion excess 500
- */
-static
-Rune	__tolower1[] =
-{
-	0x0100, 501,	/* Ā ā */
-	0x0102, 501,	/* Ă ă */
-	0x0104, 501,	/* Ą ą */
-	0x0106, 501,	/* Ć ć */
-	0x0108, 501,	/* Ĉ ĉ */
-	0x010a, 501,	/* Ċ ċ */
-	0x010c, 501,	/* Č č */
-	0x010e, 501,	/* Ď ď */
-	0x0110, 501,	/* Đ đ */
-	0x0112, 501,	/* Ē ē */
-	0x0114, 501,	/* Ĕ ĕ */
-	0x0116, 501,	/* Ė ė */
-	0x0118, 501,	/* Ę ę */
-	0x011a, 501,	/* Ě ě */
-	0x011c, 501,	/* Ĝ ĝ */
-	0x011e, 501,	/* Ğ ğ */
-	0x0120, 501,	/* Ġ ġ */
-	0x0122, 501,	/* Ģ ģ */
-	0x0124, 501,	/* Ĥ ĥ */
-	0x0126, 501,	/* Ħ ħ */
-	0x0128, 501,	/* Ĩ ĩ */
-	0x012a, 501,	/* Ī ī */
-	0x012c, 501,	/* Ĭ ĭ */
-	0x012e, 501,	/* Į į */
-	0x0130, 301,	/* İ i */
-	0x0132, 501,	/* IJ ij */
-	0x0134, 501,	/* Ĵ ĵ */
-	0x0136, 501,	/* Ķ ķ */
-	0x0139, 501,	/* Ĺ ĺ */
-	0x013b, 501,	/* Ļ ļ */
-	0x013d, 501,	/* Ľ ľ */
-	0x013f, 501,	/* Ŀ ŀ */
-	0x0141, 501,	/* Ł ł */
-	0x0143, 501,	/* Ń ń */
-	0x0145, 501,	/* Ņ ņ */
-	0x0147, 501,	/* Ň ň */
-	0x014a, 501,	/* Ŋ ŋ */
-	0x014c, 501,	/* Ō ō */
-	0x014e, 501,	/* Ŏ ŏ */
-	0x0150, 501,	/* Ő ő */
-	0x0152, 501,	/* Œ œ */
-	0x0154, 501,	/* Ŕ ŕ */
-	0x0156, 501,	/* Ŗ ŗ */
-	0x0158, 501,	/* Ř ř */
-	0x015a, 501,	/* Ś ś */
-	0x015c, 501,	/* Ŝ ŝ */
-	0x015e, 501,	/* Ş ş */
-	0x0160, 501,	/* Š š */
-	0x0162, 501,	/* Ţ ţ */
-	0x0164, 501,	/* Ť ť */
-	0x0166, 501,	/* Ŧ ŧ */
-	0x0168, 501,	/* Ũ ũ */
-	0x016a, 501,	/* Ū ū */
-	0x016c, 501,	/* Ŭ ŭ */
-	0x016e, 501,	/* Ů ů */
-	0x0170, 501,	/* Ű ű */
-	0x0172, 501,	/* Ų ų */
-	0x0174, 501,	/* Ŵ ŵ */
-	0x0176, 501,	/* Ŷ ŷ */
-	0x0178, 379,	/* Ÿ ÿ */
-	0x0179, 501,	/* Ź ź */
-	0x017b, 501,	/* Ż ż */
-	0x017d, 501,	/* Ž ž */
-	0x0181, 710,	/* Ɓ ɓ */
-	0x0182, 501,	/* Ƃ ƃ */
-	0x0184, 501,	/* Ƅ ƅ */
-	0x0186, 706,	/* Ɔ ɔ */
-	0x0187, 501,	/* Ƈ ƈ */
-	0x018b, 501,	/* Ƌ ƌ */
-	0x0190, 703,	/* Ɛ ɛ */
-	0x0191, 501,	/* Ƒ ƒ */
-	0x0193, 705,	/* Ɠ ɠ */
-	0x0194, 707,	/* Ɣ ɣ */
-	0x0196, 711,	/* Ɩ ɩ */
-	0x0197, 709,	/* Ɨ ɨ */
-	0x0198, 501,	/* Ƙ ƙ */
-	0x019c, 711,	/* Ɯ ɯ */
-	0x019d, 713,	/* Ɲ ɲ */
-	0x01a0, 501,	/* Ơ ơ */
-	0x01a2, 501,	/* Ƣ ƣ */
-	0x01a4, 501,	/* Ƥ ƥ */
-	0x01a7, 501,	/* Ƨ ƨ */
-	0x01a9, 718,	/* Ʃ ʃ */
-	0x01ac, 501,	/* Ƭ ƭ */
-	0x01ae, 718,	/* Ʈ ʈ */
-	0x01af, 501,	/* Ư ư */
-	0x01b3, 501,	/* Ƴ ƴ */
-	0x01b5, 501,	/* Ƶ ƶ */
-	0x01b7, 719,	/* Ʒ ʒ */
-	0x01b8, 501,	/* Ƹ ƹ */
-	0x01bc, 501,	/* Ƽ ƽ */
-	0x01c4, 502,	/* DŽ dž */
-	0x01c5, 501,	/* Dž dž */
-	0x01c7, 502,	/* LJ lj */
-	0x01c8, 501,	/* Lj lj */
-	0x01ca, 502,	/* NJ nj */
-	0x01cb, 501,	/* Nj nj */
-	0x01cd, 501,	/* Ǎ ǎ */
-	0x01cf, 501,	/* Ǐ ǐ */
-	0x01d1, 501,	/* Ǒ ǒ */
-	0x01d3, 501,	/* Ǔ ǔ */
-	0x01d5, 501,	/* Ǖ ǖ */
-	0x01d7, 501,	/* Ǘ ǘ */
-	0x01d9, 501,	/* Ǚ ǚ */
-	0x01db, 501,	/* Ǜ ǜ */
-	0x01de, 501,	/* Ǟ ǟ */
-	0x01e0, 501,	/* Ǡ ǡ */
-	0x01e2, 501,	/* Ǣ ǣ */
-	0x01e4, 501,	/* Ǥ ǥ */
-	0x01e6, 501,	/* Ǧ ǧ */
-	0x01e8, 501,	/* Ǩ ǩ */
-	0x01ea, 501,	/* Ǫ ǫ */
-	0x01ec, 501,	/* Ǭ ǭ */
-	0x01ee, 501,	/* Ǯ ǯ */
-	0x01f1, 502,	/* DZ dz */
-	0x01f2, 501,	/* Dz dz */
-	0x01f4, 501,	/* Ǵ ǵ */
-	0x01fa, 501,	/* Ǻ ǻ */
-	0x01fc, 501,	/* Ǽ ǽ */
-	0x01fe, 501,	/* Ǿ ǿ */
-	0x0200, 501,	/* Ȁ ȁ */
-	0x0202, 501,	/* Ȃ ȃ */
-	0x0204, 501,	/* Ȅ ȅ */
-	0x0206, 501,	/* Ȇ ȇ */
-	0x0208, 501,	/* Ȉ ȉ */
-	0x020a, 501,	/* Ȋ ȋ */
-	0x020c, 501,	/* Ȍ ȍ */
-	0x020e, 501,	/* Ȏ ȏ */
-	0x0210, 501,	/* Ȑ ȑ */
-	0x0212, 501,	/* Ȓ ȓ */
-	0x0214, 501,	/* Ȕ ȕ */
-	0x0216, 501,	/* Ȗ ȗ */
-	0x0386, 538,	/* Ά ά */
-	0x038c, 564,	/* Ό ό */
-	0x03e2, 501,	/* Ϣ ϣ */
-	0x03e4, 501,	/* Ϥ ϥ */
-	0x03e6, 501,	/* Ϧ ϧ */
-	0x03e8, 501,	/* Ϩ ϩ */
-	0x03ea, 501,	/* Ϫ ϫ */
-	0x03ec, 501,	/* Ϭ ϭ */
-	0x03ee, 501,	/* Ϯ ϯ */
-	0x0460, 501,	/* Ѡ ѡ */
-	0x0462, 501,	/* Ѣ ѣ */
-	0x0464, 501,	/* Ѥ ѥ */
-	0x0466, 501,	/* Ѧ ѧ */
-	0x0468, 501,	/* Ѩ ѩ */
-	0x046a, 501,	/* Ѫ ѫ */
-	0x046c, 501,	/* Ѭ ѭ */
-	0x046e, 501,	/* Ѯ ѯ */
-	0x0470, 501,	/* Ѱ ѱ */
-	0x0472, 501,	/* Ѳ ѳ */
-	0x0474, 501,	/* Ѵ ѵ */
-	0x0476, 501,	/* Ѷ ѷ */
-	0x0478, 501,	/* Ѹ ѹ */
-	0x047a, 501,	/* Ѻ ѻ */
-	0x047c, 501,	/* Ѽ ѽ */
-	0x047e, 501,	/* Ѿ ѿ */
-	0x0480, 501,	/* Ҁ ҁ */
-	0x0490, 501,	/* Ґ ґ */
-	0x0492, 501,	/* Ғ ғ */
-	0x0494, 501,	/* Ҕ ҕ */
-	0x0496, 501,	/* Җ җ */
-	0x0498, 501,	/* Ҙ ҙ */
-	0x049a, 501,	/* Қ қ */
-	0x049c, 501,	/* Ҝ ҝ */
-	0x049e, 501,	/* Ҟ ҟ */
-	0x04a0, 501,	/* Ҡ ҡ */
-	0x04a2, 501,	/* Ң ң */
-	0x04a4, 501,	/* Ҥ ҥ */
-	0x04a6, 501,	/* Ҧ ҧ */
-	0x04a8, 501,	/* Ҩ ҩ */
-	0x04aa, 501,	/* Ҫ ҫ */
-	0x04ac, 501,	/* Ҭ ҭ */
-	0x04ae, 501,	/* Ү ү */
-	0x04b0, 501,	/* Ұ ұ */
-	0x04b2, 501,	/* Ҳ ҳ */
-	0x04b4, 501,	/* Ҵ ҵ */
-	0x04b6, 501,	/* Ҷ ҷ */
-	0x04b8, 501,	/* Ҹ ҹ */
-	0x04ba, 501,	/* Һ һ */
-	0x04bc, 501,	/* Ҽ ҽ */
-	0x04be, 501,	/* Ҿ ҿ */
-	0x04c1, 501,	/* Ӂ ӂ */
-	0x04c3, 501,	/* Ӄ ӄ */
-	0x04c7, 501,	/* Ӈ ӈ */
-	0x04cb, 501,	/* Ӌ ӌ */
-	0x04d0, 501,	/* Ӑ ӑ */
-	0x04d2, 501,	/* Ӓ ӓ */
-	0x04d4, 501,	/* Ӕ ӕ */
-	0x04d6, 501,	/* Ӗ ӗ */
-	0x04d8, 501,	/* Ә ә */
-	0x04da, 501,	/* Ӛ ӛ */
-	0x04dc, 501,	/* Ӝ ӝ */
-	0x04de, 501,	/* Ӟ ӟ */
-	0x04e0, 501,	/* Ӡ ӡ */
-	0x04e2, 501,	/* Ӣ ӣ */
-	0x04e4, 501,	/* Ӥ ӥ */
-	0x04e6, 501,	/* Ӧ ӧ */
-	0x04e8, 501,	/* Ө ө */
-	0x04ea, 501,	/* Ӫ ӫ */
-	0x04ee, 501,	/* Ӯ ӯ */
-	0x04f0, 501,	/* Ӱ ӱ */
-	0x04f2, 501,	/* Ӳ ӳ */
-	0x04f4, 501,	/* Ӵ ӵ */
-	0x04f8, 501,	/* Ӹ ӹ */
-	0x1e00, 501,	/* Ḁ ḁ */
-	0x1e02, 501,	/* Ḃ ḃ */
-	0x1e04, 501,	/* Ḅ ḅ */
-	0x1e06, 501,	/* Ḇ ḇ */
-	0x1e08, 501,	/* Ḉ ḉ */
-	0x1e0a, 501,	/* Ḋ ḋ */
-	0x1e0c, 501,	/* Ḍ ḍ */
-	0x1e0e, 501,	/* Ḏ ḏ */
-	0x1e10, 501,	/* Ḑ ḑ */
-	0x1e12, 501,	/* Ḓ ḓ */
-	0x1e14, 501,	/* Ḕ ḕ */
-	0x1e16, 501,	/* Ḗ ḗ */
-	0x1e18, 501,	/* Ḙ ḙ */
-	0x1e1a, 501,	/* Ḛ ḛ */
-	0x1e1c, 501,	/* Ḝ ḝ */
-	0x1e1e, 501,	/* Ḟ ḟ */
-	0x1e20, 501,	/* Ḡ ḡ */
-	0x1e22, 501,	/* Ḣ ḣ */
-	0x1e24, 501,	/* Ḥ ḥ */
-	0x1e26, 501,	/* Ḧ ḧ */
-	0x1e28, 501,	/* Ḩ ḩ */
-	0x1e2a, 501,	/* Ḫ ḫ */
-	0x1e2c, 501,	/* Ḭ ḭ */
-	0x1e2e, 501,	/* Ḯ ḯ */
-	0x1e30, 501,	/* Ḱ ḱ */
-	0x1e32, 501,	/* Ḳ ḳ */
-	0x1e34, 501,	/* Ḵ ḵ */
-	0x1e36, 501,	/* Ḷ ḷ */
-	0x1e38, 501,	/* Ḹ ḹ */
-	0x1e3a, 501,	/* Ḻ ḻ */
-	0x1e3c, 501,	/* Ḽ ḽ */
-	0x1e3e, 501,	/* Ḿ ḿ */
-	0x1e40, 501,	/* Ṁ ṁ */
-	0x1e42, 501,	/* Ṃ ṃ */
-	0x1e44, 501,	/* Ṅ ṅ */
-	0x1e46, 501,	/* Ṇ ṇ */
-	0x1e48, 501,	/* Ṉ ṉ */
-	0x1e4a, 501,	/* Ṋ ṋ */
-	0x1e4c, 501,	/* Ṍ ṍ */
-	0x1e4e, 501,	/* Ṏ ṏ */
-	0x1e50, 501,	/* Ṑ ṑ */
-	0x1e52, 501,	/* Ṓ ṓ */
-	0x1e54, 501,	/* Ṕ ṕ */
-	0x1e56, 501,	/* Ṗ ṗ */
-	0x1e58, 501,	/* Ṙ ṙ */
-	0x1e5a, 501,	/* Ṛ ṛ */
-	0x1e5c, 501,	/* Ṝ ṝ */
-	0x1e5e, 501,	/* Ṟ ṟ */
-	0x1e60, 501,	/* Ṡ ṡ */
-	0x1e62, 501,	/* Ṣ ṣ */
-	0x1e64, 501,	/* Ṥ ṥ */
-	0x1e66, 501,	/* Ṧ ṧ */
-	0x1e68, 501,	/* Ṩ ṩ */
-	0x1e6a, 501,	/* Ṫ ṫ */
-	0x1e6c, 501,	/* Ṭ ṭ */
-	0x1e6e, 501,	/* Ṯ ṯ */
-	0x1e70, 501,	/* Ṱ ṱ */
-	0x1e72, 501,	/* Ṳ ṳ */
-	0x1e74, 501,	/* Ṵ ṵ */
-	0x1e76, 501,	/* Ṷ ṷ */
-	0x1e78, 501,	/* Ṹ ṹ */
-	0x1e7a, 501,	/* Ṻ ṻ */
-	0x1e7c, 501,	/* Ṽ ṽ */
-	0x1e7e, 501,	/* Ṿ ṿ */
-	0x1e80, 501,	/* Ẁ ẁ */
-	0x1e82, 501,	/* Ẃ ẃ */
-	0x1e84, 501,	/* Ẅ ẅ */
-	0x1e86, 501,	/* Ẇ ẇ */
-	0x1e88, 501,	/* Ẉ ẉ */
-	0x1e8a, 501,	/* Ẋ ẋ */
-	0x1e8c, 501,	/* Ẍ ẍ */
-	0x1e8e, 501,	/* Ẏ ẏ */
-	0x1e90, 501,	/* Ẑ ẑ */
-	0x1e92, 501,	/* Ẓ ẓ */
-	0x1e94, 501,	/* Ẕ ẕ */
-	0x1ea0, 501,	/* Ạ ạ */
-	0x1ea2, 501,	/* Ả ả */
-	0x1ea4, 501,	/* Ấ ấ */
-	0x1ea6, 501,	/* Ầ ầ */
-	0x1ea8, 501,	/* Ẩ ẩ */
-	0x1eaa, 501,	/* Ẫ ẫ */
-	0x1eac, 501,	/* Ậ ậ */
-	0x1eae, 501,	/* Ắ ắ */
-	0x1eb0, 501,	/* Ằ ằ */
-	0x1eb2, 501,	/* Ẳ ẳ */
-	0x1eb4, 501,	/* Ẵ ẵ */
-	0x1eb6, 501,	/* Ặ ặ */
-	0x1eb8, 501,	/* Ẹ ẹ */
-	0x1eba, 501,	/* Ẻ ẻ */
-	0x1ebc, 501,	/* Ẽ ẽ */
-	0x1ebe, 501,	/* Ế ế */
-	0x1ec0, 501,	/* Ề ề */
-	0x1ec2, 501,	/* Ể ể */
-	0x1ec4, 501,	/* Ễ ễ */
-	0x1ec6, 501,	/* Ệ ệ */
-	0x1ec8, 501,	/* Ỉ ỉ */
-	0x1eca, 501,	/* Ị ị */
-	0x1ecc, 501,	/* Ọ ọ */
-	0x1ece, 501,	/* Ỏ ỏ */
-	0x1ed0, 501,	/* Ố ố */
-	0x1ed2, 501,	/* Ồ ồ */
-	0x1ed4, 501,	/* Ổ ổ */
-	0x1ed6, 501,	/* Ỗ ỗ */
-	0x1ed8, 501,	/* Ộ ộ */
-	0x1eda, 501,	/* Ớ ớ */
-	0x1edc, 501,	/* Ờ ờ */
-	0x1ede, 501,	/* Ở ở */
-	0x1ee0, 501,	/* Ỡ ỡ */
-	0x1ee2, 501,	/* Ợ ợ */
-	0x1ee4, 501,	/* Ụ ụ */
-	0x1ee6, 501,	/* Ủ ủ */
-	0x1ee8, 501,	/* Ứ ứ */
-	0x1eea, 501,	/* Ừ ừ */
-	0x1eec, 501,	/* Ử ử */
-	0x1eee, 501,	/* Ữ ữ */
-	0x1ef0, 501,	/* Ự ự */
-	0x1ef2, 501,	/* Ỳ ỳ */
-	0x1ef4, 501,	/* Ỵ ỵ */
-	0x1ef6, 501,	/* Ỷ ỷ */
-	0x1ef8, 501,	/* Ỹ ỹ */
-	0x1f59, 492,	/* Ὑ ὑ */
-	0x1f5b, 492,	/* Ὓ ὓ */
-	0x1f5d, 492,	/* Ὕ ὕ */
-	0x1f5f, 492,	/* Ὗ ὗ */
-	0x1fbc, 491,	/* ᾼ ᾳ */
-	0x1fcc, 491,	/* ῌ ῃ */
-	0x1fec, 493,	/* Ῥ ῥ */
-	0x1ffc, 491,	/* ῼ ῳ */
-};
-
-/*
- * title characters are those between
- * upper and lower case. ie DZ Dz dz
- */
-static
-Rune	__totitle1[] =
-{
-	0x01c4, 501,	/* DŽ Dž */
-	0x01c6, 499,	/* dž Dž */
-	0x01c7, 501,	/* LJ Lj */
-	0x01c9, 499,	/* lj Lj */
-	0x01ca, 501,	/* NJ Nj */
-	0x01cc, 499,	/* nj Nj */
-	0x01f1, 501,	/* DZ Dz */
-	0x01f3, 499,	/* dz Dz */
-};
-
-static Rune*
-bsearch(Rune c, Rune *t, int n, int ne)
+Rune*
+rbsearch(Rune c, Rune *t, int n, int ne)
 {
 	Rune *p;
 	int m;
 
 	while(n > 1) {
-		m = n/2;
+		m = n >> 1;
 		p = t + m*ne;
 		if(c >= p[0]) {
 			t = p;
@@ -1050,102 +35,36 @@
 	return 0;
 }
 
-Rune
-tolowerrune(Rune c)
-{
-	Rune *p;
-
-	p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
-	if(p && c >= p[0] && c <= p[1])
-		return c + p[2] - 500;
-	p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
-	if(p && c == p[0])
-		return c + p[1] - 500;
-	return c;
-}
-
-Rune
-toupperrune(Rune c)
-{
-	Rune *p;
-
-	p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
-	if(p && c >= p[0] && c <= p[1])
-		return c + p[2] - 500;
-	p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
-	if(p && c == p[0])
-		return c + p[1] - 500;
-	return c;
-}
-
-Rune
-totitlerune(Rune c)
-{
-	Rune *p;
-
-	p = bsearch(c, __totitle1, nelem(__totitle1)/2, 2);
-	if(p && c == p[0])
-		return c + p[1] - 500;
-	return c;
-}
+/*
+ * Inclusive lo, hi code point ranges carrying the "ideographic" property,
+ * kept in ascending order and consulted by isideographicrune.
+ *
+ * The property is hard to extract from UnicodeData.txt, so it is hard coded
+ * here.  It is defined in the Unicode PropList.txt file, for example
+ * PropList-3.0.0.txt, whose format (unlike UnicodeData.txt) changes between
+ * versions.  The property appears relatively static: version 4.0.1 is the same
+ * except that it also defines the >16 bit ranges 20000..2a6d6 and 2f800..2fa1d.
+ */
+static Rune __isideographicr[] = {
+	0x3006, 0x3007,			/* 3006 not in Unicode 2, in 2.1 */
+	0x3021, 0x3029,
+	0x3038, 0x303a,			/* not in Unicode 2 or 2.1 */
+	0x3400, 0x4db5,			/* not in Unicode 2 or 2.1 */
+	0x4e00, 0x9fbb,			/* 0x9FA6..0x9FBB added for 4.1.0? */
+	0xf900, 0xfa2d,			/* NOTE(review): fa30..fa6a, fa70..fad9 absent; check PropList */
+	0x20000, 0x2a6d6,		/* above 16 bits, see header comment */
+	0x2f800, 0x2fa1d,		/* above 16 bits, see header comment */
+};
 
 int
-islowerrune(Rune c)
+isideographicrune(Rune c)	/* 1 if the entry rbsearch finds brackets c, else 0 */
 {
 	Rune *p;
 
-	p = bsearch(c, __toupper2, nelem(__toupper2)/3, 3);
+	p = rbsearch(c, __isideographicr, nelem(__isideographicr)/2, 2);	/* 2 Runes per entry: lo, hi */
 	if(p && c >= p[0] && c <= p[1])
 		return 1;
-	p = bsearch(c, __toupper1, nelem(__toupper1)/2, 2);
-	if(p && c == p[0])
-		return 1;
 	return 0;
 }
 
-int
-isupperrune(Rune c)
-{
-	Rune *p;
-
-	p = bsearch(c, __tolower2, nelem(__tolower2)/3, 3);
-	if(p && c >= p[0] && c <= p[1])
-		return 1;
-	p = bsearch(c, __tolower1, nelem(__tolower1)/2, 2);
-	if(p && c == p[0])
-		return 1;
-	return 0;
-}
-
-int
-isalpharune(Rune c)
-{
-	Rune *p;
-
-	if(isupperrune(c) || islowerrune(c))
-		return 1;
-	p = bsearch(c, __alpha2, nelem(__alpha2)/2, 2);
-	if(p && c >= p[0] && c <= p[1])
-		return 1;
-	p = bsearch(c, __alpha1, nelem(__alpha1), 1);
-	if(p && c == p[0])
-		return 1;
-	return 0;
-}
-
-int
-istitlerune(Rune c)
-{
-	return isupperrune(c) && islowerrune(c);
-}
-
-int
-isspacerune(Rune c)
-{
-	Rune *p;
-
-	p = bsearch(c, __space2, nelem(__space2)/2, 2);
-	if(p && c >= p[0] && c <= p[1])
-		return 1;
-	return 0;
-}
+#include "runetypebody-5.0.0.h"
diff --git a/runetypebody-5.0.0.h b/runetypebody-5.0.0.h
new file mode 100644
index 0000000..67a645d
--- /dev/null
+++ b/runetypebody-5.0.0.h
@@ -0,0 +1,1361 @@
+/* generated automatically by mkrunetype.c from UnicodeData-5.0.0.txt */
+
+static Rune __isspacer[] = {	/* lo, hi pairs; isspacerune tests p[0] <= c <= p[1] */
+	0x0009, 0x000d,
+	0x0020, 0x0020,
+	0x0085, 0x0085,
+	0x00a0, 0x00a0,
+	0x1680, 0x1680,
+	0x180e, 0x180e,
+	0x2000, 0x200a,
+	0x2028, 0x2029,
+	0x202f, 0x202f,
+	0x205f, 0x205f,
+	0x3000, 0x3000,
+	0xfeff, 0xfeff,
+};
+
+int
+isspacerune(Rune c)
+{
+	/* __isspacer holds inclusive lo, hi pairs: two Runes per entry */
+	Rune *range = rbsearch(c, __isspacer, nelem(__isspacer)/2, 2);
+
+	if(range == 0 || c < range[0] || c > range[1])
+		return 0;	/* no entry, or c lies outside the one found */
+	return 1;
+}
+
+static Rune __isdigitr[] = {	/* lo, hi pairs; isdigitrune tests p[0] <= c <= p[1] */
+	0x0030, 0x0039,
+	0x0660, 0x0669,
+	0x06f0, 0x06f9,
+	0x07c0, 0x07c9,
+	0x0966, 0x096f,
+	0x09e6, 0x09ef,
+	0x0a66, 0x0a6f,
+	0x0ae6, 0x0aef,
+	0x0b66, 0x0b6f,
+	0x0be6, 0x0bef,
+	0x0c66, 0x0c6f,
+	0x0ce6, 0x0cef,
+	0x0d66, 0x0d6f,
+	0x0e50, 0x0e59,
+	0x0ed0, 0x0ed9,
+	0x0f20, 0x0f29,
+	0x1040, 0x1049,
+	0x17e0, 0x17e9,
+	0x1810, 0x1819,
+	0x1946, 0x194f,
+	0x19d0, 0x19d9,
+	0x1b50, 0x1b59,
+	0xff10, 0xff19,
+	0x104a0, 0x104a9,
+	0x1d7ce, 0x1d7ff,
+};
+
+int
+isdigitrune(Rune c)
+{
+	/* __isdigitr holds inclusive lo, hi pairs: two Runes per entry */
+	Rune *range = rbsearch(c, __isdigitr, nelem(__isdigitr)/2, 2);
+
+	if(range == 0 || c < range[0] || c > range[1])
+		return 0;	/* no entry, or c lies outside the one found */
+	return 1;
+}
+
+static Rune __isalphar[] = {
+	0x0041, 0x005a,
+	0x0061, 0x007a,
+	0x00c0, 0x00d6,
+	0x00d8, 0x00f6,
+	0x00f8, 0x02c1,
+	0x02c6, 0x02d1,
+	0x02e0, 0x02e4,
+	0x037a, 0x037d,
+	0x0388, 0x038a,
+	0x038e, 0x03a1,
+	0x03a3, 0x03ce,
+	0x03d0, 0x03f5,
+	0x03f7, 0x0481,
+	0x048a, 0x0513,
+	0x0531, 0x0556,
+	0x0561, 0x0587,
+	0x05d0, 0x05ea,
+	0x05f0, 0x05f2,
+	0x0621, 0x063a,
+	0x0640, 0x064a,
+	0x066e, 0x066f,
+	0x0671, 0x06d3,
+	0x06e5, 0x06e6,
+	0x06ee, 0x06ef,
+	0x06fa, 0x06fc,
+	0x0712, 0x072f,
+	0x074d, 0x076d,
+	0x0780, 0x07a5,
+	0x07ca, 0x07ea,
+	0x07f4, 0x07f5,
+	0x0904, 0x0939,
+	0x0958, 0x0961,
+	0x097b, 0x097f,
+	0x0985, 0x098c,
+	0x098f, 0x0990,
+	0x0993, 0x09a8,
+	0x09aa, 0x09b0,
+	0x09b6, 0x09b9,
+	0x09dc, 0x09dd,
+	0x09df, 0x09e1,
+	0x09f0, 0x09f1,
+	0x0a05, 0x0a0a,
+	0x0a0f, 0x0a10,
+	0x0a13, 0x0a28,
+	0x0a2a, 0x0a30,
+	0x0a32, 0x0a33,
+	0x0a35, 0x0a36,
+	0x0a38, 0x0a39,
+	0x0a59, 0x0a5c,
+	0x0a72, 0x0a74,
+	0x0a85, 0x0a8d,
+	0x0a8f, 0x0a91,
+	0x0a93, 0x0aa8,
+	0x0aaa, 0x0ab0,
+	0x0ab2, 0x0ab3,
+	0x0ab5, 0x0ab9,
+	0x0ae0, 0x0ae1,
+	0x0b05, 0x0b0c,
+	0x0b0f, 0x0b10,
+	0x0b13, 0x0b28,
+	0x0b2a, 0x0b30,
+	0x0b32, 0x0b33,
+	0x0b35, 0x0b39,
+	0x0b5c, 0x0b5d,
+	0x0b5f, 0x0b61,
+	0x0b85, 0x0b8a,
+	0x0b8e, 0x0b90,
+	0x0b92, 0x0b95,
+	0x0b99, 0x0b9a,
+	0x0b9e, 0x0b9f,
+	0x0ba3, 0x0ba4,
+	0x0ba8, 0x0baa,
+	0x0bae, 0x0bb9,
+	0x0c05, 0x0c0c,
+	0x0c0e, 0x0c10,
+	0x0c12, 0x0c28,
+	0x0c2a, 0x0c33,
+	0x0c35, 0x0c39,
+	0x0c60, 0x0c61,
+	0x0c85, 0x0c8c,
+	0x0c8e, 0x0c90,
+	0x0c92, 0x0ca8,
+	0x0caa, 0x0cb3,
+	0x0cb5, 0x0cb9,
+	0x0ce0, 0x0ce1,
+	0x0d05, 0x0d0c,
+	0x0d0e, 0x0d10,
+	0x0d12, 0x0d28,
+	0x0d2a, 0x0d39,
+	0x0d60, 0x0d61,
+	0x0d85, 0x0d96,
+	0x0d9a, 0x0db1,
+	0x0db3, 0x0dbb,
+	0x0dc0, 0x0dc6,
+	0x0e01, 0x0e30,
+	0x0e32, 0x0e33,
+	0x0e40, 0x0e46,
+	0x0e81, 0x0e82,
+	0x0e87, 0x0e88,
+	0x0e94, 0x0e97,
+	0x0e99, 0x0e9f,
+	0x0ea1, 0x0ea3,
+	0x0eaa, 0x0eab,
+	0x0ead, 0x0eb0,
+	0x0eb2, 0x0eb3,
+	0x0ec0, 0x0ec4,
+	0x0edc, 0x0edd,
+	0x0f40, 0x0f47,
+	0x0f49, 0x0f6a,
+	0x0f88, 0x0f8b,
+	0x1000, 0x1021,
+	0x1023, 0x1027,
+	0x1029, 0x102a,
+	0x1050, 0x1055,
+	0x10a0, 0x10c5,
+	0x10d0, 0x10fa,
+	0x1100, 0x1159,
+	0x115f, 0x11a2,
+	0x11a8, 0x11f9,
+	0x1200, 0x1248,
+	0x124a, 0x124d,
+	0x1250, 0x1256,
+	0x125a, 0x125d,
+	0x1260, 0x1288,
+	0x128a, 0x128d,
+	0x1290, 0x12b0,
+	0x12b2, 0x12b5,
+	0x12b8, 0x12be,
+	0x12c2, 0x12c5,
+	0x12c8, 0x12d6,
+	0x12d8, 0x1310,
+	0x1312, 0x1315,
+	0x1318, 0x135a,
+	0x1380, 0x138f,
+	0x13a0, 0x13f4,
+	0x1401, 0x166c,
+	0x166f, 0x1676,
+	0x1681, 0x169a,
+	0x16a0, 0x16ea,
+	0x1700, 0x170c,
+	0x170e, 0x1711,
+	0x1720, 0x1731,
+	0x1740, 0x1751,
+	0x1760, 0x176c,
+	0x176e, 0x1770,
+	0x1780, 0x17b3,
+	0x1820, 0x1877,
+	0x1880, 0x18a8,
+	0x1900, 0x191c,
+	0x1950, 0x196d,
+	0x1970, 0x1974,
+	0x1980, 0x19a9,
+	0x19c1, 0x19c7,
+	0x1a00, 0x1a16,
+	0x1b05, 0x1b33,
+	0x1b45, 0x1b4b,
+	0x1d00, 0x1dbf,
+	0x1e00, 0x1e9b,
+	0x1ea0, 0x1ef9,
+	0x1f00, 0x1f15,
+	0x1f18, 0x1f1d,
+	0x1f20, 0x1f45,
+	0x1f48, 0x1f4d,
+	0x1f50, 0x1f57,
+	0x1f5f, 0x1f7d,
+	0x1f80, 0x1fb4,
+	0x1fb6, 0x1fbc,
+	0x1fc2, 0x1fc4,
+	0x1fc6, 0x1fcc,
+	0x1fd0, 0x1fd3,
+	0x1fd6, 0x1fdb,
+	0x1fe0, 0x1fec,
+	0x1ff2, 0x1ff4,
+	0x1ff6, 0x1ffc,
+	0x2090, 0x2094,
+	0x210a, 0x2113,
+	0x2119, 0x211d,
+	0x212a, 0x212d,
+	0x212f, 0x2139,
+	0x213c, 0x213f,
+	0x2145, 0x2149,
+	0x2183, 0x2184,
+	0x2c00, 0x2c2e,
+	0x2c30, 0x2c5e,
+	0x2c60, 0x2c6c,
+	0x2c74, 0x2c77,
+	0x2c80, 0x2ce4,
+	0x2d00, 0x2d25,
+	0x2d30, 0x2d65,
+	0x2d80, 0x2d96,
+	0x2da0, 0x2da6,
+	0x2da8, 0x2dae,
+	0x2db0, 0x2db6,
+	0x2db8, 0x2dbe,
+	0x2dc0, 0x2dc6,
+	0x2dc8, 0x2dce,
+	0x2dd0, 0x2dd6,
+	0x2dd8, 0x2dde,
+	0x3005, 0x3006,
+	0x3031, 0x3035,
+	0x303b, 0x303c,
+	0x3041, 0x3096,
+	0x309d, 0x309f,
+	0x30a1, 0x30fa,
+	0x30fc, 0x30ff,
+	0x3105, 0x312c,
+	0x3131, 0x318e,
+	0x31a0, 0x31b7,
+	0x31f0, 0x31ff,
+	0x3400, 0x4db5,
+	0x4e00, 0x9fbb,
+	0xa000, 0xa48c,
+	0xa717, 0xa71a,
+	0xa800, 0xa801,
+	0xa803, 0xa805,
+	0xa807, 0xa80a,
+	0xa80c, 0xa822,
+	0xa840, 0xa873,
+	0xac00, 0xd7a3,
+	0xf900, 0xfa2d,
+	0xfa30, 0xfa6a,
+	0xfa70, 0xfad9,
+	0xfb00, 0xfb06,
+	0xfb13, 0xfb17,
+	0xfb1f, 0xfb28,
+	0xfb2a, 0xfb36,
+	0xfb38, 0xfb3c,
+	0xfb40, 0xfb41,
+	0xfb43, 0xfb44,
+	0xfb46, 0xfbb1,
+	0xfbd3, 0xfd3d,
+	0xfd50, 0xfd8f,
+	0xfd92, 0xfdc7,
+	0xfdf0, 0xfdfb,
+	0xfe70, 0xfe74,
+	0xfe76, 0xfefc,
+	0xff21, 0xff3a,
+	0xff41, 0xff5a,
+	0xff66, 0xffbe,
+	0xffc2, 0xffc7,
+	0xffca, 0xffcf,
+	0xffd2, 0xffd7,
+	0xffda, 0xffdc,
+	0x10000, 0x1000b,
+	0x1000d, 0x10026,
+	0x10028, 0x1003a,
+	0x1003c, 0x1003d,
+	0x1003f, 0x1004d,
+	0x10050, 0x1005d,
+	0x10080, 0x100fa,
+	0x10300, 0x1031e,
+	0x10330, 0x10340,
+	0x10342, 0x10349,
+	0x10380, 0x1039d,
+	0x103a0, 0x103c3,
+	0x103c8, 0x103cf,
+	0x10400, 0x1049d,
+	0x10800, 0x10805,
+	0x1080a, 0x10835,
+	0x10837, 0x10838,
+	0x10900, 0x10915,
+	0x10a10, 0x10a13,
+	0x10a15, 0x10a17,
+	0x10a19, 0x10a33,
+	0x12000, 0x1236e,
+	0x1d400, 0x1d454,
+	0x1d456, 0x1d49c,
+	0x1d49e, 0x1d49f,
+	0x1d4a5, 0x1d4a6,
+	0x1d4a9, 0x1d4ac,
+	0x1d4ae, 0x1d4b9,
+	0x1d4bd, 0x1d4c3,
+	0x1d4c5, 0x1d505,
+	0x1d507, 0x1d50a,
+	0x1d50d, 0x1d514,
+	0x1d516, 0x1d51c,
+	0x1d51e, 0x1d539,
+	0x1d53b, 0x1d53e,
+	0x1d540, 0x1d544,
+	0x1d54a, 0x1d550,
+	0x1d552, 0x1d6a5,
+	0x1d6a8, 0x1d6c0,
+	0x1d6c2, 0x1d6da,
+	0x1d6dc, 0x1d6fa,
+	0x1d6fc, 0x1d714,
+	0x1d716, 0x1d734,
+	0x1d736, 0x1d74e,
+	0x1d750, 0x1d76e,
+	0x1d770, 0x1d788,
+	0x1d78a, 0x1d7a8,
+	0x1d7aa, 0x1d7c2,
+	0x1d7c4, 0x1d7cb,
+	0x20000, 0x2a6d6,
+	0x2f800, 0x2fa1d,
+};
+
+static Rune __isalphas[] = {
+	0x00aa,
+	0x00b5,
+	0x00ba,
+	0x02ee,
+	0x0386,
+	0x038c,
+	0x0559,
+	0x06d5,
+	0x06ff,
+	0x0710,
+	0x07b1,
+	0x07fa,
+	0x093d,
+	0x0950,
+	0x09b2,
+	0x09bd,
+	0x09ce,
+	0x0a5e,
+	0x0abd,
+	0x0ad0,
+	0x0b3d,
+	0x0b71,
+	0x0b83,
+	0x0b9c,
+	0x0cbd,
+	0x0cde,
+	0x0dbd,
+	0x0e84,
+	0x0e8a,
+	0x0e8d,
+	0x0ea5,
+	0x0ea7,
+	0x0ebd,
+	0x0ec6,
+	0x0f00,
+	0x10fc,
+	0x1258,
+	0x12c0,
+	0x17d7,
+	0x17dc,
+	0x1f59,
+	0x1f5b,
+	0x1f5d,
+	0x1fbe,
+	0x2071,
+	0x207f,
+	0x2102,
+	0x2107,
+	0x2115,
+	0x2124,
+	0x2126,
+	0x2128,
+	0x214e,
+	0x2d6f,
+	0xfb1d,
+	0xfb3e,
+	0x10808,
+	0x1083c,
+	0x1083f,
+	0x10a00,
+	0x1d4a2,
+	0x1d4bb,
+	0x1d546,
+};
+
+int
+isalpharune(Rune c)
+{
+	Rune *hit;
+
+	/* ranges first: __isalphar holds inclusive lo, hi pairs */
+	hit = rbsearch(c, __isalphar, nelem(__isalphar)/2, 2);
+	if(hit != 0 && hit[0] <= c && c <= hit[1])
+		return 1;
+	/* then singletons: __isalphas holds one code point per entry */
+	hit = rbsearch(c, __isalphas, nelem(__isalphas), 1);
+	return hit != 0 && hit[0] == c;
+}
+
+static Rune __isupperr[] = {
+	0x0041, 0x005a,
+	0x00c0, 0x00d6,
+	0x00d8, 0x00de,
+	0x0178, 0x0179,
+	0x0181, 0x0182,
+	0x0186, 0x0187,
+	0x0189, 0x018b,
+	0x018e, 0x0191,
+	0x0193, 0x0194,
+	0x0196, 0x0198,
+	0x019c, 0x019d,
+	0x019f, 0x01a0,
+	0x01a6, 0x01a7,
+	0x01ae, 0x01af,
+	0x01b1, 0x01b3,
+	0x01b7, 0x01b8,
+	0x01f6, 0x01f8,
+	0x023a, 0x023b,
+	0x023d, 0x023e,
+	0x0243, 0x0246,
+	0x0388, 0x038a,
+	0x038e, 0x038f,
+	0x0391, 0x03a1,
+	0x03a3, 0x03ab,
+	0x03d2, 0x03d4,
+	0x03f9, 0x03fa,
+	0x03fd, 0x042f,
+	0x04c0, 0x04c1,
+	0x0531, 0x0556,
+	0x10a0, 0x10c5,
+	0x1f08, 0x1f0f,
+	0x1f18, 0x1f1d,
+	0x1f28, 0x1f2f,
+	0x1f38, 0x1f3f,
+	0x1f48, 0x1f4d,
+	0x1f68, 0x1f6f,
+	0x1f88, 0x1f8f,
+	0x1f98, 0x1f9f,
+	0x1fa8, 0x1faf,
+	0x1fb8, 0x1fbc,
+	0x1fc8, 0x1fcc,
+	0x1fd8, 0x1fdb,
+	0x1fe8, 0x1fec,
+	0x1ff8, 0x1ffc,
+	0x210b, 0x210d,
+	0x2110, 0x2112,
+	0x2119, 0x211d,
+	0x212a, 0x212d,
+	0x2130, 0x2133,
+	0x213e, 0x213f,
+	0x2160, 0x216f,
+	0x24b6, 0x24cf,
+	0x2c00, 0x2c2e,
+	0x2c62, 0x2c64,
+	0xff21, 0xff3a,
+	0x10400, 0x10427,
+	0x1d400, 0x1d419,
+	0x1d434, 0x1d44d,
+	0x1d468, 0x1d481,
+	0x1d49e, 0x1d49f,
+	0x1d4a5, 0x1d4a6,
+	0x1d4a9, 0x1d4ac,
+	0x1d4ae, 0x1d4b5,
+	0x1d4d0, 0x1d4e9,
+	0x1d504, 0x1d505,
+	0x1d507, 0x1d50a,
+	0x1d50d, 0x1d514,
+	0x1d516, 0x1d51c,
+	0x1d538, 0x1d539,
+	0x1d53b, 0x1d53e,
+	0x1d540, 0x1d544,
+	0x1d54a, 0x1d550,
+	0x1d56c, 0x1d585,
+	0x1d5a0, 0x1d5b9,
+	0x1d5d4, 0x1d5ed,
+	0x1d608, 0x1d621,
+	0x1d63c, 0x1d655,
+	0x1d670, 0x1d689,
+	0x1d6a8, 0x1d6c0,
+	0x1d6e2, 0x1d6fa,
+	0x1d71c, 0x1d734,
+	0x1d756, 0x1d76e,
+	0x1d790, 0x1d7a8,
+};
+
+static Rune __isupperp[] = {	/* lo, hi pairs; isupperrune accepts only even offsets from lo */
+	0x0100, 0x0136,
+	0x0139, 0x0147,
+	0x014a, 0x0176,
+	0x017b, 0x017d,
+	0x01a2, 0x01a4,
+	0x01cd, 0x01db,
+	0x01de, 0x01ee,
+	0x01fa, 0x0232,
+	0x0248, 0x024e,
+	0x03d8, 0x03ee,
+	0x0460, 0x0480,
+	0x048a, 0x04be,
+	0x04c3, 0x04cd,
+	0x04d0, 0x0512,
+	0x1e00, 0x1e94,
+	0x1ea0, 0x1ef8,
+	0x1f59, 0x1f5f,
+	0x2124, 0x2128,
+	0x2c67, 0x2c6b,
+	0x2c80, 0x2ce2,
+};
+
+static Rune __isuppers[] = {	/* single code points; isupperrune requires c == p[0] */
+	0x0184,
+	0x01a9,
+	0x01ac,
+	0x01b5,
+	0x01bc,
+	0x01c4,
+	0x01c7,
+	0x01ca,
+	0x01f1,
+	0x01f4,
+	0x0241,
+	0x0386,
+	0x038c,
+	0x03f4,
+	0x03f7,
+	0x2102,
+	0x2107,
+	0x2115,
+	0x2145,
+	0x2183,
+	0x2c60,
+	0x2c75,
+	0x1d49c,
+	0x1d4a2,
+	0x1d546,
+	0x1d7ca,
+};
+
+int
+isupperrune(Rune c)
+{
+	Rune *hit;
+
+	/* __isupperr: inclusive lo, hi ranges, every member counts */
+	hit = rbsearch(c, __isupperr, nelem(__isupperr)/2, 2);
+	if(hit != 0 && hit[0] <= c && c <= hit[1])
+		return 1;
+	/* __isupperp: lo, hi ranges where only even offsets from lo count */
+	hit = rbsearch(c, __isupperp, nelem(__isupperp)/2, 2);
+	if(hit != 0 && hit[0] <= c && c <= hit[1] && ((c - hit[0]) & 1) == 0)
+		return 1;
+	hit = rbsearch(c, __isuppers, nelem(__isuppers), 1);	/* single code points */
+	return hit != 0 && hit[0] == c;
+}
+
+static Rune __islowerr[] = {
+	0x0061, 0x007a,
+	0x00df, 0x00f6,
+	0x00f8, 0x00ff,
+	0x0137, 0x0138,
+	0x0148, 0x0149,
+	0x017e, 0x0180,
+	0x018c, 0x018d,
+	0x0199, 0x019b,
+	0x01aa, 0x01ab,
+	0x01b9, 0x01ba,
+	0x01bd, 0x01bf,
+	0x01dc, 0x01dd,
+	0x01ef, 0x01f0,
+	0x0233, 0x0239,
+	0x023f, 0x0240,
+	0x024f, 0x0293,
+	0x0295, 0x02af,
+	0x037b, 0x037d,
+	0x03ac, 0x03ce,
+	0x03d0, 0x03d1,
+	0x03d5, 0x03d7,
+	0x03ef, 0x03f3,
+	0x03fb, 0x03fc,
+	0x0430, 0x045f,
+	0x04ce, 0x04cf,
+	0x0561, 0x0587,
+	0x1d00, 0x1d2b,
+	0x1d62, 0x1d77,
+	0x1d79, 0x1d9a,
+	0x1e95, 0x1e9b,
+	0x1f00, 0x1f07,
+	0x1f10, 0x1f15,
+	0x1f20, 0x1f27,
+	0x1f30, 0x1f37,
+	0x1f40, 0x1f45,
+	0x1f50, 0x1f57,
+	0x1f60, 0x1f67,
+	0x1f70, 0x1f7d,
+	0x1f80, 0x1f87,
+	0x1f90, 0x1f97,
+	0x1fa0, 0x1fa7,
+	0x1fb0, 0x1fb4,
+	0x1fb6, 0x1fb7,
+	0x1fc2, 0x1fc4,
+	0x1fc6, 0x1fc7,
+	0x1fd0, 0x1fd3,
+	0x1fd6, 0x1fd7,
+	0x1fe0, 0x1fe7,
+	0x1ff2, 0x1ff4,
+	0x1ff6, 0x1ff7,
+	0x210e, 0x210f,
+	0x213c, 0x213d,
+	0x2146, 0x2149,
+	0x2170, 0x217f,
+	0x24d0, 0x24e9,
+	0x2c30, 0x2c5e,
+	0x2c65, 0x2c66,
+	0x2c76, 0x2c77,
+	0x2ce3, 0x2ce4,
+	0x2d00, 0x2d25,
+	0xfb00, 0xfb06,
+	0xfb13, 0xfb17,
+	0xff41, 0xff5a,
+	0x10428, 0x1044f,
+	0x1d41a, 0x1d433,
+	0x1d44e, 0x1d454,
+	0x1d456, 0x1d467,
+	0x1d482, 0x1d49b,
+	0x1d4b6, 0x1d4b9,
+	0x1d4bd, 0x1d4c3,
+	0x1d4c5, 0x1d4cf,
+	0x1d4ea, 0x1d503,
+	0x1d51e, 0x1d537,
+	0x1d552, 0x1d56b,
+	0x1d586, 0x1d59f,
+	0x1d5ba, 0x1d5d3,
+	0x1d5ee, 0x1d607,
+	0x1d622, 0x1d63b,
+	0x1d656, 0x1d66f,
+	0x1d68a, 0x1d6a5,
+	0x1d6c2, 0x1d6da,
+	0x1d6dc, 0x1d6e1,
+	0x1d6fc, 0x1d714,
+	0x1d716, 0x1d71b,
+	0x1d736, 0x1d74e,
+	0x1d750, 0x1d755,
+	0x1d770, 0x1d788,
+	0x1d78a, 0x1d78f,
+	0x1d7aa, 0x1d7c2,
+	0x1d7c4, 0x1d7c9,
+};
+
+static Rune __islowerp[] = {	/* lo, hi pairs; islowerrune accepts only even offsets from lo */
+	0x0101, 0x0135,
+	0x013a, 0x0146,
+	0x014b, 0x0177,
+	0x017a, 0x017c,
+	0x0183, 0x0185,
+	0x01a1, 0x01a5,
+	0x01b4, 0x01b6,
+	0x01cc, 0x01da,
+	0x01df, 0x01ed,
+	0x01f3, 0x01f5,
+	0x01f9, 0x0231,
+	0x0247, 0x024d,
+	0x03d9, 0x03ed,
+	0x0461, 0x0481,
+	0x048b, 0x04bf,
+	0x04c2, 0x04cc,
+	0x04d1, 0x0513,
+	0x1e01, 0x1e93,
+	0x1ea1, 0x1ef9,
+	0x2c68, 0x2c6c,
+	0x2c81, 0x2ce1,
+};
+
+static Rune __islowers[] = {	/* single code points; islowerrune requires c == p[0] */
+	0x00aa,
+	0x00b5,
+	0x00ba,
+	0x0188,
+	0x0192,
+	0x0195,
+	0x019e,
+	0x01a8,
+	0x01ad,
+	0x01b0,
+	0x01c6,
+	0x01c9,
+	0x023c,
+	0x0242,
+	0x0390,
+	0x03f5,
+	0x03f8,
+	0x1fbe,
+	0x2071,
+	0x207f,
+	0x210a,
+	0x2113,
+	0x212f,
+	0x2134,
+	0x2139,
+	0x214e,
+	0x2184,
+	0x2c61,
+	0x2c74,
+	0x1d4bb,
+	0x1d7cb,
+};
+
+int
+islowerrune(Rune c)
+{
+	Rune *hit;
+
+	/* __islowerr: inclusive lo, hi ranges, every member counts */
+	hit = rbsearch(c, __islowerr, nelem(__islowerr)/2, 2);
+	if(hit != 0 && hit[0] <= c && c <= hit[1])
+		return 1;
+	/* __islowerp: lo, hi ranges where only even offsets from lo count */
+	hit = rbsearch(c, __islowerp, nelem(__islowerp)/2, 2);
+	if(hit != 0 && hit[0] <= c && c <= hit[1] && ((c - hit[0]) & 1) == 0)
+		return 1;
+	hit = rbsearch(c, __islowers, nelem(__islowers), 1);	/* single code points */
+	return hit != 0 && hit[0] == c;
+}
+
+static Rune __istitler[] = {	/* inclusive [first, last] pairs; istitlerune accepts every code point in a pair's range */
+	0x0041, 0x005a,
+	0x00c0, 0x00d6,
+	0x00d8, 0x00de,
+	0x0178, 0x0179,
+	0x0181, 0x0182,
+	0x0186, 0x0187,
+	0x0189, 0x018b,
+	0x018e, 0x0191,
+	0x0193, 0x0194,
+	0x0196, 0x0198,
+	0x019c, 0x019d,
+	0x019f, 0x01a0,
+	0x01a6, 0x01a7,
+	0x01ae, 0x01af,
+	0x01b1, 0x01b3,
+	0x01b7, 0x01b8,
+	0x01f6, 0x01f8,
+	0x023a, 0x023b,
+	0x023d, 0x023e,
+	0x0243, 0x0246,
+	0x0388, 0x038a,
+	0x038e, 0x038f,
+	0x0391, 0x03a1,
+	0x03a3, 0x03ab,
+	0x03f9, 0x03fa,
+	0x03fd, 0x042f,
+	0x04c0, 0x04c1,
+	0x0531, 0x0556,
+	0x10a0, 0x10c5,
+	0x1f08, 0x1f0f,
+	0x1f18, 0x1f1d,
+	0x1f28, 0x1f2f,
+	0x1f38, 0x1f3f,
+	0x1f48, 0x1f4d,
+	0x1f68, 0x1f6f,
+	0x1f88, 0x1f8f,
+	0x1f98, 0x1f9f,
+	0x1fa8, 0x1faf,
+	0x1fb8, 0x1fbc,
+	0x1fc8, 0x1fcc,
+	0x1fd8, 0x1fdb,
+	0x1fe8, 0x1fec,
+	0x1ff8, 0x1ffc,
+	0x2160, 0x216f,
+	0x24b6, 0x24cf,
+	0x2c00, 0x2c2e,
+	0x2c62, 0x2c64,
+	0xff21, 0xff3a,
+	0x10400, 0x10427,
+};
+
+static Rune __istitlep[] = {	/* inclusive [first, last] pairs; istitlerune accepts only code points at an even offset from first */
+	0x0100, 0x012e,
+	0x0132, 0x0136,
+	0x0139, 0x0147,
+	0x014a, 0x0176,
+	0x017b, 0x017d,
+	0x01a2, 0x01a4,
+	0x01cb, 0x01db,
+	0x01de, 0x01ee,
+	0x01f2, 0x01f4,
+	0x01fa, 0x0232,
+	0x0248, 0x024e,
+	0x03d8, 0x03ee,
+	0x0460, 0x0480,
+	0x048a, 0x04be,
+	0x04c3, 0x04cd,
+	0x04d0, 0x0512,
+	0x1e00, 0x1e94,
+	0x1ea0, 0x1ef8,
+	0x1f59, 0x1f5f,
+	0x2c67, 0x2c6b,
+	0x2c80, 0x2ce2,
+};
+
+static Rune __istitles[] = {	/* single code points that istitlerune accepts by exact match */
+	0x0184,
+	0x01a9,
+	0x01ac,
+	0x01b5,
+	0x01bc,
+	0x01c5,
+	0x01c8,
+	0x0241,
+	0x0386,
+	0x038c,
+	0x03f7,
+	0x2132,
+	0x2183,
+	0x2c60,
+	0x2c75,
+};
+
+int
+istitlerune(Rune c)	/* returns 1 if c matches the __istitler, __istitlep or __istitles table, else 0 */
+{
+	Rune *p;
+
+	p = rbsearch(c, __istitler, nelem(__istitler)/2, 2);	/* 2-Rune entries [first, last]; rbsearch is defined outside this view */
+	if(p && c >= p[0] && c <= p[1])	/* explicit range check on the entry rbsearch returned */
+		return 1;
+	p = rbsearch(c, __istitlep, nelem(__istitlep)/2, 2);
+	if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))	/* in range and at an even offset from first */
+		return 1;
+	p = rbsearch(c, __istitles, nelem(__istitles), 1);	/* singleton table: one Rune per entry */
+	if(p && c == p[0])
+		return 1;
+	return 0;
+}
+
+static Rune __toupperr[] = {	/* triples: first, last, delta + 1048576; toupperrune removes the bias and applies delta to the whole range */
+	0x0061, 0x007a, 1048544,
+	0x00e0, 0x00f6, 1048544,
+	0x00f8, 0x00fe, 1048544,
+	0x0256, 0x0257, 1048371,
+	0x028a, 0x028b, 1048359,
+	0x037b, 0x037d, 1048706,
+	0x03ad, 0x03af, 1048539,
+	0x03b1, 0x03c1, 1048544,
+	0x03c3, 0x03cb, 1048544,
+	0x03cd, 0x03ce, 1048513,
+	0x0430, 0x044f, 1048544,
+	0x0450, 0x045f, 1048496,
+	0x0561, 0x0586, 1048528,
+	0x1f00, 0x1f07, 1048584,
+	0x1f10, 0x1f15, 1048584,
+	0x1f20, 0x1f27, 1048584,
+	0x1f30, 0x1f37, 1048584,
+	0x1f40, 0x1f45, 1048584,
+	0x1f60, 0x1f67, 1048584,
+	0x1f70, 0x1f71, 1048650,
+	0x1f72, 0x1f75, 1048662,
+	0x1f76, 0x1f77, 1048676,
+	0x1f78, 0x1f79, 1048704,
+	0x1f7a, 0x1f7b, 1048688,
+	0x1f7c, 0x1f7d, 1048702,
+	0x1f80, 0x1f87, 1048584,
+	0x1f90, 0x1f97, 1048584,
+	0x1fa0, 0x1fa7, 1048584,
+	0x1fb0, 0x1fb1, 1048584,
+	0x1fd0, 0x1fd1, 1048584,
+	0x1fe0, 0x1fe1, 1048584,
+	0x2170, 0x217f, 1048560,
+	0x24d0, 0x24e9, 1048550,
+	0x2c30, 0x2c5e, 1048528,
+	0x2d00, 0x2d25, 1041312,
+	0xff41, 0xff5a, 1048544,
+	0x10428, 0x1044f, 1048536,
+};
+
+static Rune __toupperp[] = {	/* triples: first, last, delta + 1048576; toupperrune maps only code points at an even offset from first */
+	0x0101, 0x012f, 1048575,
+	0x0133, 0x0137, 1048575,
+	0x013a, 0x0148, 1048575,
+	0x014b, 0x0177, 1048575,
+	0x017a, 0x017e, 1048575,
+	0x0183, 0x0185, 1048575,
+	0x01a1, 0x01a5, 1048575,
+	0x01b4, 0x01b6, 1048575,
+	0x01ce, 0x01dc, 1048575,
+	0x01df, 0x01ef, 1048575,
+	0x01f9, 0x021f, 1048575,
+	0x0223, 0x0233, 1048575,
+	0x0247, 0x024f, 1048575,
+	0x03d9, 0x03ef, 1048575,
+	0x0461, 0x0481, 1048575,
+	0x048b, 0x04bf, 1048575,
+	0x04c2, 0x04ce, 1048575,
+	0x04d1, 0x0513, 1048575,
+	0x1e01, 0x1e95, 1048575,
+	0x1ea1, 0x1ef9, 1048575,
+	0x1f51, 0x1f57, 1048584,
+	0x2c68, 0x2c6c, 1048575,
+	0x2c81, 0x2ce3, 1048575,
+};
+
+static Rune __touppers[] = {	/* pairs: code point, delta + 1048576; toupperrune applies these on an exact match */
+	0x00b5, 1049319,
+	0x00ff, 1048697,
+	0x0131, 1048344,
+	0x017f, 1048276,
+	0x0180, 1048771,
+	0x0188, 1048575,
+	0x018c, 1048575,
+	0x0192, 1048575,
+	0x0195, 1048673,
+	0x0199, 1048575,
+	0x019a, 1048739,
+	0x019e, 1048706,
+	0x01a8, 1048575,
+	0x01ad, 1048575,
+	0x01b0, 1048575,
+	0x01b9, 1048575,
+	0x01bd, 1048575,
+	0x01bf, 1048632,
+	0x01c5, 1048575,
+	0x01c6, 1048574,
+	0x01c8, 1048575,
+	0x01c9, 1048574,
+	0x01cb, 1048575,
+	0x01cc, 1048574,
+	0x01dd, 1048497,
+	0x01f2, 1048575,
+	0x01f3, 1048574,
+	0x01f5, 1048575,
+	0x023c, 1048575,
+	0x0242, 1048575,
+	0x0253, 1048366,
+	0x0254, 1048370,
+	0x0259, 1048374,
+	0x025b, 1048373,
+	0x0260, 1048371,
+	0x0263, 1048369,
+	0x0268, 1048367,
+	0x0269, 1048365,
+	0x026b, 1059319,
+	0x026f, 1048365,
+	0x0272, 1048363,
+	0x0275, 1048362,
+	0x027d, 1059303,
+	0x0280, 1048358,
+	0x0283, 1048358,
+	0x0288, 1048358,
+	0x0289, 1048507,
+	0x028c, 1048505,
+	0x0292, 1048357,
+	0x0345, 1048660,
+	0x03ac, 1048538,
+	0x03c2, 1048545,
+	0x03cc, 1048512,
+	0x03d0, 1048514,
+	0x03d1, 1048519,
+	0x03d5, 1048529,
+	0x03d6, 1048522,
+	0x03f0, 1048490,
+	0x03f1, 1048496,
+	0x03f2, 1048583,
+	0x03f5, 1048480,
+	0x03f8, 1048575,
+	0x03fb, 1048575,
+	0x04cf, 1048561,
+	0x1d7d, 1052390,
+	0x1e9b, 1048517,
+	0x1fb3, 1048585,
+	0x1fbe, 1041371,
+	0x1fc3, 1048585,
+	0x1fe5, 1048583,
+	0x1ff3, 1048585,
+	0x214e, 1048548,
+	0x2184, 1048575,
+	0x2c61, 1048575,
+	0x2c65, 1037781,
+	0x2c66, 1037784,
+	0x2c76, 1048575,
+};
+
+Rune
+toupperrune(Rune c)	/* returns c plus the delta of the first matching __toupper* entry, or c unchanged if none matches */
+{
+	Rune *p;
+
+	p = rbsearch(c, __toupperr, nelem(__toupperr)/3, 3);	/* 3-Rune entries: first, last, biased delta; rbsearch is defined outside this view */
+	if(p && c >= p[0] && c <= p[1])	/* explicit range check on the entry rbsearch returned */
+		return c + p[2] - 1048576;	/* stored delta is biased by 1048576 (0x100000) */
+	p = rbsearch(c, __toupperp, nelem(__toupperp)/3, 3);
+	if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))	/* in range and at an even offset from first */
+		return c + p[2] - 1048576;
+	p = rbsearch(c, __touppers, nelem(__touppers)/2, 2);	/* singleton table: code point, biased delta */
+	if(p && c == p[0])
+		return c + p[1] - 1048576;
+	return c;
+}
+
+static Rune __tolowerr[] = {	/* triples: first, last, delta + 1048576; tolowerrune removes the bias and applies delta to the whole range */
+	0x0041, 0x005a, 1048608,
+	0x00c0, 0x00d6, 1048608,
+	0x00d8, 0x00de, 1048608,
+	0x0189, 0x018a, 1048781,
+	0x01b1, 0x01b2, 1048793,
+	0x0388, 0x038a, 1048613,
+	0x038e, 0x038f, 1048639,
+	0x0391, 0x03a1, 1048608,
+	0x03a3, 0x03ab, 1048608,
+	0x03fd, 0x03ff, 1048446,
+	0x0400, 0x040f, 1048656,
+	0x0410, 0x042f, 1048608,
+	0x0531, 0x0556, 1048624,
+	0x10a0, 0x10c5, 1055840,
+	0x1f08, 0x1f0f, 1048568,
+	0x1f18, 0x1f1d, 1048568,
+	0x1f28, 0x1f2f, 1048568,
+	0x1f38, 0x1f3f, 1048568,
+	0x1f48, 0x1f4d, 1048568,
+	0x1f68, 0x1f6f, 1048568,
+	0x1f88, 0x1f8f, 1048568,
+	0x1f98, 0x1f9f, 1048568,
+	0x1fa8, 0x1faf, 1048568,
+	0x1fb8, 0x1fb9, 1048568,
+	0x1fba, 0x1fbb, 1048502,
+	0x1fc8, 0x1fcb, 1048490,
+	0x1fd8, 0x1fd9, 1048568,
+	0x1fda, 0x1fdb, 1048476,
+	0x1fe8, 0x1fe9, 1048568,
+	0x1fea, 0x1feb, 1048464,
+	0x1ff8, 0x1ff9, 1048448,
+	0x1ffa, 0x1ffb, 1048450,
+	0x2160, 0x216f, 1048592,
+	0x24b6, 0x24cf, 1048602,
+	0x2c00, 0x2c2e, 1048624,
+	0xff21, 0xff3a, 1048608,
+	0x10400, 0x10427, 1048616,
+};
+
+static Rune __tolowerp[] = {	/* triples: first, last, delta + 1048576; tolowerrune maps only code points at an even offset from first */
+	0x0100, 0x012e, 1048577,
+	0x0132, 0x0136, 1048577,
+	0x0139, 0x0147, 1048577,
+	0x014a, 0x0176, 1048577,
+	0x017b, 0x017d, 1048577,
+	0x01a2, 0x01a4, 1048577,
+	0x01b3, 0x01b5, 1048577,
+	0x01cd, 0x01db, 1048577,
+	0x01de, 0x01ee, 1048577,
+	0x01f8, 0x021e, 1048577,
+	0x0222, 0x0232, 1048577,
+	0x0248, 0x024e, 1048577,
+	0x03d8, 0x03ee, 1048577,
+	0x0460, 0x0480, 1048577,
+	0x048a, 0x04be, 1048577,
+	0x04c3, 0x04cd, 1048577,
+	0x04d0, 0x0512, 1048577,
+	0x1e00, 0x1e94, 1048577,
+	0x1ea0, 0x1ef8, 1048577,
+	0x1f59, 0x1f5f, 1048568,
+	0x2c67, 0x2c6b, 1048577,
+	0x2c80, 0x2ce2, 1048577,
+};
+
+static Rune __tolowers[] = {	/* pairs: code point, delta + 1048576; tolowerrune applies these on an exact match */
+	0x0130, 1048377,
+	0x0178, 1048455,
+	0x0179, 1048577,
+	0x0181, 1048786,
+	0x0182, 1048577,
+	0x0184, 1048577,
+	0x0186, 1048782,
+	0x0187, 1048577,
+	0x018b, 1048577,
+	0x018e, 1048655,
+	0x018f, 1048778,
+	0x0190, 1048779,
+	0x0191, 1048577,
+	0x0193, 1048781,
+	0x0194, 1048783,
+	0x0196, 1048787,
+	0x0197, 1048785,
+	0x0198, 1048577,
+	0x019c, 1048787,
+	0x019d, 1048789,
+	0x019f, 1048790,
+	0x01a0, 1048577,
+	0x01a6, 1048794,
+	0x01a7, 1048577,
+	0x01a9, 1048794,
+	0x01ac, 1048577,
+	0x01ae, 1048794,
+	0x01af, 1048577,
+	0x01b7, 1048795,
+	0x01b8, 1048577,
+	0x01bc, 1048577,
+	0x01c4, 1048578,
+	0x01c5, 1048577,
+	0x01c7, 1048578,
+	0x01c8, 1048577,
+	0x01ca, 1048578,
+	0x01cb, 1048577,
+	0x01f1, 1048578,
+	0x01f2, 1048577,
+	0x01f4, 1048577,
+	0x01f6, 1048479,
+	0x01f7, 1048520,
+	0x0220, 1048446,
+	0x023a, 1059371,
+	0x023b, 1048577,
+	0x023d, 1048413,
+	0x023e, 1059368,
+	0x0241, 1048577,
+	0x0243, 1048381,
+	0x0244, 1048645,
+	0x0245, 1048647,
+	0x0246, 1048577,
+	0x0386, 1048614,
+	0x038c, 1048640,
+	0x03f4, 1048516,
+	0x03f7, 1048577,
+	0x03f9, 1048569,
+	0x03fa, 1048577,
+	0x04c0, 1048591,
+	0x04c1, 1048577,
+	0x1fbc, 1048567,
+	0x1fcc, 1048567,
+	0x1fec, 1048569,
+	0x1ffc, 1048567,
+	0x2126, 1041059,
+	0x212a, 1040193,
+	0x212b, 1040314,
+	0x2132, 1048604,
+	0x2183, 1048577,
+	0x2c60, 1048577,
+	0x2c62, 1037833,
+	0x2c63, 1044762,
+	0x2c64, 1037849,
+	0x2c75, 1048577,
+};
+
+Rune
+tolowerrune(Rune c)	/* returns c plus the delta of the first matching __tolower* entry, or c unchanged if none matches */
+{
+	Rune *p;
+
+	p = rbsearch(c, __tolowerr, nelem(__tolowerr)/3, 3);	/* 3-Rune entries: first, last, biased delta; rbsearch is defined outside this view */
+	if(p && c >= p[0] && c <= p[1])	/* explicit range check on the entry rbsearch returned */
+		return c + p[2] - 1048576;	/* stored delta is biased by 1048576 (0x100000) */
+	p = rbsearch(c, __tolowerp, nelem(__tolowerp)/3, 3);
+	if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))	/* in range and at an even offset from first */
+		return c + p[2] - 1048576;
+	p = rbsearch(c, __tolowers, nelem(__tolowers)/2, 2);	/* singleton table: code point, biased delta */
+	if(p && c == p[0])
+		return c + p[1] - 1048576;
+	return c;
+}
+
+static Rune __totitler[] = {	/* triples: first, last, delta + 1048576; totitlerune removes the bias and applies delta to the whole range */
+	0x0061, 0x007a, 1048544,
+	0x00e0, 0x00f6, 1048544,
+	0x00f8, 0x00fe, 1048544,
+	0x0256, 0x0257, 1048371,
+	0x028a, 0x028b, 1048359,
+	0x037b, 0x037d, 1048706,
+	0x03ad, 0x03af, 1048539,
+	0x03b1, 0x03c1, 1048544,
+	0x03c3, 0x03cb, 1048544,
+	0x03cd, 0x03ce, 1048513,
+	0x0430, 0x044f, 1048544,
+	0x0450, 0x045f, 1048496,
+	0x0561, 0x0586, 1048528,
+	0x1f00, 0x1f07, 1048584,
+	0x1f10, 0x1f15, 1048584,
+	0x1f20, 0x1f27, 1048584,
+	0x1f30, 0x1f37, 1048584,
+	0x1f40, 0x1f45, 1048584,
+	0x1f60, 0x1f67, 1048584,
+	0x1f70, 0x1f71, 1048650,
+	0x1f72, 0x1f75, 1048662,
+	0x1f76, 0x1f77, 1048676,
+	0x1f78, 0x1f79, 1048704,
+	0x1f7a, 0x1f7b, 1048688,
+	0x1f7c, 0x1f7d, 1048702,
+	0x1f80, 0x1f87, 1048584,
+	0x1f90, 0x1f97, 1048584,
+	0x1fa0, 0x1fa7, 1048584,
+	0x1fb0, 0x1fb1, 1048584,
+	0x1fd0, 0x1fd1, 1048584,
+	0x1fe0, 0x1fe1, 1048584,
+	0x2170, 0x217f, 1048560,
+	0x24d0, 0x24e9, 1048550,
+	0x2c30, 0x2c5e, 1048528,
+	0x2d00, 0x2d25, 1041312,
+	0xff41, 0xff5a, 1048544,
+	0x10428, 0x1044f, 1048536,
+};
+
+static Rune __totitlep[] = {	/* triples: first, last, delta + 1048576; totitlerune maps only code points at an even offset from first */
+	0x0101, 0x012f, 1048575,
+	0x0133, 0x0137, 1048575,
+	0x013a, 0x0148, 1048575,
+	0x014b, 0x0177, 1048575,
+	0x017a, 0x017e, 1048575,
+	0x0183, 0x0185, 1048575,
+	0x01a1, 0x01a5, 1048575,
+	0x01b4, 0x01b6, 1048575,
+	0x01cc, 0x01dc, 1048575,
+	0x01df, 0x01ef, 1048575,
+	0x01f3, 0x01f5, 1048575,
+	0x01f9, 0x021f, 1048575,
+	0x0223, 0x0233, 1048575,
+	0x0247, 0x024f, 1048575,
+	0x03d9, 0x03ef, 1048575,
+	0x0461, 0x0481, 1048575,
+	0x048b, 0x04bf, 1048575,
+	0x04c2, 0x04ce, 1048575,
+	0x04d1, 0x0513, 1048575,
+	0x1e01, 0x1e95, 1048575,
+	0x1ea1, 0x1ef9, 1048575,
+	0x1f51, 0x1f57, 1048584,
+	0x2c68, 0x2c6c, 1048575,
+	0x2c81, 0x2ce3, 1048575,
+};
+
+static Rune __totitles[] = {	/* pairs: code point, delta + 1048576; totitlerune applies these on an exact match */
+	0x00b5, 1049319,
+	0x00ff, 1048697,
+	0x0131, 1048344,
+	0x017f, 1048276,
+	0x0180, 1048771,
+	0x0188, 1048575,
+	0x018c, 1048575,
+	0x0192, 1048575,
+	0x0195, 1048673,
+	0x0199, 1048575,
+	0x019a, 1048739,
+	0x019e, 1048706,
+	0x01a8, 1048575,
+	0x01ad, 1048575,
+	0x01b0, 1048575,
+	0x01b9, 1048575,
+	0x01bd, 1048575,
+	0x01bf, 1048632,
+	0x01c4, 1048577,
+	0x01c6, 1048575,
+	0x01c7, 1048577,
+	0x01c9, 1048575,
+	0x01ca, 1048577,
+	0x01dd, 1048497,
+	0x01f1, 1048577,
+	0x023c, 1048575,
+	0x0242, 1048575,
+	0x0253, 1048366,
+	0x0254, 1048370,
+	0x0259, 1048374,
+	0x025b, 1048373,
+	0x0260, 1048371,
+	0x0263, 1048369,
+	0x0268, 1048367,
+	0x0269, 1048365,
+	0x026b, 1059319,
+	0x026f, 1048365,
+	0x0272, 1048363,
+	0x0275, 1048362,
+	0x027d, 1059303,
+	0x0280, 1048358,
+	0x0283, 1048358,
+	0x0288, 1048358,
+	0x0289, 1048507,
+	0x028c, 1048505,
+	0x0292, 1048357,
+	0x0345, 1048660,
+	0x03ac, 1048538,
+	0x03c2, 1048545,
+	0x03cc, 1048512,
+	0x03d0, 1048514,
+	0x03d1, 1048519,
+	0x03d5, 1048529,
+	0x03d6, 1048522,
+	0x03f0, 1048490,
+	0x03f1, 1048496,
+	0x03f2, 1048583,
+	0x03f5, 1048480,
+	0x03f8, 1048575,
+	0x03fb, 1048575,
+	0x04cf, 1048561,
+	0x1d7d, 1052390,
+	0x1e9b, 1048517,
+	0x1fb3, 1048585,
+	0x1fbe, 1041371,
+	0x1fc3, 1048585,
+	0x1fe5, 1048583,
+	0x1ff3, 1048585,
+	0x214e, 1048548,
+	0x2184, 1048575,
+	0x2c61, 1048575,
+	0x2c65, 1037781,
+	0x2c66, 1037784,
+	0x2c76, 1048575,
+};
+
+Rune
+totitlerune(Rune c)	/* returns c plus the delta of the first matching __totitle* entry, or c unchanged if none matches */
+{
+	Rune *p;
+
+	p = rbsearch(c, __totitler, nelem(__totitler)/3, 3);	/* 3-Rune entries: first, last, biased delta; rbsearch is defined outside this view */
+	if(p && c >= p[0] && c <= p[1])	/* explicit range check on the entry rbsearch returned */
+		return c + p[2] - 1048576;	/* stored delta is biased by 1048576 (0x100000) */
+	p = rbsearch(c, __totitlep, nelem(__totitlep)/3, 3);
+	if(p && c >= p[0] && c <= p[1] && !((c - p[0]) & 1))	/* in range and at an even offset from first */
+		return c + p[2] - 1048576;
+	p = rbsearch(c, __totitles, nelem(__totitles)/2, 2);	/* singleton table: code point, biased delta */
+	if(p && c == p[0])
+		return c + p[1] - 1048576;
+	return c;
+}
+
diff --git a/utf.h b/utf.h
index 44052f4..02ba472 100644
--- a/utf.h
+++ b/utf.h
@@ -1,54 +1,233 @@
-#ifndef _UTF_H_
-#define _UTF_H_ 1
-#if defined(__cplusplus)
-extern "C" { 
-#endif
+#ifndef _UTFH_
+#define _UTFH_ 1
 
-typedef unsigned int Rune;	/* 32 bits */
+#include <stdint.h>
+
+typedef signed int Rune;	/* Code-point values in Unicode 4.0 are 21 bits wide.*/
 
 enum
 {
-	UTFmax		= 4,		/* maximum bytes per rune */
-	Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
-	Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
-	Runeerror	= 0xFFFD,	/* decoding error in UTF */
-	Runemax = 0x10FFFF	/* maximum rune value */
+  UTFmax	= 4,		/* maximum bytes per rune */
+  Runesync	= 0x80,		/* cannot represent part of a UTF sequence (<) */
+  Runeself	= 0x80,		/* rune and UTF sequences are the same (<) */
+  Runeerror	= 0xFFFD,	/* decoding error in UTF */
+  Runemax	= 0x10FFFF,	/* maximum rune value */
 };
 
-/* Edit .+1,/^$/ | cfn $PLAN9/src/lib9/utf/?*.c | grep -v static |grep -v __ */
-int		chartorune(Rune *rune, char *str);
-int		fullrune(char *str, int n);
-int		isalpharune(Rune c);
-int		islowerrune(Rune c);
-int		isspacerune(Rune c);
-int		istitlerune(Rune c);
-int		isupperrune(Rune c);
-int		runelen(long c);
-int		runenlen(Rune *r, int nrune);
-Rune*		runestrcat(Rune *s1, Rune *s2);
-Rune*		runestrchr(Rune *s, Rune c);
-int		runestrcmp(Rune *s1, Rune *s2);
-Rune*		runestrcpy(Rune *s1, Rune *s2);
-Rune*		runestrdup(Rune *s) ;
-Rune*		runestrecpy(Rune *s1, Rune *es1, Rune *s2);
-long		runestrlen(Rune *s);
-Rune*		runestrncat(Rune *s1, Rune *s2, long n);
-int		runestrncmp(Rune *s1, Rune *s2, long n);
-Rune*		runestrncpy(Rune *s1, Rune *s2, long n);
-Rune*		runestrrchr(Rune *s, Rune c);
-Rune*		runestrstr(Rune *s1, Rune *s2);
-int		runetochar(char *str, Rune *rune);
-Rune		tolowerrune(Rune c);
-Rune		totitlerune(Rune c);
-Rune		toupperrune(Rune c);
-char*		utfecpy(char *to, char *e, char *from);
-int		utflen(char *s);
-int		utfnlen(char *s, long m);
-char*		utfrrune(char *s, long c);
-char*		utfrune(char *s, long c);
-char*		utfutf(char *s1, char *s2);
+#ifdef	__cplusplus
+extern "C" {
+#endif
 
-#if defined(__cplusplus)
+/*
+ * rune routines
+ */
+
+/*
+ * These routines were written by Rob Pike and Ken Thompson
+ * and first appeared in Plan 9.
+ * SEE ALSO
+ * utf (7)
+ * tcs (1)
+*/
+
+// runetochar copies (encodes) one rune, pointed to by r, to at most
+// UTFmax bytes starting at s and returns the number of bytes generated.
+
+int runetochar(char* s, const Rune* r);
+
+
+// chartorune copies (decodes) at most UTFmax bytes starting at s to
+// one rune, pointed to by r, and returns the number of bytes consumed.
+// If the input is not exactly in UTF format, chartorune will set *r
+// to Runeerror and return 1.
+//
+// Note: There is no special case for a "null-terminated" string. A
+// string whose first byte has the value 0 is the UTF8 encoding of the
+// Unicode value 0 (i.e., ASCII NUL). A byte value of 0 is illegal
+// anywhere else in a UTF sequence.
+
+int chartorune(Rune* r, const char* s);
+
+
+// charntorune is like chartorune, except that it will access at most
+// n bytes of s.  If the UTF sequence is incomplete within n bytes,
+// charntorune will set *r to Runeerror and return 0. If it is complete
+// but not in UTF format, it will set *r to Runeerror and return 1.
+// 
+// Added 2004-09-24 by Wei-Hwa Huang
+
+int charntorune(Rune* r, const char* s, int n);
+
+// isvalidcharntorune(str, n, r, consumed)
+// is a convenience function that calls "*consumed = charntorune(r, str, n)"
+// and returns an int (logically boolean) indicating whether the first
+// n bytes of str was a valid and complete UTF sequence.
+
+int isvalidcharntorune(const char* str, int n, Rune* r, int* consumed);
+
+// runelen returns the number of bytes required to convert r into UTF.
+
+int runelen(Rune r);
+
+
+// runenlen returns the number of bytes required to convert the n
+// runes pointed to by r into UTF.
+
+int runenlen(const Rune* r, int n);
+
+
+// fullrune returns 1 if the string s of length n is long enough to be
+// decoded by chartorune, and 0 otherwise. This does not guarantee
+// that the string contains a legal UTF encoding. This routine is used
+// by programs that obtain input one byte at a time and need to know
+// when a full rune has arrived.
+
+int fullrune(const char* s, int n);
+
+// The following routines are analogous to the corresponding string
+// routines with "utf" substituted for "str", and "rune" substituted
+// for "chr".
+
+// utflen returns the number of runes that are represented by the UTF
+// string s. (cf. strlen)
+
+int utflen(const char* s);
+
+
+// utfnlen returns the number of complete runes that are represented
+// by the first n bytes of the UTF string s. If the last few bytes of
+// the string contain an incompletely coded rune, utfnlen will not
+// count them; in this way, it differs from utflen, which includes
+// every byte of the string. (cf. strnlen)
+
+int utfnlen(const char* s, long n);
+
+
+// utfrune returns a pointer to the first occurrence of rune r in the
+// UTF string s, or 0 if r does not occur in the string.  The NUL
+// byte terminating a string is considered to be part of the string s.
+// (cf. strchr)
+
+const char* utfrune(const char* s, Rune r);
+
+
+// utfrrune returns a pointer to the last occurrence of rune r in the
+// UTF string s, or 0 if r does not occur in the string.  The NUL
+// byte terminating a string is considered to be part of the string s.
+// (cf. strrchr)
+
+const char* utfrrune(const char* s, Rune r);
+
+
+// utfutf returns a pointer to the first occurrence of the UTF string
+// s2 as a UTF substring of s1, or 0 if there is none. If s2 is the
+// null string, utfutf returns s1. (cf. strstr)
+
+const char* utfutf(const char* s1, const char* s2);
+
+
+// utfecpy copies UTF sequences until a null sequence has been copied,
+// but writes no sequences beyond es1.  If any sequences are copied,
+// s1 is terminated by a null sequence, and a pointer to that sequence
+// is returned.  Otherwise, the original s1 is returned. (cf. strecpy)
+
+char* utfecpy(char *s1, char *es1, const char *s2);
+
+
+
+// These functions are rune-string analogues of the corresponding
+// functions in strcat (3).
+// 
+// These routines first appeared in Plan 9.
+// SEE ALSO
+// memmove (3)
+// rune (3)
+// strcat (2)
+//
+// BUGS: The outcome of overlapping moves varies among implementations.
+
+Rune* runestrcat(Rune* s1, const Rune* s2);
+Rune* runestrncat(Rune* s1, const Rune* s2, long n);
+
+const Rune* runestrchr(const Rune* s, Rune c);
+
+int runestrcmp(const Rune* s1, const Rune* s2);
+int runestrncmp(const Rune* s1, const Rune* s2, long n);
+
+Rune* runestrcpy(Rune* s1, const Rune* s2);
+Rune* runestrncpy(Rune* s1, const Rune* s2, long n);
+Rune* runestrecpy(Rune* s1, Rune* es1, const Rune* s2);
+
+Rune* runestrdup(const Rune* s);
+
+const Rune* runestrrchr(const Rune* s, Rune c);
+long runestrlen(const Rune* s);
+const Rune* runestrstr(const Rune* s1, const Rune* s2);
+
+
+
+// The following routines test types and modify cases for Unicode
+// characters.  Unicode defines some characters as letters and
+// specifies three cases: upper, lower, and title.  Mappings among the
+// cases are also defined, although they are not exhaustive: some
+// upper case letters have no lower case mapping, and so on.  Unicode
+// also defines several character properties, a subset of which are
+// checked by these routines.  These routines are based on Unicode
+// version 3.0.0.
+//
+// NOTE: The routines are implemented in C, so the boolean functions
+// (e.g., isupperrune) return 0 for false and 1 for true.
+//
+//
+// toupperrune, tolowerrune, and totitlerune are the Unicode case
+// mappings. These routines return the character unchanged if it has
+// no defined mapping.
+
+Rune toupperrune(Rune r);
+Rune tolowerrune(Rune r);
+Rune totitlerune(Rune r);
+
+
+// isupperrune tests for upper case characters, including Unicode
+// upper case letters and targets of the toupper mapping. islowerrune
+// and istitlerune are defined analogously. 
+ 
+int isupperrune(Rune r);
+int islowerrune(Rune r);
+int istitlerune(Rune r);
+
+
+// isalpharune tests for Unicode letters; this includes ideographs in
+// addition to alphabetic characters.
+
+int isalpharune(Rune r);
+
+
+// isdigitrune tests for digits. Non-digit numbers, such as Roman
+// numerals, are not included.
+
+int isdigitrune(Rune r);
+
+
+// isideographicrune tests for ideographic characters and numbers, as
+// defined by the Unicode standard.
+
+int isideographicrune(Rune r);
+
+
+// isspacerune tests for whitespace characters, including "C" locale
+// whitespace, Unicode defined whitespace, and the "zero-width
+// non-break space" character.
+
+int isspacerune(Rune r);
+
+
+// (The comments in this file were copied from the manpage files rune.3,
+// isalpharune.3, and runestrcat.3. Some formatting changes were also made
+// to conform to Google style. /JRM 11/11/05)
+
+#ifdef	__cplusplus
 }
 #endif
+
 #endif
diff --git a/utfdef.h b/utfdef.h
index 1ff4181..4b58ae8 100644
--- a/utfdef.h
+++ b/utfdef.h
@@ -1,33 +1,14 @@
-/*
- * compiler directive on Plan 9
- */
-#ifndef USED
-#define USED(x) if(x);else
-#endif
+#define uchar _utfuchar	/* the short type names are remapped to _utf* identifiers before the typedefs that follow */
+#define ushort _utfushort
+#define uint _utfuint
+#define ulong _utfulong
+#define vlong _utfvlong	/* NOTE(review): no vlong or uvlong typedef follows in this version, so these two macros look unused - confirm */
+#define uvlong _utfuvlong
 
-/*
- * easiest way to make sure these are defined
- */
-#define uchar	_fmtuchar
-#define ushort	_fmtushort
-#define uint	_fmtuint
-#define ulong	_fmtulong
-#define vlong	_fmtvlong
-#define uvlong	_fmtuvlong
 typedef unsigned char		uchar;
 typedef unsigned short		ushort;
 typedef unsigned int		uint;
 typedef unsigned long		ulong;
-typedef unsigned long long	uvlong;
-typedef long long		vlong;
 
-/*
- * nil cannot be ((void*)0) on ANSI C,
- * because it is used for function pointers
- */
-#undef	nil
-#define	nil	0
-
-#undef	nelem
-#define	nelem	((void*)0)
-
+#define nelem(x) (sizeof(x)/sizeof((x)[0]))	/* element count of x; only meaningful when x is an array, not a pointer */
+#define nil ((void*)0)	/* NOTE(review): earlier versions kept nil as plain 0 so it could be used with function pointers - confirm no user needs that */
diff --git a/utfecpy.c b/utfecpy.c
index cf3535f..e733a0f 100644
--- a/utfecpy.c
+++ b/utfecpy.c
@@ -7,24 +7,23 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
-#define _BSD_SOURCE 1	/* memccpy */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 char*
-utfecpy(char *to, char *e, char *from)
+utfecpy(char *to, char *e, const char *from)
 {
 	char *end;
 
 	if(to >= e)
 		return to;
-	end = memccpy(to, from, '\0', e - to);
+	end = (char*)memccpy(to, from, '\0', e - to);
 	if(end == nil){
 		end = e-1;
 		while(end>to && (*--end&0xC0)==0x80)
diff --git a/utflen.c b/utflen.c
index 769805a..45653d5 100644
--- a/utflen.c
+++ b/utflen.c
@@ -7,17 +7,17 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 int
-utflen(char *s)
+utflen(const char *s)
 {
 	int c;
 	long n;
@@ -34,4 +34,5 @@
 			s += chartorune(&rune, s);
 		n++;
 	}
+	return 0;
 }
diff --git a/utfnlen.c b/utfnlen.c
index 6680329..d673c82 100644
--- a/utfnlen.c
+++ b/utfnlen.c
@@ -7,22 +7,22 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 int
-utfnlen(char *s, long m)
+utfnlen(const char *s, long m)
 {
 	int c;
 	long n;
 	Rune rune;
-	char *es;
+	const char *es;
 
 	es = s + m;
 	for(n = 0; s < es; n++) {
diff --git a/utfrrune.c b/utfrrune.c
index cff12b5..c0b89f5 100644
--- a/utfrrune.c
+++ b/utfrrune.c
@@ -7,21 +7,22 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
+const
 char*
-utfrrune(char *s, long c)
+utfrrune(const char *s, Rune c)
 {
 	long c1;
 	Rune r;
-	char *s1;
+	const char *s1;
 
 	if(c < Runesync)		/* not part of utf sequence */
 		return strrchr(s, c);
@@ -42,4 +43,5 @@
 			s1 = s;
 		s += c1;
 	}
+	return 0;
 }
diff --git a/utfrune.c b/utfrune.c
index 52b8359..913783f 100644
--- a/utfrune.c
+++ b/utfrune.c
@@ -7,17 +7,18 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
+const
 char*
-utfrune(char *s, long c)
+utfrune(const char *s, Rune c)
 {
 	long c1;
 	Rune r;
@@ -41,4 +42,5 @@
 			return s;
 		s += n;
 	}
+	return 0;
 }
diff --git a/utfutf.c b/utfutf.c
index 13c8502..ec49231 100644
--- a/utfutf.c
+++ b/utfutf.c
@@ -7,24 +7,25 @@
  * or modification of this software and in all copies of the supporting
  * documentation for such software.
  * THIS SOFTWARE IS BEING PROVIDED "AS IS", WITHOUT ANY EXPRESS OR IMPLIED
- * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE
- * ANY REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
+ * WARRANTY.  IN PARTICULAR, NEITHER THE AUTHORS NOR LUCENT TECHNOLOGIES MAKE ANY
+ * REPRESENTATION OR WARRANTY OF ANY KIND CONCERNING THE MERCHANTABILITY
  * OF THIS SOFTWARE OR ITS FITNESS FOR ANY PARTICULAR PURPOSE.
  */
 #include <stdarg.h>
 #include <string.h>
-#include "plan9.h"
 #include "utf.h"
+#include "utfdef.h"
 
 
 /*
  * Return pointer to first occurrence of s2 in s1,
  * 0 if none
  */
+const
 char*
-utfutf(char *s1, char *s2)
+utfutf(const char *s1, const char *s2)
 {
-	char *p;
+	const char *p;
 	long f, n1, n2;
 	Rune r;
 
@@ -34,7 +35,7 @@
 		return strstr(s1, s2);
 
 	n2 = strlen(s2);
-	for(p=s1; p=utfrune(p, f); p+=n1)
+	for(p=s1; (p=utfrune(p, f)) != 0; p+=n1)
 		if(strncmp(p, s2, n2) == 0)
 			return p;
 	return 0;