minix: replace inode uid,gid,mode init with helper
[safe/jmp/linux-2.6] / fs / nls / nls_base.c
index 750abf2..44a88a9 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/errno.h>
 #include <linux/kmod.h>
 #include <linux/spinlock.h>
+#include <asm/byteorder.h>
 
 static struct nls_table default_table;
 static struct nls_table *tables = &default_table;
@@ -43,10 +44,17 @@ static const struct utf8_table utf8_table[] =
     {0,                                                       /* end of table    */}
 };
 
-int
-utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
+#define UNICODE_MAX    0x0010ffff
+#define PLANE_SIZE     0x00010000
+
+#define SURROGATE_MASK 0xfffff800
+#define SURROGATE_PAIR 0x0000d800
+#define SURROGATE_LOW  0x00000400
+#define SURROGATE_BITS 0x000003ff
+
+int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
 {
-       long l;
+       unsigned long l;
        int c0, c, nc;
        const struct utf8_table *t;
   
@@ -57,12 +65,13 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
                nc++;
                if ((c0 & t->cmask) == t->cval) {
                        l &= t->lmask;
-                       if (l < t->lval)
+                       if (l < t->lval || l > UNICODE_MAX ||
+                                       (l & SURROGATE_MASK) == SURROGATE_PAIR)
                                return -1;
-                       *p = l;
+                       *pu = (unicode_t) l;
                        return nc;
                }
-               if (n <= nc)
+               if (len <= nc)
                        return -1;
                s++;
                c = (*s ^ 0x80) & 0xFF;
@@ -72,76 +81,119 @@ utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
        }
        return -1;
 }
+EXPORT_SYMBOL(utf8_to_utf32);
 
-int
-utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
+int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
 {
-       __u16 *op;
-       const __u8 *ip;
-       int size;
-
-       op = pwcs;
-       ip = s;
-       while (*ip && n > 0) {
-               if (*ip & 0x80) {
-                       size = utf8_mbtowc(op, ip, n);
-                       if (size == -1) {
-                               /* Ignore character and move on */
-                               ip++;
-                               n--;
-                       } else {
-                               op++;
-                               ip += size;
-                               n -= size;
-                       }
-               } else {
-                       *op++ = *ip++;
-                       n--;
-               }
-       }
-       return (op - pwcs);
-}
-
-int
-utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
-{
-       long l;
+       unsigned long l;
        int c, nc;
        const struct utf8_table *t;
-  
+
        if (!s)
                return 0;
-  
-       l = wc;
+
+       l = u;
+       if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
+               return -1;
+
        nc = 0;
        for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
                nc++;
                if (l <= t->lmask) {
                        c = t->shift;
-                       *s = t->cval | (l >> c);
+                       *s = (u8) (t->cval | (l >> c));
                        while (c > 0) {
                                c -= 6;
                                s++;
-                               *s = 0x80 | ((l >> c) & 0x3F);
+                               *s = (u8) (0x80 | ((l >> c) & 0x3F));
                        }
                        return nc;
                }
        }
        return -1;
 }
+EXPORT_SYMBOL(utf32_to_utf8);
+
+int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
+{
+       u16 *op;
+       int size;
+       unicode_t u;
+
+       op = pwcs;
+       while (*s && len > 0) {
+               if (*s & 0x80) {
+                       size = utf8_to_utf32(s, len, &u);
+                       if (size < 0)
+                               return -EINVAL;
+
+                       if (u >= PLANE_SIZE) {
+                               u -= PLANE_SIZE;
+                               *op++ = (wchar_t) (SURROGATE_PAIR |
+                                               ((u >> 10) & SURROGATE_BITS));
+                               *op++ = (wchar_t) (SURROGATE_PAIR |
+                                               SURROGATE_LOW |
+                                               (u & SURROGATE_BITS));
+                       } else {
+                               *op++ = (wchar_t) u;
+                       }
+                       s += size;
+                       len -= size;
+               } else {
+                       *op++ = *s++;
+                       len--;
+               }
+       }
+       return op - pwcs;
+}
+EXPORT_SYMBOL(utf8s_to_utf16s);
 
-int
-utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
+static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
 {
-       const __u16 *ip;
-       __u8 *op;
+       switch (endian) {
+       default:
+               return c;
+       case UTF16_LITTLE_ENDIAN:
+               return __le16_to_cpu(c);
+       case UTF16_BIG_ENDIAN:
+               return __be16_to_cpu(c);
+       }
+}
+
+int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian,
+               u8 *s, int maxlen)
+{
+       u8 *op;
        int size;
+       unsigned long u, v;
 
        op = s;
-       ip = pwcs;
-       while (*ip && maxlen > 0) {
-               if (*ip > 0x7f) {
-                       size = utf8_wctomb(op, *ip, maxlen);
+       while (len > 0 && maxlen > 0) {
+               u = get_utf16(*pwcs, endian);
+               if (!u)
+                       break;
+               pwcs++;
+               len--;
+               if (u > 0x7f) {
+                       if ((u & SURROGATE_MASK) == SURROGATE_PAIR) {
+                               if (u & SURROGATE_LOW) {
+                                       /* Ignore character and move on */
+                                       continue;
+                               }
+                               if (len <= 0)
+                                       break;
+                               v = get_utf16(*pwcs, endian);
+                               if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
+                                               !(v & SURROGATE_LOW)) {
+                                       /* Ignore character and move on */
+                                       continue;
+                               }
+                               u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
+                                               + (v & SURROGATE_BITS);
+                               pwcs++;
+                               len--;
+                       }
+                       size = utf32_to_utf8(u, op, maxlen);
                        if (size == -1) {
                                /* Ignore character and move on */
                        } else {
@@ -149,13 +201,13 @@ utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
                                maxlen -= size;
                        }
                } else {
-                       *op++ = (__u8) *ip;
+                       *op++ = (u8) u;
                        maxlen--;
                }
-               ip++;
        }
-       return (op - s);
+       return op - s;
 }
+EXPORT_SYMBOL(utf16s_to_utf8s);
 
 int register_nls(struct nls_table * nls)
 {
@@ -218,7 +270,8 @@ struct nls_table *load_nls(char *charset)
 
 void unload_nls(struct nls_table *nls)
 {
-       module_put(nls->owner);
+       if (nls)
+               module_put(nls->owner);
 }
 
 static const wchar_t charset2uni[256] = {
@@ -467,9 +520,5 @@ EXPORT_SYMBOL(unregister_nls);
 EXPORT_SYMBOL(unload_nls);
 EXPORT_SYMBOL(load_nls);
 EXPORT_SYMBOL(load_nls_default);
-EXPORT_SYMBOL(utf8_mbtowc);
-EXPORT_SYMBOL(utf8_mbstowcs);
-EXPORT_SYMBOL(utf8_wctomb);
-EXPORT_SYMBOL(utf8_wcstombs);
 
 MODULE_LICENSE("Dual BSD/GPL");