#include <linux/nls.h>
#include <linux/kernel.h>
#include <linux/errno.h>
-#ifdef CONFIG_KMOD
#include <linux/kmod.h>
-#endif
#include <linux/spinlock.h>
+#include <asm/byteorder.h>
static struct nls_table default_table;
static struct nls_table *tables = &default_table;
long lval;
};
-static struct utf8_table utf8_table[] =
+static const struct utf8_table utf8_table[] =
{
{0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
{0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
{0, /* end of table */}
};
-int
-utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
+#define UNICODE_MAX 0x0010ffff /* largest code point the UTF-8/UTF-32 converters accept */
+#define PLANE_SIZE 0x00010000 /* code points at or above this are written as a UTF-16 surrogate pair */
+
+#define SURROGATE_MASK 0xfffff800 /* (x & SURROGATE_MASK) == SURROGATE_PAIR selects 0xd800..0xdfff */
+#define SURROGATE_PAIR 0x0000d800 /* base value shared by both halves of a surrogate pair */
+#define SURROGATE_LOW 0x00000400 /* set in the second (low) half, clear in the first (high) half */
+#define SURROGATE_BITS 0x000003ff /* the 10 payload bits carried by each half */
+
+int utf8_to_utf32(const u8 *s, int len, unicode_t *pu)
{
- long l;
+ unsigned long l;
int c0, c, nc;
- struct utf8_table *t;
+ const struct utf8_table *t;
nc = 0;
c0 = *s;
nc++;
if ((c0 & t->cmask) == t->cval) {
l &= t->lmask;
- if (l < t->lval)
+ if (l < t->lval || l > UNICODE_MAX ||
+ (l & SURROGATE_MASK) == SURROGATE_PAIR)
return -1;
- *p = l;
+ *pu = (unicode_t) l;
return nc;
}
- if (n <= nc)
+ if (len <= nc)
return -1;
s++;
c = (*s ^ 0x80) & 0xFF;
}
return -1;
}
+EXPORT_SYMBOL(utf8_to_utf32);
-int
-utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
-{
- __u16 *op;
- const __u8 *ip;
- int size;
-
- op = pwcs;
- ip = s;
- while (*ip && n > 0) {
- if (*ip & 0x80) {
- size = utf8_mbtowc(op, ip, n);
- if (size == -1) {
- /* Ignore character and move on */
- ip++;
- n--;
- } else {
- op++;
- ip += size;
- n -= size;
- }
- } else {
- *op++ = *ip++;
- n--;
- }
- }
- return (op - pwcs);
-}
-
-int
-utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
+int utf32_to_utf8(unicode_t u, u8 *s, int maxlen)
{
- long l;
+ unsigned long l;
int c, nc;
- struct utf8_table *t;
-
- if (s == 0)
+ const struct utf8_table *t;
+/*
+ * Encode the single code point u as UTF-8 at s, using at most maxlen
+ * bytes.  Returns the number of bytes written, 0 when s is NULL, or -1
+ * when u is above UNICODE_MAX, is a surrogate value, or does not fit.
+ */
+ if (!s)
return 0;
-
- l = wc;
+/* Values above UNICODE_MAX and surrogate values 0xd800..0xdfff are refused. */
+ l = u;
+ if (l > UNICODE_MAX || (l & SURROGATE_MASK) == SURROGATE_PAIR)
+ return -1;
+/* Every table row tried costs one unit of maxlen; nc is that row's sequence length. */
nc = 0;
for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
nc++;
if (l <= t->lmask) {
c = t->shift;
- *s = t->cval | (l >> c);
+ *s = (u8) (t->cval | (l >> c)); /* lead byte: the row's prefix bits plus the top bits of l */
while (c > 0) {
c -= 6;
s++;
- *s = 0x80 | ((l >> c) & 0x3F);
+ *s = (u8) (0x80 | ((l >> c) & 0x3F)); /* continuation byte: 10xxxxxx with the next 6 bits */
}
return nc;
}
}
return -1;
}
+EXPORT_SYMBOL(utf32_to_utf8);
+
+/*
+ * utf8s_to_utf16s - convert a UTF-8 byte string to UTF-16 units
+ * @s:    input bytes
+ * @len:  number of input bytes available at @s
+ * @pwcs: output buffer; no capacity is passed in, so the caller must
+ *        provide one large enough for the converted string
+ *
+ * Conversion stops at the first NUL byte or once @len bytes have been
+ * consumed, whichever comes first.  Code points at or above PLANE_SIZE are
+ * written as a surrogate pair (high half first); everything else is
+ * written as a single unit.  Units are stored as plain CPU-order values.
+ *
+ * Returns the number of 16-bit units written, or -EINVAL as soon as
+ * utf8_to_utf32() rejects a sequence.
+ */
+int utf8s_to_utf16s(const u8 *s, int len, wchar_t *pwcs)
+{
+	u16 *op;
+	int size;
+	unicode_t u;
+
+	op = pwcs;
+	/*
+	 * Test len before dereferencing s: when the input is exactly len
+	 * bytes long and not NUL-terminated, s points one past the end
+	 * once len reaches 0 and must not be read.
+	 */
+	while (len > 0 && *s) {
+		if (*s & 0x80) {
+			/* multi-byte sequence: decode one full code point */
+			size = utf8_to_utf32(s, len, &u);
+			if (size < 0)
+				return -EINVAL;
+
+			if (u >= PLANE_SIZE) {
+				/* split the offset above PLANE_SIZE into two 10-bit halves */
+				u -= PLANE_SIZE;
+				*op++ = (wchar_t) (SURROGATE_PAIR |
+						((u >> 10) & SURROGATE_BITS));
+				*op++ = (wchar_t) (SURROGATE_PAIR |
+						SURROGATE_LOW |
+						(u & SURROGATE_BITS));
+			} else {
+				*op++ = (wchar_t) u;
+			}
+			s += size;
+			len -= size;
+		} else {
+			/* 7-bit byte: copied through unchanged */
+			*op++ = *s++;
+			len--;
+		}
+	}
+	return op - pwcs;
+}
+EXPORT_SYMBOL(utf8s_to_utf16s);
+
+/*
+ * Return the 16-bit unit c converted to CPU byte order as selected by
+ * endian.  Any value other than the two explicit byte orders leaves c
+ * unchanged.
+ */
+static inline unsigned long get_utf16(unsigned c, enum utf16_endian endian)
+{
+	if (endian == UTF16_LITTLE_ENDIAN)
+		return __le16_to_cpu(c);
+	if (endian == UTF16_BIG_ENDIAN)
+		return __be16_to_cpu(c);
+	return c;
+}
-int
-utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
+int utf16s_to_utf8s(const wchar_t *pwcs, int len, enum utf16_endian endian, /* in: UTF-16 units, unit count, byte order */
+ u8 *s, int maxlen) /* out: UTF-8 buffer and its capacity in bytes; returns bytes written */
{
- const __u16 *ip;
- __u8 *op;
+ u8 *op; /* next free byte in s */
int size;
+ unsigned long u, v; /* u: current unit, later the combined code point; v: unit following a high surrogate */
op = s;
- ip = pwcs;
- while (*ip && maxlen > 0) {
- if (*ip > 0x7f) {
- size = utf8_wctomb(op, *ip, maxlen);
+ while (len > 0 && maxlen > 0) { /* stop when input units or output space run out */
+ u = get_utf16(*pwcs, endian);
+ if (!u) /* a zero unit also ends the input */
+ break;
+ pwcs++;
+ len--;
+ if (u > 0x7f) {
+ if ((u & SURROGATE_MASK) == SURROGATE_PAIR) { /* u is in 0xd800..0xdfff */
+ if (u & SURROGATE_LOW) {
+ /* Low half with no high half before it: drop it and move on */
+ continue;
+ }
+ if (len <= 0) /* high half was the last unit: nothing to pair it with */
+ break;
+ v = get_utf16(*pwcs, endian);
+ if ((v & SURROGATE_MASK) != SURROGATE_PAIR ||
+ !(v & SURROGATE_LOW)) {
+ /* High half not followed by a low half: drop it; v is not consumed and is read again as the next unit */
+ continue;
+ }
+ u = PLANE_SIZE + ((u & SURROGATE_BITS) << 10)
+ + (v & SURROGATE_BITS); /* join the two 10-bit halves into one code point */
+ pwcs++;
+ len--;
+ }
+ size = utf32_to_utf8(u, op, maxlen);
if (size == -1) {
/* utf32_to_utf8() refused the code point: skip it, output position unchanged */
- maxlen--;
} else {
op += size;
maxlen -= size;
}
} else {
- *op++ = (__u8) *ip;
+ *op++ = (u8) u; /* 0x01..0x7f is copied through as a single byte */
+ maxlen--;
}
- ip++;
}
- return (op - s);
+ return op - s;
}
+EXPORT_SYMBOL(utf16s_to_utf8s);
int register_nls(struct nls_table * nls)
{
struct nls_table *load_nls(char *charset) /* look up the table for charset, requesting module "nls_<charset>" when the first lookup misses */
{
- struct nls_table *nls;
-#ifdef CONFIG_KMOD
- int ret;
-#endif
-
- nls = find_nls(charset);
- if (nls)
- return nls;
-
-#ifdef CONFIG_KMOD
- ret = request_module("nls_%s", charset);
- if (ret != 0) {
- printk("Unable to load NLS charset %s\n", charset);
- return NULL;
- }
- nls = find_nls(charset);
-#endif
- return nls;
+ return try_then_request_module(find_nls(charset), "nls_%s", charset); /* replaces the open-coded find / request_module / find sequence */
}
void unload_nls(struct nls_table *nls) /* release a table; a NULL nls is accepted and ignored */
{
- module_put(nls->owner);
+ if (nls) /* guard added so callers need not check for NULL themselves */
+ module_put(nls->owner); /* drop the reference held on the module that owns the table */
}
-static wchar_t charset2uni[256] = {
+static const wchar_t charset2uni[256] = {
/* 0x00*/
0x0000, 0x0001, 0x0002, 0x0003,
0x0004, 0x0005, 0x0006, 0x0007,
0x00fc, 0x00fd, 0x00fe, 0x00ff,
};
-static unsigned char page00[256] = {
+static const unsigned char page00[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
};
-static unsigned char *page_uni2charset[256] = {
+static const unsigned char *const page_uni2charset[256] = {
page00 /* only entry 0 is initialised; the other 255 pointers are implicitly NULL */
};
-static unsigned char charset2lower[256] = {
+static const unsigned char charset2lower[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
};
-static unsigned char charset2upper[256] = {
+static const unsigned char charset2upper[256] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
{
- unsigned char *uni2charset;
+ const unsigned char *uni2charset;
unsigned char cl = uni & 0x00ff;
unsigned char ch = (uni & 0xff00) >> 8;
EXPORT_SYMBOL(unload_nls);
EXPORT_SYMBOL(load_nls);
EXPORT_SYMBOL(load_nls_default);
-EXPORT_SYMBOL(utf8_mbtowc);
-EXPORT_SYMBOL(utf8_mbstowcs);
-EXPORT_SYMBOL(utf8_wctomb);
-EXPORT_SYMBOL(utf8_wcstombs);
MODULE_LICENSE("Dual BSD/GPL");