4 * Native language support--charsets and unicode translations.
5 * By Gordon Chaffee 1996, 1997
7 * Unicode based case conversion 1999 by Wolfram Pienkoss
11 #include <linux/version.h>
12 #include <linux/module.h>
13 #include <linux/string.h>
14 #include <linux/config.h>
15 #include <linux/nls.h>
16 #include <linux/slab.h>
17 #include <linux/errno.h>
19 #include <linux/kmod.h>
21 #include <linux/spinlock.h>
/*
 * Registry state for NLS tables.  `tables` is the list head that find_nls()
 * walks through each entry's ->next pointer; `nls_lock` is the spinlock that
 * register_nls(), unregister_nls() and find_nls() take while touching it.
 */
23 static struct nls_table *tables;
24 static spinlock_t nls_lock = SPIN_LOCK_UNLOCKED;
27 * Sample implementation from Unicode home page.
28 * http://www.stonehand.com/unicode/standard/fss-utf.html
/*
 * UTF-8 encoding parameters, one row per sequence length from 1 to 6 bytes
 * (see the per-row comments).  The last row starts with 0 and serves as the
 * end marker: the loops in this file stop once t->cmask is 0.
 * NOTE(review): the struct utf8_table declaration is not visible in this
 * excerpt; the first two columns are assumed to be cmask and cval, matching
 * the (c0 & t->cmask) == t->cval test in utf8_mbtowc() -- confirm.
 */
38 static struct utf8_table utf8_table[] =
40 {0x80, 0x00, 0*6, 0x7F, 0, /* 1 byte sequence */},
41 {0xE0, 0xC0, 1*6, 0x7FF, 0x80, /* 2 byte sequence */},
42 {0xF0, 0xE0, 2*6, 0xFFFF, 0x800, /* 3 byte sequence */},
43 {0xF8, 0xF0, 3*6, 0x1FFFFF, 0x10000, /* 4 byte sequence */},
44 {0xFC, 0xF8, 4*6, 0x3FFFFFF, 0x200000, /* 5 byte sequence */},
45 {0xFE, 0xFC, 5*6, 0x7FFFFFFF, 0x4000000, /* 6 byte sequence */},
46 {0, /* end of table */}
/*
 * Multibyte-to-wide conversion of a single character: s is the UTF-8 input,
 * n the number of input bytes available and p the output slot (this is how
 * utf8_mbstowcs() calls it).
 * NOTE(review): most of the body, including every return statement, is
 * missing from this excerpt -- confirm the return convention in the full file.
 */
50 utf8_mbtowc(wchar_t *p, const __u8 *s, int n)
/* Try each sequence length in turn until the end-of-table row. */
59 for (t = utf8_table; t->cmask; t++) {
/* The lead byte selects the row whose mask/value pair it matches. */
61 if ((c0 & t->cmask) == t->cval) {
/* XOR with 0x80 clears the top bit of a 10xxxxxx continuation byte. */
71 c = (*s ^ 0x80) & 0xFF;
/*
 * String form of utf8_mbtowc(): converts characters from s into pwcs one at
 * a time, stopping at a zero input byte or when n is no longer positive.
 * NOTE(review): the branch around the "Ignore character" comment is not
 * visible here; presumably it handles a failed utf8_mbtowc() call -- confirm.
 */
80 utf8_mbstowcs(wchar_t *pwcs, const __u8 *s, int n)
88 while (*ip && n > 0) {
90 size = utf8_mbtowc(op, ip, n);
92 /* Ignore character and move on */
/*
 * Wide-to-multibyte conversion of a single character: encodes wc as UTF-8
 * into s, with maxlen as the output budget.
 * NOTE(review): the declarations, the per-row range test and the return
 * statements are missing from this excerpt.
 */
108 utf8_wctomb(__u8 *s, wchar_t wc, int maxlen)
112 struct utf8_table *t;
/*
 * Each step to the next (one byte longer) table row also uses up one unit of
 * maxlen, so the search stops when either the table or the budget runs out.
 */
119 for (t = utf8_table; t->cmask && maxlen; t++, maxlen--) {
/* Lead byte: the row's marker bits OR'ed with the top bits of the value. */
123 *s = t->cval | (l >> c);
/* Following bytes: 10xxxxxx, carrying six payload bits each. */
127 *s = 0x80 | ((l >> c) & 0x3F);
/*
 * String form of utf8_wctomb(): encodes wide characters from pwcs into s,
 * stopping at a zero wide character or when maxlen is no longer positive.
 * NOTE(review): the branch around the "Ignore character" comment is not
 * visible here; presumably it handles a failed utf8_wctomb() call -- confirm.
 */
136 utf8_wcstombs(__u8 *s, const wchar_t *pwcs, int maxlen)
144 while (*ip && maxlen > 0) {
146 size = utf8_wctomb(op, *ip, maxlen);
148 /* Ignore character and move on */
/*
 * Registers an NLS table.  The list anchored at `tables` is handled with
 * nls_lock held; the two spin_unlock() sites mean there is one early exit
 * and one normal exit from the locked section.
 * NOTE(review): the list walk and return values are missing from this
 * excerpt; the early exit is presumably a "cannot register" case -- confirm.
 */
162 int register_nls(struct nls_table * nls)
164 struct nls_table ** tmp = &tables;
171 spin_lock(&nls_lock);
174 spin_unlock(&nls_lock);
181 spin_unlock(&nls_lock);
/*
 * Counterpart of register_nls().  Starts from the address of the list head
 * and works under nls_lock; the two spin_unlock() sites mean there is one
 * early exit and one fall-through exit from the locked section.
 * NOTE(review): the list walk and return values are missing from this
 * excerpt; presumably one exit is "found and unlinked" and the other
 * "not found" -- confirm.
 */
185 int unregister_nls(struct nls_table * nls)
187 struct nls_table ** tmp = &tables;
189 spin_lock(&nls_lock);
193 spin_unlock(&nls_lock);
198 spin_unlock(&nls_lock);
/*
 * Looks up a registered table by charset name.  The list is scanned with
 * nls_lock held, comparing each entry's ->charset with strcmp().
 */
202 static struct nls_table *find_nls(char *charset)
204 struct nls_table *nls;
205 spin_lock(&nls_lock);
206 for (nls = tables; nls; nls = nls->next)
207 if (! strcmp(nls->charset, charset))
/*
 * A match only counts if a reference on the owning module can be taken.
 * NOTE(review): the statement guarded by this test is not visible here;
 * presumably it discards the match -- confirm.
 */
209 if (nls && !try_inc_mod_count(nls->owner))
211 spin_unlock(&nls_lock);
/*
 * Returns the table for `charset`.  A first find_nls() lookup is followed by
 * an attempt to load a module named "nls_<charset>" through request_module(),
 * and then a second lookup.
 * NOTE(review): the conditions and return statements between these steps are
 * missing from this excerpt -- confirm the exact failure returns.
 */
215 struct nls_table *load_nls(char *charset)
217 struct nls_table *nls;
223 nls = find_nls(charset);
/*
 * sizeof("nls_") counts the prefix plus a terminating NUL, so this rejects
 * names that would not fit in buf once the prefix has been prepended.
 */
228 if (strlen(charset) > sizeof(buf) - sizeof("nls_")) {
229 printk("Unable to load NLS charset %s: name too long\n",
234 sprintf(buf, "nls_%s", charset);
235 ret = request_module(buf);
237 printk("Unable to load NLS charset %s\n", charset);
/* Look again now that the module has had a chance to register itself. */
240 nls = find_nls(charset);
/*
 * Releases a table reference by decrementing the use count of nls->owner,
 * the counterpart of the try_inc_mod_count() call in find_nls().
 * NOTE(review): any guard around the decrement is not visible in this excerpt.
 */
245 void unload_nls(struct nls_table *nls)
248 __MOD_DEC_USE_COUNT(nls->owner);
/*
 * Byte value -> Unicode value for the built-in default charset.  Every entry
 * equals its own index (0x0000 through 0x00ff), so the mapping is the
 * identity; char2uni() reads it directly.
 */
251 wchar_t charset2uni[256] = {
253 0x0000, 0x0001, 0x0002, 0x0003,
254 0x0004, 0x0005, 0x0006, 0x0007,
255 0x0008, 0x0009, 0x000a, 0x000b,
256 0x000c, 0x000d, 0x000e, 0x000f,
258 0x0010, 0x0011, 0x0012, 0x0013,
259 0x0014, 0x0015, 0x0016, 0x0017,
260 0x0018, 0x0019, 0x001a, 0x001b,
261 0x001c, 0x001d, 0x001e, 0x001f,
263 0x0020, 0x0021, 0x0022, 0x0023,
264 0x0024, 0x0025, 0x0026, 0x0027,
265 0x0028, 0x0029, 0x002a, 0x002b,
266 0x002c, 0x002d, 0x002e, 0x002f,
268 0x0030, 0x0031, 0x0032, 0x0033,
269 0x0034, 0x0035, 0x0036, 0x0037,
270 0x0038, 0x0039, 0x003a, 0x003b,
271 0x003c, 0x003d, 0x003e, 0x003f,
273 0x0040, 0x0041, 0x0042, 0x0043,
274 0x0044, 0x0045, 0x0046, 0x0047,
275 0x0048, 0x0049, 0x004a, 0x004b,
276 0x004c, 0x004d, 0x004e, 0x004f,
278 0x0050, 0x0051, 0x0052, 0x0053,
279 0x0054, 0x0055, 0x0056, 0x0057,
280 0x0058, 0x0059, 0x005a, 0x005b,
281 0x005c, 0x005d, 0x005e, 0x005f,
283 0x0060, 0x0061, 0x0062, 0x0063,
284 0x0064, 0x0065, 0x0066, 0x0067,
285 0x0068, 0x0069, 0x006a, 0x006b,
286 0x006c, 0x006d, 0x006e, 0x006f,
288 0x0070, 0x0071, 0x0072, 0x0073,
289 0x0074, 0x0075, 0x0076, 0x0077,
290 0x0078, 0x0079, 0x007a, 0x007b,
291 0x007c, 0x007d, 0x007e, 0x007f,
293 0x0080, 0x0081, 0x0082, 0x0083,
294 0x0084, 0x0085, 0x0086, 0x0087,
295 0x0088, 0x0089, 0x008a, 0x008b,
296 0x008c, 0x008d, 0x008e, 0x008f,
298 0x0090, 0x0091, 0x0092, 0x0093,
299 0x0094, 0x0095, 0x0096, 0x0097,
300 0x0098, 0x0099, 0x009a, 0x009b,
301 0x009c, 0x009d, 0x009e, 0x009f,
303 0x00a0, 0x00a1, 0x00a2, 0x00a3,
304 0x00a4, 0x00a5, 0x00a6, 0x00a7,
305 0x00a8, 0x00a9, 0x00aa, 0x00ab,
306 0x00ac, 0x00ad, 0x00ae, 0x00af,
308 0x00b0, 0x00b1, 0x00b2, 0x00b3,
309 0x00b4, 0x00b5, 0x00b6, 0x00b7,
310 0x00b8, 0x00b9, 0x00ba, 0x00bb,
311 0x00bc, 0x00bd, 0x00be, 0x00bf,
313 0x00c0, 0x00c1, 0x00c2, 0x00c3,
314 0x00c4, 0x00c5, 0x00c6, 0x00c7,
315 0x00c8, 0x00c9, 0x00ca, 0x00cb,
316 0x00cc, 0x00cd, 0x00ce, 0x00cf,
318 0x00d0, 0x00d1, 0x00d2, 0x00d3,
319 0x00d4, 0x00d5, 0x00d6, 0x00d7,
320 0x00d8, 0x00d9, 0x00da, 0x00db,
321 0x00dc, 0x00dd, 0x00de, 0x00df,
323 0x00e0, 0x00e1, 0x00e2, 0x00e3,
324 0x00e4, 0x00e5, 0x00e6, 0x00e7,
325 0x00e8, 0x00e9, 0x00ea, 0x00eb,
326 0x00ec, 0x00ed, 0x00ee, 0x00ef,
328 0x00f0, 0x00f1, 0x00f2, 0x00f3,
329 0x00f4, 0x00f5, 0x00f6, 0x00f7,
330 0x00f8, 0x00f9, 0x00fa, 0x00fb,
331 0x00fc, 0x00fd, 0x00fe, 0x00ff,
/*
 * 256-entry byte table in which every entry equals its own index (identity).
 * NOTE(review): presumably this is the Unicode page 0x00 entry referenced
 * from page_uni2charset[]; that array's initializer is not visible in this
 * excerpt -- confirm.
 */
334 static unsigned char page00[256] = {
335 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
336 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
337 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
338 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
339 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
340 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
341 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
342 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
343 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
344 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
345 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
346 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
347 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
348 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
349 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
350 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
352 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
353 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
354 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
355 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
356 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
357 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
358 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
359 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
360 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
361 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
362 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
363 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
364 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
365 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
366 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
367 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Per-page reverse-mapping pointers.  uni2char() indexes this array with the
 * high byte of the Unicode value and treats a NULL entry as "no mapping".
 * NOTE(review): the initializer is not visible in this excerpt.
 */
370 static unsigned char *page_uni2charset[256] = {
/*
 * Lower-casing table indexed by byte value.  Only 0x41-0x5a (ASCII 'A'-'Z')
 * are remapped, to 0x61-0x7a; every other entry, including the whole
 * 0x80-0xff half, maps to itself.
 */
374 static unsigned char charset2lower[256] = {
375 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
376 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
377 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
378 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
379 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
380 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
381 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
382 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
383 0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x40-0x47 */
384 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x48-0x4f */
385 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x50-0x57 */
386 0x78, 0x79, 0x7a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
387 0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, /* 0x60-0x67 */
388 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0x6e, 0x6f, /* 0x68-0x6f */
389 0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, /* 0x70-0x77 */
390 0x78, 0x79, 0x7a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
392 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
393 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
394 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
395 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
396 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
397 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
398 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
399 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
400 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
401 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
402 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
403 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
404 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
405 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
406 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
407 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Upper-casing table indexed by byte value.  Only 0x61-0x7a (ASCII 'a'-'z')
 * are remapped, to 0x41-0x5a; every other entry, including the whole
 * 0x80-0xff half, maps to itself.
 */
410 static unsigned char charset2upper[256] = {
411 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, /* 0x00-0x07 */
412 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, /* 0x08-0x0f */
413 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, /* 0x10-0x17 */
414 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, /* 0x18-0x1f */
415 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, /* 0x20-0x27 */
416 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, 0x2e, 0x2f, /* 0x28-0x2f */
417 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, /* 0x30-0x37 */
418 0x38, 0x39, 0x3a, 0x3b, 0x3c, 0x3d, 0x3e, 0x3f, /* 0x38-0x3f */
419 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x40-0x47 */
420 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x48-0x4f */
421 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x50-0x57 */
422 0x58, 0x59, 0x5a, 0x5b, 0x5c, 0x5d, 0x5e, 0x5f, /* 0x58-0x5f */
423 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, /* 0x60-0x67 */
424 0x48, 0x49, 0x4a, 0x4b, 0x4c, 0x4d, 0x4e, 0x4f, /* 0x68-0x6f */
425 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, /* 0x70-0x77 */
426 0x58, 0x59, 0x5a, 0x7b, 0x7c, 0x7d, 0x7e, 0x7f, /* 0x78-0x7f */
428 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, /* 0x80-0x87 */
429 0x88, 0x89, 0x8a, 0x8b, 0x8c, 0x8d, 0x8e, 0x8f, /* 0x88-0x8f */
430 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, /* 0x90-0x97 */
431 0x98, 0x99, 0x9a, 0x9b, 0x9c, 0x9d, 0x9e, 0x9f, /* 0x98-0x9f */
432 0xa0, 0xa1, 0xa2, 0xa3, 0xa4, 0xa5, 0xa6, 0xa7, /* 0xa0-0xa7 */
433 0xa8, 0xa9, 0xaa, 0xab, 0xac, 0xad, 0xae, 0xaf, /* 0xa8-0xaf */
434 0xb0, 0xb1, 0xb2, 0xb3, 0xb4, 0xb5, 0xb6, 0xb7, /* 0xb0-0xb7 */
435 0xb8, 0xb9, 0xba, 0xbb, 0xbc, 0xbd, 0xbe, 0xbf, /* 0xb8-0xbf */
436 0xc0, 0xc1, 0xc2, 0xc3, 0xc4, 0xc5, 0xc6, 0xc7, /* 0xc0-0xc7 */
437 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xce, 0xcf, /* 0xc8-0xcf */
438 0xd0, 0xd1, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, /* 0xd0-0xd7 */
439 0xd8, 0xd9, 0xda, 0xdb, 0xdc, 0xdd, 0xde, 0xdf, /* 0xd8-0xdf */
440 0xe0, 0xe1, 0xe2, 0xe3, 0xe4, 0xe5, 0xe6, 0xe7, /* 0xe0-0xe7 */
441 0xe8, 0xe9, 0xea, 0xeb, 0xec, 0xed, 0xee, 0xef, /* 0xe8-0xef */
442 0xf0, 0xf1, 0xf2, 0xf3, 0xf4, 0xf5, 0xf6, 0xf7, /* 0xf0-0xf7 */
443 0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* 0xf8-0xff */
/*
 * Converts one Unicode value to a byte of the default charset, written to
 * out[0].  The value is split into a page number (high byte) and an offset
 * within that page (low byte).
 * NOTE(review): the test guarding -ENAMETOOLONG and the remaining return
 * statements are missing from this excerpt; presumably the guard checks
 * boundlen -- confirm.
 */
447 static int uni2char(wchar_t uni, unsigned char *out, int boundlen)
449 unsigned char *uni2charset;
450 unsigned char cl = uni & 0x00ff;
451 unsigned char ch = (uni & 0xff00) >> 8;
454 return -ENAMETOOLONG;
456 uni2charset = page_uni2charset[ch];
/* Both a missing page (NULL) and a zero table entry count as "no mapping". */
457 if (uni2charset && uni2charset[cl])
458 out[0] = uni2charset[cl];
/*
 * Converts the first byte of rawstring to its Unicode value by a direct
 * charset2uni[] lookup and stores the result in *uni.
 * NOTE(review): the return statements are missing from this excerpt.
 */
464 static int char2uni(const unsigned char *rawstring, int boundlen, wchar_t *uni)
466 *uni = charset2uni[*rawstring];
/*
 * Built-in table whose address load_nls_default() returns as a last resort.
 * NOTE(review): the initializer is not visible in this excerpt.
 */
472 static struct nls_table default_table = {
481 /* Tries load_nls(CONFIG_NLS_DEFAULT) first; falls back to the built-in default_table */
482 struct nls_table *load_nls_default(void)
484 struct nls_table *default_nls;
486 default_nls = load_nls(CONFIG_NLS_DEFAULT);
/* NOTE(review): the statement under this test is not visible; presumably it returns default_nls. */
487 if (default_nls != NULL)
/* The address of a static object is returned, so this path never yields NULL. */
490 return &default_table;
/*
 * Public interface of this file: table registration, table lookup/release,
 * and the four UTF-8 conversion helpers.
 */
493 EXPORT_SYMBOL(register_nls);
494 EXPORT_SYMBOL(unregister_nls);
495 EXPORT_SYMBOL(unload_nls);
496 EXPORT_SYMBOL(load_nls);
497 EXPORT_SYMBOL(load_nls_default);
498 EXPORT_SYMBOL(utf8_mbtowc);
499 EXPORT_SYMBOL(utf8_mbstowcs);
500 EXPORT_SYMBOL(utf8_wctomb);
501 EXPORT_SYMBOL(utf8_wcstombs);