OSDN Git Service

Initial check in.
[android-x86/external-exfat.git] / libexfat / utf.c
1 /*
2  *  utf.c
3  *  exFAT file system implementation library.
4  *
5  *  Created by Andrew Nayenko on 13.09.09.
6  *  This software is distributed under the GNU General Public License 
7  *  version 3 or any later.
8  */
9
10 #include "exfat.h"
11 #include <errno.h>
12
/*
 * Encodes a single code point as a UTF-8 byte sequence (1 to 6 bytes, the
 * original pre-RFC 3629 scheme that covers values up to 0x7fffffff).
 *
 * output   destination for the encoded bytes
 * wc       code point to encode
 * outsize  number of bytes available at output
 *
 * Returns a pointer just past the last byte written, or NULL when the
 * sequence does not fit into outsize bytes or wc exceeds 0x7fffffff.
 * Nothing is written on failure.
 */
static char* wchar_to_utf8(char* output, wchar_t wc, size_t outsize)
{
        size_t length;  /* total number of bytes in the sequence */
        int lead;       /* marker bits of the first byte */
        size_t i;

        if (wc <= 0x7f)
        {
                length = 1;
                lead = 0x00;
        }
        else if (wc <= 0x7ff)
        {
                length = 2;
                lead = 0xc0;
        }
        else if (wc <= 0xffff)
        {
                length = 3;
                lead = 0xe0;
        }
        else if (wc <= 0x1fffff)
        {
                length = 4;
                lead = 0xf0;
        }
        else if (wc <= 0x3ffffff)
        {
                length = 5;
                lead = 0xf8;
        }
        else if (wc <= 0x7fffffff)
        {
                length = 6;
                lead = 0xfc;
        }
        else
                return NULL;

        if (outsize < length)
                return NULL;

        if (length == 1)
        {
                output[0] = (char) wc;
                return output + 1;
        }

        /* the first byte holds the marker plus the most significant bits */
        output[0] = lead | (wc >> (6 * (length - 1)));
        /* each following byte carries the next 6 bits, high to low */
        for (i = 1; i < length; i++)
                output[i] = 0x80 | ((wc >> (6 * (length - 1 - i))) & 0x3f);

        return output + length;
}
71
72 static const le16_t* utf16_to_wchar(const le16_t* input, wchar_t* wc,
73                 size_t insize)
74 {
75         if ((le16_to_cpu(input[0]) & 0xfc00) == 0xd800)
76         {
77                 if (insize < 2 || (le16_to_cpu(input[1]) & 0xfc00) != 0xdc00)
78                         return NULL;
79                 *wc = ((wchar_t) (le16_to_cpu(input[0]) & 0x3ff) << 10);
80                 *wc |= (le16_to_cpu(input[1]) & 0x3ff);
81                 return input + 2;
82         }
83         else
84         {
85                 *wc = le16_to_cpu(*input);
86                 return input + 1;
87         }
88 }
89
90 int utf16_to_utf8(char* output, const le16_t* input, size_t outsize,
91                 size_t insize)
92 {
93         const le16_t* inp = input;
94         char* outp = output;
95         wchar_t wc;
96
97         while (inp - input < insize && le16_to_cpu(*inp))
98         {
99                 inp = utf16_to_wchar(inp, &wc, insize - (inp - input));
100                 if (inp == NULL)
101                 {
102                         exfat_error("illegal UTF-16 sequence");
103                         return -EILSEQ;
104                 }
105                 outp = wchar_to_utf8(outp, wc, outsize - (outp - output));
106                 if (outp == NULL)
107                 {
108                         exfat_error("name is too long");
109                         return -ENAMETOOLONG;
110                 }
111         }
112         *outp = '\0';
113         return 0;
114 }
115
/*
 * Decodes one code point from a UTF-8 byte sequence (1 to 6 bytes, the
 * original pre-RFC 3629 scheme that covers values up to 0x7fffffff).
 *
 * input   points at the first byte of the sequence
 * wc      receives the decoded code point
 * insize  number of bytes available at input
 *
 * Returns a pointer to the byte following the sequence, or NULL if the lead
 * byte is invalid, the sequence is longer than insize bytes, or one of the
 * trailing bytes is not a continuation byte (10xxxxxx). *wc is left untouched
 * on failure. Overlong encodings are not rejected.
 */
static const char* utf8_to_wchar(const char* input, wchar_t* wc,
                size_t insize)
{
        /* work on unsigned bytes so the result does not depend on whether
           plain char is signed */
        const unsigned char* bytes = (const unsigned char*) input;
        unsigned long code;
        size_t length;
        size_t i;

        if (insize == 0)
                return NULL;

        /* the lead byte determines the sequence length and carries the most
           significant payload bits */
        if ((bytes[0] & 0x80) == 0x00)
        {
                length = 1;
                code = bytes[0];
        }
        else if ((bytes[0] & 0xe0) == 0xc0)
        {
                length = 2;
                code = bytes[0] & 0x1f;
        }
        else if ((bytes[0] & 0xf0) == 0xe0)
        {
                length = 3;
                code = bytes[0] & 0x0f;
        }
        else if ((bytes[0] & 0xf8) == 0xf0)
        {
                length = 4;
                code = bytes[0] & 0x07;
        }
        else if ((bytes[0] & 0xfc) == 0xf8)
        {
                length = 5;
                code = bytes[0] & 0x03;
        }
        else if ((bytes[0] & 0xfe) == 0xfc)
        {
                length = 6;
                code = bytes[0] & 0x01;
        }
        else
                return NULL; /* stray continuation byte, 0xfe or 0xff */

        if (insize < length)
                return NULL;

        for (i = 1; i < length; i++)
        {
                /* a non-continuation byte (including the string's NUL
                   terminator) means the sequence is truncated or malformed */
                if ((bytes[i] & 0xc0) != 0x80)
                        return NULL;
                code = (code << 6) | (bytes[i] & 0x3f);
        }

        *wc = (wchar_t) code;
        return input + length;
}
166
167 static le16_t* wchar_to_utf16(le16_t* output, wchar_t wc, size_t outsize)
168 {
169         if (wc <= 0xffff) /* if character is from BMP */
170         {
171                 if (outsize == 0)
172                 {
173                         exfat_error("name is too long");
174                         return NULL;
175                 }
176                 output[0] = cpu_to_le16(wc);
177                 return output + 1;
178         }
179         if (outsize < 2)
180         {
181                 exfat_error("name is too long");
182                 return NULL;
183         }
184         output[0] = cpu_to_le16(0xd800 | ((wc >> 10) & 0x3ff));
185         output[1] = cpu_to_le16(0xdc00 | (wc & 0x3ff));
186         return output + 2;
187 }
188
189 int utf8_to_utf16(le16_t* output, const char* input, size_t outsize,
190                 size_t insize)
191 {
192         const char* inp = input;
193         le16_t* outp = output;
194         wchar_t wc;
195
196         while (inp - input < insize && *inp)
197         {
198                 inp = utf8_to_wchar(inp, &wc, insize - (inp - input));
199                 if (inp == NULL)
200                 {
201                         exfat_error("illegal UTF-8 sequence");
202                         return -EILSEQ;
203                 }
204                 outp = wchar_to_utf16(outp, wc, outsize - (outp - output));
205                 if (outp == NULL)
206                 {
207                         exfat_error("name is too long");
208                         return -ENAMETOOLONG;
209                 }
210         }
211         *outp = cpu_to_le16(0);
212         return 0;
213 }