OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / encoding / japanese / iso2022jp.go
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package japanese
6
7 import (
8         "unicode/utf8"
9
10         "golang.org/x/text/encoding"
11         "golang.org/x/text/encoding/internal"
12         "golang.org/x/text/encoding/internal/identifier"
13         "golang.org/x/text/transform"
14 )
15
16 // ISO2022JP is the ISO-2022-JP encoding.
17 var ISO2022JP encoding.Encoding = &iso2022JP
18
19 var iso2022JP = internal.Encoding{
20         internal.FuncEncoding{iso2022JPNewDecoder, iso2022JPNewEncoder},
21         "ISO-2022-JP",
22         identifier.ISO2022JP,
23 }
24
25 func iso2022JPNewDecoder() transform.Transformer {
26         return new(iso2022JPDecoder)
27 }
28
29 func iso2022JPNewEncoder() transform.Transformer {
30         return new(iso2022JPEncoder)
31 }
32
// asciiEsc is the ESC byte that introduces every escape sequence handled in
// this file.
const asciiEsc = 0x1b

// States shared by the decoder and the encoder. The state records which
// character set is currently selected; asciiState is the zero value and
// therefore the state of a freshly allocated or Reset transformer.
const (
	// asciiState: selected by ESC ( B (the decoder also accepts ESC ( J).
	asciiState = iota
	// katakanaState: selected by ESC ( I; single bytes map to U+FF61 and up.
	katakanaState
	// jis0208State: selected by ESC $ B (the decoder also accepts ESC $ @).
	jis0208State
	// jis0212State: selected by ESC $ ( D; only the decoder enters it.
	jis0212State
)
41
42 type iso2022JPDecoder int
43
44 func (d *iso2022JPDecoder) Reset() {
45         *d = asciiState
46 }
47
48 func (d *iso2022JPDecoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
49         r, size := rune(0), 0
50         for ; nSrc < len(src); nSrc += size {
51                 c0 := src[nSrc]
52                 if c0 >= utf8.RuneSelf {
53                         r, size = '\ufffd', 1
54                         goto write
55                 }
56
57                 if c0 == asciiEsc {
58                         if nSrc+2 >= len(src) {
59                                 if !atEOF {
60                                         return nDst, nSrc, transform.ErrShortSrc
61                                 }
62                                 // TODO: is it correct to only skip 1??
63                                 r, size = '\ufffd', 1
64                                 goto write
65                         }
66                         size = 3
67                         c1 := src[nSrc+1]
68                         c2 := src[nSrc+2]
69                         switch {
70                         case c1 == '$' && (c2 == '@' || c2 == 'B'): // 0x24 {0x40, 0x42}
71                                 *d = jis0208State
72                                 continue
73                         case c1 == '$' && c2 == '(': // 0x24 0x28
74                                 if nSrc+3 >= len(src) {
75                                         if !atEOF {
76                                                 return nDst, nSrc, transform.ErrShortSrc
77                                         }
78                                         r, size = '\ufffd', 1
79                                         goto write
80                                 }
81                                 size = 4
82                                 if src[nSrc+3] == 'D' {
83                                         *d = jis0212State
84                                         continue
85                                 }
86                         case c1 == '(' && (c2 == 'B' || c2 == 'J'): // 0x28 {0x42, 0x4A}
87                                 *d = asciiState
88                                 continue
89                         case c1 == '(' && c2 == 'I': // 0x28 0x49
90                                 *d = katakanaState
91                                 continue
92                         }
93                         r, size = '\ufffd', 1
94                         goto write
95                 }
96
97                 switch *d {
98                 case asciiState:
99                         r, size = rune(c0), 1
100
101                 case katakanaState:
102                         if c0 < 0x21 || 0x60 <= c0 {
103                                 r, size = '\ufffd', 1
104                                 goto write
105                         }
106                         r, size = rune(c0)+(0xff61-0x21), 1
107
108                 default:
109                         if c0 == 0x0a {
110                                 *d = asciiState
111                                 r, size = rune(c0), 1
112                                 goto write
113                         }
114                         if nSrc+1 >= len(src) {
115                                 if !atEOF {
116                                         return nDst, nSrc, transform.ErrShortSrc
117                                 }
118                                 r, size = '\ufffd', 1
119                                 goto write
120                         }
121                         size = 2
122                         c1 := src[nSrc+1]
123                         i := int(c0-0x21)*94 + int(c1-0x21)
124                         if *d == jis0208State && i < len(jis0208Decode) {
125                                 r = rune(jis0208Decode[i])
126                         } else if *d == jis0212State && i < len(jis0212Decode) {
127                                 r = rune(jis0212Decode[i])
128                         } else {
129                                 r = '\ufffd'
130                                 goto write
131                         }
132                         if r == 0 {
133                                 r = '\ufffd'
134                         }
135                 }
136
137         write:
138                 if nDst+utf8.RuneLen(r) > len(dst) {
139                         return nDst, nSrc, transform.ErrShortDst
140                 }
141                 nDst += utf8.EncodeRune(dst[nDst:], r)
142         }
143         return nDst, nSrc, err
144 }
145
146 type iso2022JPEncoder int
147
148 func (e *iso2022JPEncoder) Reset() {
149         *e = asciiState
150 }
151
152 func (e *iso2022JPEncoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
153         r, size := rune(0), 0
154         for ; nSrc < len(src); nSrc += size {
155                 r = rune(src[nSrc])
156
157                 // Decode a 1-byte rune.
158                 if r < utf8.RuneSelf {
159                         size = 1
160
161                 } else {
162                         // Decode a multi-byte rune.
163                         r, size = utf8.DecodeRune(src[nSrc:])
164                         if size == 1 {
165                                 // All valid runes of size 1 (those below utf8.RuneSelf) were
166                                 // handled above. We have invalid UTF-8 or we haven't seen the
167                                 // full character yet.
168                                 if !atEOF && !utf8.FullRune(src[nSrc:]) {
169                                         err = transform.ErrShortSrc
170                                         break
171                                 }
172                         }
173
174                         // func init checks that the switch covers all tables.
175                         //
176                         // http://encoding.spec.whatwg.org/#iso-2022-jp says that "the index jis0212
177                         // is not used by the iso-2022-jp encoder due to lack of widespread support".
178                         //
179                         // TODO: do we have to special-case U+00A5 and U+203E, as per
180                         // http://encoding.spec.whatwg.org/#iso-2022-jp
181                         // Doing so would mean that "\u00a5" would not be preserved
182                         // after an encode-decode round trip.
183                         switch {
184                         case encode0Low <= r && r < encode0High:
185                                 if r = rune(encode0[r-encode0Low]); r>>tableShift == jis0208 {
186                                         goto writeJIS
187                                 }
188                         case encode1Low <= r && r < encode1High:
189                                 if r = rune(encode1[r-encode1Low]); r>>tableShift == jis0208 {
190                                         goto writeJIS
191                                 }
192                         case encode2Low <= r && r < encode2High:
193                                 if r = rune(encode2[r-encode2Low]); r>>tableShift == jis0208 {
194                                         goto writeJIS
195                                 }
196                         case encode3Low <= r && r < encode3High:
197                                 if r = rune(encode3[r-encode3Low]); r>>tableShift == jis0208 {
198                                         goto writeJIS
199                                 }
200                         case encode4Low <= r && r < encode4High:
201                                 if r = rune(encode4[r-encode4Low]); r>>tableShift == jis0208 {
202                                         goto writeJIS
203                                 }
204                         case encode5Low <= r && r < encode5High:
205                                 if 0xff61 <= r && r < 0xffa0 {
206                                         goto writeKatakana
207                                 }
208                                 if r = rune(encode5[r-encode5Low]); r>>tableShift == jis0208 {
209                                         goto writeJIS
210                                 }
211                         }
212
213                         // Switch back to ASCII state in case of error so that an ASCII
214                         // replacement character can be written in the correct state.
215                         if *e != asciiState {
216                                 if nDst+3 > len(dst) {
217                                         err = transform.ErrShortDst
218                                         break
219                                 }
220                                 *e = asciiState
221                                 dst[nDst+0] = asciiEsc
222                                 dst[nDst+1] = '('
223                                 dst[nDst+2] = 'B'
224                                 nDst += 3
225                         }
226                         err = internal.ErrASCIIReplacement
227                         break
228                 }
229
230                 if *e != asciiState {
231                         if nDst+4 > len(dst) {
232                                 err = transform.ErrShortDst
233                                 break
234                         }
235                         *e = asciiState
236                         dst[nDst+0] = asciiEsc
237                         dst[nDst+1] = '('
238                         dst[nDst+2] = 'B'
239                         nDst += 3
240                 } else if nDst >= len(dst) {
241                         err = transform.ErrShortDst
242                         break
243                 }
244                 dst[nDst] = uint8(r)
245                 nDst++
246                 continue
247
248         writeJIS:
249                 if *e != jis0208State {
250                         if nDst+5 > len(dst) {
251                                 err = transform.ErrShortDst
252                                 break
253                         }
254                         *e = jis0208State
255                         dst[nDst+0] = asciiEsc
256                         dst[nDst+1] = '$'
257                         dst[nDst+2] = 'B'
258                         nDst += 3
259                 } else if nDst+2 > len(dst) {
260                         err = transform.ErrShortDst
261                         break
262                 }
263                 dst[nDst+0] = 0x21 + uint8(r>>codeShift)&codeMask
264                 dst[nDst+1] = 0x21 + uint8(r)&codeMask
265                 nDst += 2
266                 continue
267
268         writeKatakana:
269                 if *e != katakanaState {
270                         if nDst+4 > len(dst) {
271                                 err = transform.ErrShortDst
272                                 break
273                         }
274                         *e = katakanaState
275                         dst[nDst+0] = asciiEsc
276                         dst[nDst+1] = '('
277                         dst[nDst+2] = 'I'
278                         nDst += 3
279                 } else if nDst >= len(dst) {
280                         err = transform.ErrShortDst
281                         break
282                 }
283                 dst[nDst] = uint8(r - (0xff61 - 0x21))
284                 nDst++
285                 continue
286         }
287         if atEOF && err == nil && *e != asciiState {
288                 if nDst+3 > len(dst) {
289                         err = transform.ErrShortDst
290                 } else {
291                         *e = asciiState
292                         dst[nDst+0] = asciiEsc
293                         dst[nDst+1] = '('
294                         dst[nDst+2] = 'B'
295                         nDst += 3
296                 }
297         }
298         return nDst, nSrc, err
299 }