OSDN Git Service

new repo
[bytom/vapor.git] / vendor / golang.org / x / text / encoding / simplifiedchinese / hzgb2312.go
1 // Copyright 2013 The Go Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style
3 // license that can be found in the LICENSE file.
4
5 package simplifiedchinese
6
7 import (
8         "unicode/utf8"
9
10         "golang.org/x/text/encoding"
11         "golang.org/x/text/encoding/internal"
12         "golang.org/x/text/encoding/internal/identifier"
13         "golang.org/x/text/transform"
14 )
15
16 // HZGB2312 is the HZ-GB2312 encoding.
17 var HZGB2312 encoding.Encoding = &hzGB2312
18
19 var hzGB2312 = internal.Encoding{
20         internal.FuncEncoding{hzGB2312NewDecoder, hzGB2312NewEncoder},
21         "HZ-GB2312",
22         identifier.HZGB2312,
23 }
24
25 func hzGB2312NewDecoder() transform.Transformer {
26         return new(hzGB2312Decoder)
27 }
28
29 func hzGB2312NewEncoder() transform.Transformer {
30         return new(hzGB2312Encoder)
31 }
32
// Shift states used by both the HZ-GB2312 decoder and encoder.
const (
	// asciiState is the initial state: bytes are handled one at a time as
	// ASCII.
	asciiState = 0
	// gbState is the state between "~{" and "~}": bytes are handled in
	// pairs as GB2312 characters.
	gbState = 1
)
37
// hzGB2312Decoder is the decoder's current shift state: asciiState or
// gbState. The zero value is asciiState.
type hzGB2312Decoder int
39
40 func (d *hzGB2312Decoder) Reset() {
41         *d = asciiState
42 }
43
44 func (d *hzGB2312Decoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
45         r, size := rune(0), 0
46 loop:
47         for ; nSrc < len(src); nSrc += size {
48                 c0 := src[nSrc]
49                 if c0 >= utf8.RuneSelf {
50                         r, size = utf8.RuneError, 1
51                         goto write
52                 }
53
54                 if c0 == '~' {
55                         if nSrc+1 >= len(src) {
56                                 if !atEOF {
57                                         err = transform.ErrShortSrc
58                                         break loop
59                                 }
60                                 r = utf8.RuneError
61                                 goto write
62                         }
63                         size = 2
64                         switch src[nSrc+1] {
65                         case '{':
66                                 *d = gbState
67                                 continue
68                         case '}':
69                                 *d = asciiState
70                                 continue
71                         case '~':
72                                 if nDst >= len(dst) {
73                                         err = transform.ErrShortDst
74                                         break loop
75                                 }
76                                 dst[nDst] = '~'
77                                 nDst++
78                                 continue
79                         case '\n':
80                                 continue
81                         default:
82                                 r = utf8.RuneError
83                                 goto write
84                         }
85                 }
86
87                 if *d == asciiState {
88                         r, size = rune(c0), 1
89                 } else {
90                         if nSrc+1 >= len(src) {
91                                 if !atEOF {
92                                         err = transform.ErrShortSrc
93                                         break loop
94                                 }
95                                 r, size = utf8.RuneError, 1
96                                 goto write
97                         }
98                         size = 2
99                         c1 := src[nSrc+1]
100                         if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
101                                 // error
102                         } else if i := int(c0-0x01)*190 + int(c1+0x3f); i < len(decode) {
103                                 r = rune(decode[i])
104                                 if r != 0 {
105                                         goto write
106                                 }
107                         }
108                         if c1 > utf8.RuneSelf {
109                                 // Be consistent and always treat non-ASCII as a single error.
110                                 size = 1
111                         }
112                         r = utf8.RuneError
113                 }
114
115         write:
116                 if nDst+utf8.RuneLen(r) > len(dst) {
117                         err = transform.ErrShortDst
118                         break loop
119                 }
120                 nDst += utf8.EncodeRune(dst[nDst:], r)
121         }
122         return nDst, nSrc, err
123 }
124
// hzGB2312Encoder is the encoder's current shift state: asciiState or
// gbState. The zero value is asciiState.
type hzGB2312Encoder int
126
127 func (d *hzGB2312Encoder) Reset() {
128         *d = asciiState
129 }
130
131 func (e *hzGB2312Encoder) Transform(dst, src []byte, atEOF bool) (nDst, nSrc int, err error) {
132         r, size := rune(0), 0
133         for ; nSrc < len(src); nSrc += size {
134                 r = rune(src[nSrc])
135
136                 // Decode a 1-byte rune.
137                 if r < utf8.RuneSelf {
138                         size = 1
139                         if r == '~' {
140                                 if nDst+2 > len(dst) {
141                                         err = transform.ErrShortDst
142                                         break
143                                 }
144                                 dst[nDst+0] = '~'
145                                 dst[nDst+1] = '~'
146                                 nDst += 2
147                                 continue
148                         } else if *e != asciiState {
149                                 if nDst+3 > len(dst) {
150                                         err = transform.ErrShortDst
151                                         break
152                                 }
153                                 *e = asciiState
154                                 dst[nDst+0] = '~'
155                                 dst[nDst+1] = '}'
156                                 nDst += 2
157                         } else if nDst >= len(dst) {
158                                 err = transform.ErrShortDst
159                                 break
160                         }
161                         dst[nDst] = uint8(r)
162                         nDst += 1
163                         continue
164
165                 }
166
167                 // Decode a multi-byte rune.
168                 r, size = utf8.DecodeRune(src[nSrc:])
169                 if size == 1 {
170                         // All valid runes of size 1 (those below utf8.RuneSelf) were
171                         // handled above. We have invalid UTF-8 or we haven't seen the
172                         // full character yet.
173                         if !atEOF && !utf8.FullRune(src[nSrc:]) {
174                                 err = transform.ErrShortSrc
175                                 break
176                         }
177                 }
178
179                 // func init checks that the switch covers all tables.
180                 switch {
181                 case encode0Low <= r && r < encode0High:
182                         if r = rune(encode0[r-encode0Low]); r != 0 {
183                                 goto writeGB
184                         }
185                 case encode1Low <= r && r < encode1High:
186                         if r = rune(encode1[r-encode1Low]); r != 0 {
187                                 goto writeGB
188                         }
189                 case encode2Low <= r && r < encode2High:
190                         if r = rune(encode2[r-encode2Low]); r != 0 {
191                                 goto writeGB
192                         }
193                 case encode3Low <= r && r < encode3High:
194                         if r = rune(encode3[r-encode3Low]); r != 0 {
195                                 goto writeGB
196                         }
197                 case encode4Low <= r && r < encode4High:
198                         if r = rune(encode4[r-encode4Low]); r != 0 {
199                                 goto writeGB
200                         }
201                 }
202
203         terminateInASCIIState:
204                 // Switch back to ASCII state in case of error so that an ASCII
205                 // replacement character can be written in the correct state.
206                 if *e != asciiState {
207                         if nDst+2 > len(dst) {
208                                 err = transform.ErrShortDst
209                                 break
210                         }
211                         dst[nDst+0] = '~'
212                         dst[nDst+1] = '}'
213                         nDst += 2
214                 }
215                 err = internal.ErrASCIIReplacement
216                 break
217
218         writeGB:
219                 c0 := uint8(r>>8) - 0x80
220                 c1 := uint8(r) - 0x80
221                 if c0 < 0x21 || 0x7e <= c0 || c1 < 0x21 || 0x7f <= c1 {
222                         goto terminateInASCIIState
223                 }
224                 if *e == asciiState {
225                         if nDst+4 > len(dst) {
226                                 err = transform.ErrShortDst
227                                 break
228                         }
229                         *e = gbState
230                         dst[nDst+0] = '~'
231                         dst[nDst+1] = '{'
232                         nDst += 2
233                 } else if nDst+2 > len(dst) {
234                         err = transform.ErrShortDst
235                         break
236                 }
237                 dst[nDst+0] = c0
238                 dst[nDst+1] = c1
239                 nDst += 2
240                 continue
241         }
242         // TODO: should one always terminate in ASCII state to make it safe to
243         // concatenate two HZ-GB2312-encoded strings?
244         return nDst, nSrc, err
245 }