2 * Copyright (C) 2005-2011 Atsushi Konno All rights reserved.
3 * Copyright (C) 2005 QSDN,Inc. All rights reserved.
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
9 * http://www.apache.org/licenses/LICENSE-2.0
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
18 #include "chxj_encoding.h"
19 #include "chxj_apply_convrule.h"
20 #include "chxj_url_encode.h"
21 #include "chxj_dump_string.h"
/*
 * chxj_encoding(): convert `src` (*len bytes) from the encoding named by the
 * matching conversion rule (entryp->encoding) to CP932 using iconv.
 *
 * r   - current request; used for logging, configuration lookup and as the
 *       parent of the working memory pool.
 * src - input bytes to convert.
 * len - in: length of `src`.  NOTE(review): presumably updated with the
 *       converted length before returning; the assignment is not visible in
 *       this excerpt - confirm.
 *
 * The visible early exit for an invalid size returns an empty string
 * allocated from r->pool.
 *
 * NOTE(review): a number of original lines of this function (local
 * declarations, braces, `if` conditions and most `return` statements) are not
 * present in this excerpt.  The comments below describe only what the visible
 * lines establish.
 */
27 chxj_encoding(request_rec *r, const char *src, apr_size_t *len)
37 mod_chxj_config *dconf;
38 mod_chxj_req_config *req_conf;
39 chxjconvrule_entry *entryp;
43 DBG(r,"REQ[%X] start %s()",TO_ADDR(r),__func__);
/* Per-directory module configuration; its convrules are consulted below. */
45 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early exit logged as "none encoding"; the guarding condition is not visible here. */
48 DBG(r,"REQ[%X] none encoding.", TO_ADDR(r));
49 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Invalid-size path: log the offending length and hand back an empty string. */
53 ERR(r,"REQ[%X] runtime exception: chxj_encoding(): invalid string size.[%d]", TO_ADDR(r),(int)*len);
54 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
55 return (char *)apr_pstrdup(r->pool, "");
58 req_conf = chxj_get_module_config(r->request_config, &chxj_module);
59 /*-------------------------------------------------------------------------*/
60 /* Reuse the rule entry cached on the request when req_conf->user_agent    */
61 /* is set; otherwise look it up from the per-directory convrules.          */
62 if (req_conf->user_agent) {
63 entryp = req_conf->entryp;
66 entryp = chxj_apply_convrule(r, dconf->convrules);
/* No encoding configured on the rule: logged as "none encoding". */
68 if (entryp->encoding == NULL) {
69 DBG(r,"REQ[%X] none encoding.", TO_ADDR(r));
70 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Encoding explicitly configured as "none" (case-insensitive comparison). */
74 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
75 DBG(r,"REQ[%X] none encoding.", TO_ADDR(r));
76 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Working buffers live in a sub-pool created under r->pool. */
80 apr_pool_create(&pool, r->pool);
/* One extra byte so the private copy of the input is always NUL-terminated. */
82 ibuf = apr_palloc(pool, ilen+1);
84 ERR(r,"REQ[%X] runtime exception: chxj_encoding(): Out of memory.",TO_ADDR(r));
85 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Zero-fill, then copy: leaves a terminating NUL after the ilen input bytes. */
88 memset(ibuf, 0, ilen+1);
89 memcpy(ibuf, src, ilen);
/* spos remembers the start of the output buffer; iconv() advances obuf. */
92 spos = obuf = apr_palloc(pool, olen);
94 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
97 DBG(r,"REQ[%X] encode convert [%s] -> [%s]", TO_ADDR(r), entryp->encoding, "CP932");
99 memset(obuf, 0, olen);
/* Conversion descriptor: from the rule's encoding to CP932. */
100 cd = iconv_open("CP932", entryp->encoding);
101 if (cd == (iconv_t)-1) {
/* EINVAL from iconv_open means this conversion pair is unsupported. */
102 if (EINVAL == errno) {
103 ERR(r,"REQ[%X] The conversion from %s to %s is not supported by the implementation.", TO_ADDR(r),entryp->encoding, "CP932");
106 ERR(r,"REQ[%X] iconv open failed. from:[%s] to:[%s] errno:[%d]", TO_ADDR(r),entryp->encoding, "CP932", errno);
108 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* iconv() updates ibuf/ilen/obuf/olen in place as it consumes and produces bytes. */
112 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
113 if (result == (size_t)(-1)) {
/* E2BIG: the output buffer is full. */
114 if (E2BIG == errno) {
115 ERR(r, "REQ[%X] There is not sufficient room at *outbuf.",TO_ADDR(r));
/* EILSEQ: invalid input sequence; delegate handling of the offending bytes. */
118 else if (EILSEQ == errno) {
119 ERR(r,"REQ[%X] %s:%d An invalid multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),__FILE__,__LINE__,ibuf);
120 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* EINVAL: the input ends in the middle of a multibyte sequence. */
122 else if (EINVAL == errno) {
123 ERR(r,"REQ[%X] An incomplete multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
/* Dump the converted bytes from the saved start of the output buffer. */
131 chxj_dump_string(r, APLOG_MARK, "RESULT Convert Encoding", spos, *len);
132 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/*
 * chxj_convert_illegal_charactor_sequence(): called by the conversion
 * routines after iconv() reports EILSEQ.  It selects an encoding family from
 * entryp->encoding (UTF-8, EUC-JP variants, CP932/Shift_JIS variants, or
 * anything else) and classifies the byte(s) at *ibuf to decide how long the
 * offending character is; each case logs "passed Nbyte.".
 *
 * r          - current request, used for logging.
 * entryp     - conversion rule whose `encoding` names the input encoding.
 * ibuf, ilen - in/out: current input position and remaining input length.
 * obuf, olen - in/out: current output position and remaining output room.
 *
 * NOTE(review): the statements inside each branch that actually touch
 * *ibuf / *obuf / *olen are not visible in this excerpt, so what is written
 * or skipped for each case must be confirmed against the full source.
 */
138 chxj_convert_illegal_charactor_sequence(request_rec *r, chxjconvrule_entry *entryp, char **ibuf, apr_size_t *ilen, char **obuf, apr_size_t *olen)
/* UTF-8 family: classify by the high bits of the first byte. */
140 if (STRCASEEQ('u','U',"UTF-8", entryp->encoding) || STRCASEEQ('u','U',"UTF8", entryp->encoding)) {
/* 110xxxxx */
141 if ((0xe0 & **ibuf) == 0xc0) {
142 /* 2-byte character */
147 DBG(r,"REQ[%X] passed 2byte.",TO_ADDR(r));
/* 1110xxxx */
149 else if ((0xf0 & **ibuf) == 0xe0) {
150 /* 3-byte character */
155 DBG(r,"REQ[%X] passed 3byte.",TO_ADDR(r));
/* 11110xxx */
157 else if ((0xf8 & **ibuf) == 0xf0) {
158 /* 4-byte character */
163 DBG(r,"REQ[%X] passed 4byte.",TO_ADDR(r));
/* 10xxxxxx */
165 else if ((0xc0 & **ibuf) == 0x80) {
166 /* 1-byte character (first byte has the 10xxxxxx bit pattern) */
171 DBG(r,"REQ[%X] passed 1byte.",TO_ADDR(r));
174 /* unknown character */
179 DBG(r,"REQ[%X] passed 1byte.",TO_ADDR(r));
/*
 * EUC-JP family.
 * NOTE(review): "EUCJP" is tested both as the first and as the last
 * alternative of this condition; the second test is redundant.
 */
182 else if (STRCASEEQ('e','E', "EUCJP", entryp->encoding)
183 || STRCASEEQ('c','C', "CSEUCPKDFMTJAPANESE", entryp->encoding)
184 || STRCASEEQ('e','E', "EUC-JISX0213", entryp->encoding)
185 || STRCASEEQ('e','E', "EUC-JP-MS", entryp->encoding)
186 || STRCASEEQ('e','E', "EUC-JP", entryp->encoding)
187 || STRCASEEQ('e','E', "EUCJP-MS", entryp->encoding)
188 || STRCASEEQ('e','E', "EUCJP-OPEN", entryp->encoding)
189 || STRCASEEQ('e','E', "EUCJP-WIN", entryp->encoding)
190 || STRCASEEQ('e','E', "EUCJP", entryp->encoding)) {
/* A first byte of 0x8F is treated as the start of a 3-byte character. */
191 if ((unsigned char)**ibuf == 0x8F) {
192 /* 3-byte character */
197 DBG(r,"REQ[%X] passed 3byte.",TO_ADDR(r));
200 /* 2-byte character */
205 DBG(r,"REQ[%X] passed 2byte.",TO_ADDR(r));
/* CP932 / Shift_JIS family. */
208 else if (STRCASEEQ('c', 'C', "CP932", entryp->encoding)
209 || STRCASEEQ('c', 'C', "CSIBM932", entryp->encoding)
210 || STRCASEEQ('i', 'I', "IBM-932", entryp->encoding)
211 || STRCASEEQ('i', 'I', "IBM932", entryp->encoding)
212 || STRCASEEQ('m', 'M', "MS932", entryp->encoding)
213 || STRCASEEQ('m', 'M', "MS_KANJI", entryp->encoding)
214 || STRCASEEQ('s', 'S', "SJIS-OPEN", entryp->encoding)
215 || STRCASEEQ('s', 'S', "SJIS-WIN", entryp->encoding)
216 || STRCASEEQ('s', 'S', "SJIS", entryp->encoding)) {
/*
 * First byte in 0x81-0x9f or 0xe0-0xfc, combined with a second byte in
 * 0x40-0x7e or 0x80-0xfc.  The operator joining the two groups is on a line
 * not visible in this excerpt.
 */
217 if ( ( ((0x81 <= (unsigned char)**ibuf) && (0x9f >= (unsigned char)**ibuf))
218 || ((0xe0 <= (unsigned char)**ibuf) && (0xfc >= (unsigned char)**ibuf)))
220 ( ((0x40 <= (unsigned char)*((*ibuf)+1)) && (0x7e >= (unsigned char)*((*ibuf)+1)))
221 ||((0x80 <= (unsigned char)*((*ibuf)+1)) && (0xfc >= (unsigned char)*((*ibuf)+1))))) {
222 /* 2-byte character */
227 DBG(r,"REQ[%X] passed 2byte.",TO_ADDR(r));
230 /* 1-byte character */
235 DBG(r,"REQ[%X] passed 1byte.",TO_ADDR(r));
/* Any other encoding name. */
239 /* unknown 1-byte character */
244 DBG(r,"REQ[%X] passed 1byte.",TO_ADDR(r));
/*
 * The remaining input length is recomputed by scanning for the NUL
 * terminator rather than by arithmetic on the old value.  This depends on
 * the input buffer being NUL-terminated, and a NUL byte inside the data
 * would shorten the remaining length.
 */
247 *ilen = strlen(*ibuf);
248 DBG(r,"REQ[%X] new len = [%" APR_SIZE_T_FMT "].", TO_ADDR(r),(apr_size_t)*ilen);
/*
 * chxj_rencoding(): reverse-direction conversion.  Converts `src` (*len
 * bytes) from the encoding named by `enc` into the encoding configured on the
 * matching conversion rule (entryp->encoding) using iconv.
 *
 * r   - current request; used for logging, configuration lookup and memory.
 * src - input bytes to convert.
 * len - in: length of `src`.  NOTE(review): presumably updated with the
 *       converted length; the assignment is not visible in this excerpt.
 * enc - name of the source encoding.
 *
 * The visible early exit for an invalid size returns an empty string
 * allocated from r->pool.
 *
 * NOTE(review): many original lines (local declarations, braces, `if`
 * conditions, `return` statements) are missing from this excerpt; comments
 * describe only what the visible lines establish.
 */
254 chxj_rencoding(request_rec *r, const char *src, apr_size_t *len,const char *enc)
264 mod_chxj_config *dconf;
265 mod_chxj_req_config *req_conf;
266 chxjconvrule_entry *entryp;
268 DBG(r,"REQ[%X] start %s()",TO_ADDR(r),__func__);
/* Invalid-size path: log the offending length and hand back an empty string. */
271 ERR(r,"REQ[%X] runtime exception: chxj_rencoding(): invalid string size.[%d]",TO_ADDR(r),(int)*len);
272 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
273 return (char *)apr_pstrdup(r->pool, "");
276 dconf = chxj_get_module_config(r->per_dir_config, &chxj_module);
/* Early exit logged as "none encoding"; the guarding condition is not visible here. */
278 DBG(r,"REQ[%X] none encoding.", TO_ADDR(r));
279 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
283 req_conf = chxj_get_module_config(r->request_config, &chxj_module);
284 /*-------------------------------------------------------------------------*/
285 /* Reuse the rule entry cached on the request when req_conf->user_agent    */
286 /* is set; otherwise look it up from the per-directory convrules.          */
287 if (req_conf->user_agent) {
288 entryp = req_conf->entryp;
291 entryp = chxj_apply_convrule(r, dconf->convrules);
/* No encoding configured on the rule: logged as "none encoding". */
293 if (! entryp->encoding) {
294 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
295 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Encoding explicitly configured as "none" (case-insensitive comparison). */
299 if (STRCASEEQ('n','N',"none", entryp->encoding)) {
300 DBG(r,"REQ[%X] none encoding.",TO_ADDR(r));
301 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Buffers are allocated directly from r->pool; one extra byte for the NUL. */
306 ibuf = apr_palloc(r->pool, ilen+1);
308 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Zero-fill, then copy: leaves a terminating NUL after the ilen input bytes. */
312 memset(ibuf, 0, ilen+1);
313 memcpy(ibuf, src, ilen+0);
/* spos remembers the start of the output buffer; iconv() advances obuf. */
316 spos = obuf = apr_palloc(r->pool, olen);
318 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Source encoding defaults to the caller-supplied name (const is cast away). */
321 char *from_enc = (char *)enc;
/*
 * Special case for "Shift_JIS" (case-insensitive).  NOTE(review): the body
 * of this branch is not visible; presumably it substitutes another source
 * encoding name into from_enc - confirm.
 */
325 if (strcasecmp(enc,"Shift_JIS") == 0){
328 DBG(r,"REQ[%X] encode convert [%s] -> [%s]", TO_ADDR(r),from_enc, entryp->encoding);
329 memset(obuf, 0, olen);
/* Conversion descriptor: from from_enc to the rule's encoding. */
331 cd = iconv_open(entryp->encoding, from_enc);
332 if (cd == (iconv_t)-1) {
/*
 * EINVAL from iconv_open means this conversion pair is unsupported.
 * NOTE(review): the message prints a hard-coded "CP932" as the source
 * encoding, while the descriptor above was opened with from_enc.
 */
333 if (EINVAL == errno) {
334 ERR(r,"REQ[%X] The conversion from %s to %s is not supported by the implementation.", TO_ADDR(r),"CP932", entryp->encoding);
336 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* iconv() updates ibuf/ilen/obuf/olen in place as it consumes and produces bytes. */
341 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
342 if (result == (size_t)(-1)) {
/* E2BIG: the output buffer is full. */
343 if (E2BIG == errno) {
344 ERR(r,"REQ[%X] There is not sufficient room at *outbuf",TO_ADDR(r));
/* EILSEQ: invalid input sequence; delegate handling of the offending bytes. */
347 else if (EILSEQ == errno) {
348 ERR(r,"REQ[%X] An invalid multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
349 chxj_convert_illegal_charactor_sequence(r, entryp, &ibuf, &ilen, &obuf, &olen);
/* EINVAL: the input ends in the middle of a multibyte sequence. */
351 else if (EINVAL == errno) {
352 ERR(r,"REQ[%X] An incomplete multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
/* Dump the converted bytes from the saved start of the output buffer. */
361 chxj_dump_string(r, APLOG_MARK, "RESULT Convert REncoding", spos, *len);
362 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/*
 * chxj_encoding_parameter(): re-encode the query-string part of a URL.
 *
 * The URL in `value` is copied, then searched for '#' (anchor) and '?'
 * (query string).  The query string is tokenised on '&'; each key and value
 * is URL-decoded, passed through chxj_encoding(), URL-encoded again and
 * appended to `param`.  The result is rebuilt as the saved copy `src_sv`,
 * "?", the rebuilt parameters and, on one return path, "#" plus the anchor.
 *
 * r       - current request; all strings are allocated from r->pool.
 * value   - URL to process.
 * xmlflag - non-zero sets use_amp_flag to 1.  NOTE(review): how that flag
 *           selects the separator is not visible in this excerpt.
 *
 * NOTE(review): many original lines (local declarations, braces, `if`/`else`
 * conditions, the tokenising loop header) are missing from this excerpt;
 * comments describe only what the visible lines establish.
 */
369 chxj_encoding_parameter(request_rec *r, const char *value, int xmlflag)
385 DBG(r,"REQ[%X] start %s()",TO_ADDR(r),__func__);
/* Work on a private copy of the URL. */
387 src = apr_pstrdup(r->pool, value);
/* Locate and save the anchor ("#...") part, if any. */
389 anchor_pos = strchr(src, '#');
392 anchor = apr_pstrdup(r->pool, anchor_pos);
/* Locate the start of the query string. */
397 spos = strchr(src, '?');
399 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Early return that only re-attaches the anchor; the guarding condition is not visible here. */
401 return apr_pstrcat(r->pool, src, "#", anchor, NULL);
/* src_sv is the copy of src used as the leading part of the final result. */
408 src_sv = apr_pstrdup(r->pool, src);
/* One-byte buffer for the accumulated parameters; tested with strlen() below. */
409 param = apr_palloc(r->pool, 1);
416 use_amp_flag = (xmlflag) ? 1 : 0;
/* Split the query string into "key=value" pairs on '&'. */
418 pair = apr_strtok(spos, "&", &pstat);
/* A token starting with "amp;" is what remains of an "&amp;" separator after splitting on '&'. */
421 if (strncasecmp(pair, "amp;", 4) == 0) {
425 sep_pos = strchr(pair, '=');
/* '=' is the first character of the pair: the key is empty. */
426 if (pair == sep_pos) {
427 key = apr_pstrdup(r->pool, "");
430 key = apr_strtok(pair, "=", &vstat);
/* Key: URL-decode, convert the encoding, then URL-encode again. */
434 apr_size_t klen = (apr_size_t)strlen(key);
435 key = chxj_url_decode(r->pool, key);
436 len = (apr_size_t)strlen(key);
438 key = chxj_encoding(r, key, &len);
439 key = chxj_url_encode(r->pool, key);
/* Code disabled with #if 0. */
441 #if 0 /* XXX:2009/4/10 */
442 key = chxj_url_encode(r->pool, key);
445 val = apr_strtok(pair, "=", &vstat);
/* "key=" with nothing after the '=': use an empty value. */
446 if (! val && sep_pos) {
447 val = apr_pstrdup(r->pool, "");
/* Value: URL-decode, convert the encoding, then URL-encode again. */
450 apr_size_t vlen = (apr_size_t)strlen(val);
451 val = chxj_url_decode(r->pool, val);
452 len = (apr_size_t)strlen(val);
454 val = chxj_encoding(r, val, &len);
455 val = chxj_url_encode(r->pool, val);
/* Code disabled with #if 0. */
457 #if 0 /* XXX:2009/4/10 */
458 val = chxj_url_encode(r->pool, val);
/* First pair is appended without a separator; later pairs are joined with "&". */
460 if (strlen(param) == 0) {
461 param = apr_pstrcat(r->pool, param, key, "=", val, NULL);
465 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
468 param = apr_pstrcat(r->pool, param, "&", key, "=", val, NULL);
/* Same joining logic with the key alone (no "=" and no value appended). */
473 if (strlen(param) == 0) {
474 param = apr_pstrcat(r->pool, param, key, NULL);
478 param = apr_pstrcat(r->pool, param, "&", key, NULL);
481 param = apr_pstrcat(r->pool, param, "&", key, NULL);
486 DBG(r,"REQ[%X] end %s()",TO_ADDR(r),__func__);
/* Two return forms: with the anchor re-attached, and without it. */
489 return apr_pstrcat(r->pool, src_sv, "?", param, "#", anchor, NULL);
491 return apr_pstrcat(r->pool, src_sv, "?", param, NULL);
497 chxj_iconv(request_rec *r, apr_pool_t *pool, const char *src, apr_size_t *len, const char *from, const char *to)
510 ERR(r,"REQ[%X] runtime exception: chxj_iconv(): invalid string size.[%d]", TO_ADDR(r),(int)*len);
511 return (char *)apr_pstrdup(pool, "");
515 ibuf = apr_palloc(pool, ilen+1);
517 ERR(r,"REQ[%X] runtime exception: chxj_iconv(): Out of memory.",TO_ADDR(r));
520 memset(ibuf, 0, ilen+1);
521 memcpy(ibuf, src, ilen);
524 spos = obuf = apr_palloc(pool, olen);
526 ERR(r,"REQ[%X] %s:%d runtime exception: chxj_iconv(): Out of memory", TO_ADDR(r),APLOG_MARK);
529 memset(obuf, 0, olen);
530 cd = iconv_open(to, from);
531 if (cd == (iconv_t)-1) {
532 if (EINVAL == errno) {
533 ERR(r,"REQ[%X] The conversion from %s to %s is not supported by the implementation.", TO_ADDR(r),from, to);
536 ERR(r,"REQ[%X] iconv open failed. from:[%s] to:[%s] errno:[%d]", TO_ADDR(r),from, to, errno);
541 result = iconv(cd, &ibuf, &ilen, &obuf, &olen);
542 if (result == (size_t)(-1)) {
543 if (E2BIG == errno) {
544 ERR(r,"REQ[%X] There is not sufficient room at *outbuf.",TO_ADDR(r));
546 else if (EILSEQ == errno) {
547 ERR(r,"REQ[%X] An invalid multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);
549 else if (EINVAL == errno) {
550 ERR(r,"REQ[%X] An incomplete multibyte sequence has been encountered in the input. input:[%s]", TO_ADDR(r),ibuf);