2 * convert from text form of arbitrary data (e.g., keys) to binary
3 * Copyright (C) 2000 Henry Spencer.
5 * This library is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU Library General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or (at your
8 * option) any later version. See <http://www.fsf.org/copyleft/lgpl.txt>.
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
12 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 * License for more details.
15 * RCSID $Id: ttodata.c,v 1.4 2002/03/12 17:04:58 henry Exp $
20 /* converters and misc */
21 static int unhex(const char *, char *, size_t);
22 static int unb64(const char *, char *, size_t);
23 static int untext(const char *, char *, size_t);
24 static const char *badch(const char *, int, char *, size_t);
26 /* internal error codes for converters */
27 #define SHORT (-2) /* internal buffer too short */
28 #define BADPAD (-3) /* bad base64 padding */
29 #define BADCH0 (-4) /* invalid character 0 */
30 #define BADCH1 (-5) /* invalid character 1 */
31 #define BADCH2 (-6) /* invalid character 2 */
32 #define BADCH3 (-7) /* invalid character 3 */
33 #define BADOFF(code) (BADCH0-(code))
36 - ttodatav - convert text to data, with verbose error reports
37 * If some of this looks slightly odd, it's because it has changed
38 * repeatedly (from the original atodata()) without a major rewrite.
40 const char * /* NULL on success, else literal or errp */
41 ttodatav(src, srclen, base, dst, dstlen, lenp, errp, errlen)
43 size_t srclen; /* 0 means apply strlen() */
44 int base; /* 0 means figure it out */
45 char *dst; /* need not be valid if dstlen is 0 */
47 size_t *lenp; /* where to record length (NULL is nowhere) */
48 char *errp; /* error buffer */
51 size_t ingroup; /* number of input bytes converted at once */
52 char buf[4]; /* output from conversion */
53 int nbytes; /* size of output */
54 int (*decode)(const char *, char *, size_t);
63 dst = buf; /* point it somewhere valid */
68 return "input too short to be valid";
70 return "input does not begin with format prefix";
85 return "unknown format prefix";
107 return "unknown base";
113 while (srclen >= ingroup) {
114 nbytes = (*decode)(src, buf, sizeof(buf));
120 return badch(src, nbytes, errp, errlen);
123 return "internal buffer too short (\"can't happen\")";
126 return "bad (non-zero) padding at end of base64 input";
130 return "unknown internal error";
131 for (i = 0; i < nbytes; i++) {
138 if (underscoreok && srclen > 1 && *src == '_') {
139 /* srclen > 1 means not last character */
145 return "input ends in mid-byte, perhaps truncated";
147 return "no data bytes specified by input";
154 - ttodata - convert text to data
156 const char * /* NULL on success, else literal */
157 ttodata(src, srclen, base, dst, dstlen, lenp)
159 size_t srclen; /* 0 means apply strlen() */
160 int base; /* 0 means figure it out */
161 char *dst; /* need not be valid if dstlen is 0 */
163 size_t *lenp; /* where to record length (NULL is nowhere) */
165 return ttodatav(src, srclen, base, dst, dstlen, lenp, (char *)NULL,
170 - atodata - convert ASCII to data
171 * backward-compatibility interface
173 size_t /* 0 for failure, true length for success */
174 atodata(src, srclen, dst, dstlen)
183 err = ttodata(src, srclen, 0, dst, dstlen, &len);
190 - atobytes - convert ASCII to data bytes
191 * another backward-compatibility interface
194 atobytes(src, srclen, dst, dstlen, lenp)
201 return ttodata(src, srclen, 0, dst, dstlen, lenp);
205 - unhex - convert two ASCII hex digits to byte
207 static int /* number of result bytes, or error code */
208 unhex(src, dst, dstlen)
209 const char *src; /* known to be full length */
211 size_t dstlen; /* not large enough is a failure */
215 static char hex[] = "0123456789abcdef";
220 p = strchr(hex, *src);
222 p = strchr(hex, tolower(*src));
225 byte = (p - hex) << 4;
228 p = strchr(hex, *src);
230 p = strchr(hex, tolower(*src));
240 - unb64 - convert four ASCII base64 digits to three bytes
241 * Note that a base64 digit group is padded out with '=' if it represents
242 * fewer than three bytes: one byte is dd==, two is ddd=, three is dddd.
244 static int /* number of result bytes, or error code */
245 unb64(src, dst, dstlen)
246 const char *src; /* known to be full length */
253 static char base64[] =
254 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
259 p = strchr(base64, *src++);
262 byte1 = (p - base64) << 2; /* first six bits */
264 p = strchr(base64, *src++);
267 byte2 = p - base64; /* next six: two plus four */
268 *dst++ = byte1 | (byte2 >> 4);
269 byte1 = (byte2 & 0xf) << 4;
271 p = strchr(base64, *src++);
273 if (*(src-1) == '=' && *src == '=') {
274 if (byte1 != 0) /* bad padding */
280 byte2 = p - base64; /* next six: four plus two */
281 *dst++ = byte1 | (byte2 >> 2);
282 byte1 = (byte2 & 0x3) << 6;
284 p = strchr(base64, *src++);
286 if (*(src-1) == '=') {
287 if (byte1 != 0) /* bad padding */
293 byte2 = p - base64; /* last six */
294 *dst++ = byte1 | byte2;
299 - untext - convert one ASCII character to byte
301 static int /* number of result bytes, or error code */
302 untext(src, dst, dstlen)
303 const char *src; /* known to be full length */
305 size_t dstlen; /* not large enough is a failure */
315 - badch - produce a nice complaint about an unknown character
317 * If the compiler complains that the array bigenough[] has a negative
318 * size, that means the TTODATAV_BUF constant has been set too small.
320 static const char * /* literal or errp */
321 badch(src, errcode, errp, errlen)
324 char *errp; /* might be NULL */
327 static const char pre[] = "unknown character (`";
328 static const char suf[] = "') in input";
330 # define REQD (sizeof(pre) - 1 + sizeof(buf) - 1 + sizeof(suf))
332 char bigenough[TTODATAV_BUF - REQD]; /* see above */
336 if (errp == NULL || errlen < REQD)
337 return "unknown character in input";
339 ch = *(src + BADOFF(errcode));
345 buf[1] = ((ch & 0700) >> 6) + '0';
346 buf[2] = ((ch & 0070) >> 3) + '0';
347 buf[3] = ((ch & 0007) >> 0) + '0';
352 return (const char *)errp;
365 - main - convert first argument to hex, or run regression
383 fprintf(stderr, "Usage: %s {0x<hex>|0s<base64>|-r}\n", pgm);
387 if (strcmp(argv[1], "-r") == 0) {
388 regress(pgm); /* should not return */
389 fprintf(stderr, "%s: regress() returned?!?\n", pgm);
393 oops = ttodatav(argv[1], 0, 0, buf, sizeof(buf), &n, err, sizeof(err));
395 fprintf(stderr, "%s: ttodata error `%s' in `%s'\n", pgm,
400 if (n > sizeof(buf)) {
401 p = (char *)malloc((size_t)n);
404 "%s: unable to malloc %d bytes for result\n",
408 oops = ttodata(argv[1], 0, 0, p, n, &n);
410 fprintf(stderr, "%s: error `%s' in ttodata retry?!?\n",
416 hexout(p, n, stdout);
419 i = datatot(buf, n, 'h', buf2, sizeof(buf2));
421 fprintf(stderr, "%s: datatot reports error in `%s'\n", pgm,
426 if (i > sizeof(buf2)) {
427 p2 = (char *)malloc((size_t)i);
430 "%s: unable to malloc %d bytes for result\n",
434 i = datatot(buf, n, 'h', p2, i);
436 fprintf(stderr, "%s: error in datatoa retry?!?\n", pgm);
447 - hexout - output an arbitrary-length string in hex
458 for (i = 0; i < len; i++)
459 fprintf(f, "%02x", (unsigned char)s[i]);
464 char *ascii; /* NULL for end */
465 char *data; /* NULL for error expected */
473 0, "0xabcd", "\xab\xcd",
474 0, "0x0123456789", "\x01\x23\x45\x67\x89",
476 0, "0xabcdef", "\xab\xcd\xef",
477 0, "0xABCDEF", "\xab\xcd\xef",
478 0, "0XaBc0eEd81f", "\xab\xc0\xee\xd8\x1f",
479 0, "0XaBc0_eEd8", "\xab\xc0\xee\xd8",
483 16, "aBc0_eEd8", "\xab\xc0\xee\xd8",
488 0, "0sDCBA", "\x0c\x20\x40",
489 0, "0SDCBA", "\x0c\x20\x40",
492 0, "0sDCA=", "\x0c\x20",
494 0, "0sDCAZ", "\x0c\x20\x19",
495 0, "0sDCAa", "\x0c\x20\x1a",
496 0, "0sDCAz", "\x0c\x20\x33",
497 0, "0sDCA0", "\x0c\x20\x34",
498 0, "0sDCA9", "\x0c\x20\x3d",
499 0, "0sDCA+", "\x0c\x20\x3e",
500 0, "0sDCA/", "\x0c\x20\x3f",
501 0, "0sAbraCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe",
502 64, "AbraCadabra+", "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe",
504 0, "0tabc_xyz", "abc_xyz",
505 256, "abc_xyz", "abc_xyz",
510 char *data; /* input; NULL for end */
512 int buflen; /* -1 means big buffer */
513 int outlen; /* -1 means strlen(ascii)+1 */
514 char *ascii; /* NULL for error expected */
516 "", 'x', -1, -1, NULL,
517 "", 'X', -1, -1, NULL,
518 "", 'n', -1, -1, NULL,
519 "0", 'x', -1, -1, "0x30",
520 "0", 'x', 0, 5, "---",
523 "0", 'x', 3, 5, "0x",
524 "0", 'x', 4, 5, "0x3",
525 "0", 'x', 5, 5, "0x30",
526 "0", 'x', 6, 5, "0x30",
527 "\xab\xcd", 'x', -1, -1, "0xabcd",
528 "\x01\x23\x45\x67\x89", 'x', -1, -1, "0x0123456789",
529 "\xab\xcd\xef", 'x', -1, -1, "0xabcdef",
530 "\xab\xc0\xee\xd8\x1f", 'x', -1, -1, "0xabc0eed81f",
531 "\x01\x02", 'h', -1, -1, "0x0102",
532 "\x01\x02\x03\x04\x05\x06", 'h', -1, -1, "0x01020304_0506",
533 "\xab\xc0\xee\xd8\x1f", 16, -1, -1, "abc0eed81f",
534 "\x0c\x20\x40", 's', -1, -1, "0sDCBA",
535 "\x0c\x20\x40", 's', 0, 7, "---",
536 "\x0c\x20\x40", 's', 1, 7, "",
537 "\x0c\x20\x40", 's', 2, 7, "0",
538 "\x0c\x20\x40", 's', 3, 7, "0s",
539 "\x0c\x20\x40", 's', 4, 7, "0sD",
540 "\x0c\x20\x40", 's', 5, 7, "0sDC",
541 "\x0c\x20\x40", 's', 6, 7, "0sDCB",
542 "\x0c\x20\x40", 's', 7, 7, "0sDCBA",
543 "\x0c\x20\x40", 's', 8, 7, "0sDCBA",
544 "\x0c", 's', -1, -1, "0sDA==",
545 "\x0c\x20", 's', -1, -1, "0sDCA=",
546 "\x0c\x20\x19", 's', -1, -1, "0sDCAZ",
547 "\x0c\x20\x1a", 's', -1, -1, "0sDCAa",
548 "\x0c\x20\x33", 's', -1, -1, "0sDCAz",
549 "\x0c\x20\x34", 's', -1, -1, "0sDCA0",
550 "\x0c\x20\x3d", 's', -1, -1, "0sDCA9",
551 "\x0c\x20\x3e", 's', -1, -1, "0sDCA+",
552 "\x0c\x20\x3f", 's', -1, -1, "0sDCA/",
553 "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", 's', -1, -1, "0sAbraCadabra+",
554 "\x01\xba\xda\x09\xa7\x5a\x6e\xb6\xbe", 64, -1, -1, "AbraCadabra+",
555 NULL, 'x', -1, -1, NULL,
559 - regress - regression-test ttodata() and datatot()
561 void /* should not return at all, in fact */
573 for (r = atodatatab; r->ascii != NULL; r++) {
574 oops = ttodata(r->ascii, 0, r->base, buf, sizeof(buf), &n);
575 if (oops != NULL && r->data == NULL)
576 {} /* error expected */
577 else if (oops != NULL) {
578 printf("`%s' gave error `%s', expecting %d `", r->ascii,
579 oops, strlen(r->data));
580 hexout(r->data, strlen(r->data), stdout);
583 } else if (r->data == NULL) {
584 printf("`%s' gave %d `", r->ascii, n);
585 hexout(buf, n, stdout);
586 printf("', expecting error\n");
588 } else if (n != strlen(r->data)) {
589 printf("length wrong in `%s': got %d `", r->ascii, n);
590 hexout(buf, n, stdout);
591 printf("', expecting %d `", strlen(r->data));
592 hexout(r->data, strlen(r->data), stdout);
595 } else if (memcmp(buf, r->data, n) != 0) {
596 printf("`%s' gave %d `", r->ascii, n);
597 hexout(buf, n, stdout);
598 printf("', expecting %d `", strlen(r->data));
599 hexout(r->data, strlen(r->data), stdout);
605 for (dr = datatoatab; dr->data != NULL; dr++) {
607 n = datatot(dr->data, strlen(dr->data), dr->format, buf,
608 (dr->buflen == -1) ? sizeof(buf) : dr->buflen);
609 should = (dr->ascii == NULL) ? 0 : strlen(dr->ascii) + 1;
610 if (dr->outlen != -1)
612 if (n == 0 && dr->ascii == NULL)
613 {} /* error expected */
616 hexout(dr->data, strlen(dr->data), stdout);
617 printf("' %c gave error, expecting %d `%s'\n",
618 dr->format, should, dr->ascii);
620 } else if (dr->ascii == NULL) {
622 hexout(dr->data, strlen(dr->data), stdout);
623 printf("' %c gave %d `%.*s', expecting error\n",
624 dr->format, n, n, buf);
626 } else if (n != should) {
627 printf("length wrong in `");
628 hexout(dr->data, strlen(dr->data), stdout);
629 printf("': got %d `%s'", n, buf);
630 printf(", expecting %d `%s'\n", should, dr->ascii);
632 } else if (strcmp(buf, dr->ascii) != 0) {
634 hexout(dr->data, strlen(dr->data), stdout);
635 printf("' gave %d `%s'", n, buf);
636 printf(", expecting %d `%s'\n", should, dr->ascii);
644 #endif /* TTODATA_MAIN */