2 punycode.c from RFC 3492
\r
3 http://www.nicemice.net/idn/
\r
5 http://www.nicemice.net/amc/
\r
7 This is ANSI C code (C89) implementing Punycode (RFC 3492).
\r
12 /************************************************************/
\r
13 /* Public interface (would normally go in its own .h file): */
\r
17 enum punycode_status {
\r
19 punycode_bad_input, /* Input is invalid. */
\r
20 punycode_big_output, /* Output would exceed the space provided. */
\r
21 punycode_overflow /* Input needs wider integers to process. */
\r
24 #if UINT_MAX >= (1 << 26) - 1
\r
25 typedef unsigned int punycode_uint;
\r
27 typedef unsigned long punycode_uint;
\r
30 enum punycode_status punycode_encode(
\r
31 punycode_uint input_length,
\r
32 const punycode_uint input[],
\r
33 const unsigned char case_flags[],
\r
34 punycode_uint *output_length,
\r
37 /* punycode_encode() converts Unicode to Punycode. The input */
\r
38 /* is represented as an array of Unicode code points (not code */
\r
39 /* units; surrogate pairs are not allowed), and the output */
\r
40 /* will be represented as an array of ASCII code points. The */
\r
41 /* output string is *not* null-terminated; it will contain */
\r
42 /* zeros if and only if the input contains zeros. (Of course */
\r
43 /* the caller can leave room for a terminator and add one if */
\r
44 /* needed.) The input_length is the number of code points in */
\r
45 /* the input. The output_length is an in/out argument: the */
\r
46 /* caller passes in the maximum number of code points that it */
\r
47 /* can receive, and on successful return it will contain the */
\r
48 /* number of code points actually output. The case_flags array */
\r
49 /* holds input_length boolean values, where nonzero suggests that */
\r
50 /* the corresponding Unicode character be forced to uppercase */
\r
51 /* after being decoded (if possible), and zero suggests that */
\r
52 /* it be forced to lowercase (if possible). ASCII code points */
\r
53 /* are encoded literally, except that ASCII letters are forced */
\r
54 /* to uppercase or lowercase according to the corresponding */
\r
55 /* uppercase flags. If case_flags is a null pointer then ASCII */
\r
56 /* letters are left as they are, and other code points are */
\r
57 /* treated as if their uppercase flags were zero. The return */
\r
58 /* value can be any of the punycode_status values defined above */
\r
59 /* except punycode_bad_input; if not punycode_success, then */
\r
60 /* output_length and output might contain garbage. */
\r
62 enum punycode_status punycode_decode(
\r
63 punycode_uint input_length,
\r
65 punycode_uint *output_length,
\r
66 punycode_uint output[],
\r
67 unsigned char case_flags[] );
\r
69 /* punycode_decode() converts Punycode to Unicode. The input is */
\r
70 /* represented as an array of ASCII code points, and the output */
\r
71 /* will be represented as an array of Unicode code points. The */
\r
72 /* input_length is the number of code points in the input. The */
\r
73 /* output_length is an in/out argument: the caller passes in */
\r
74 /* the maximum number of code points that it can receive, and */
\r
75 /* on successful return it will contain the actual number of */
\r
76 /* code points output. The case_flags array needs room for at */
\r
77 /* least output_length values, or it can be a null pointer if the */
\r
78 /* case information is not needed. A nonzero flag suggests that */
\r
79 /* the corresponding Unicode character be forced to uppercase */
\r
80 /* by the caller (if possible), while zero suggests that it be */
\r
81 /* forced to lowercase (if possible). ASCII code points are */
\r
82 /* output already in the proper case, but their flags will be set */
\r
83 /* appropriately so that applying the flags would be harmless. */
\r
84 /* The return value can be any of the punycode_status values */
\r
85 /* defined above; if not punycode_success, then output_length, */
\r
86 /* output, and case_flags might contain garbage. On success, the */
\r
87 /* decoder will never need to write an output_length greater than */
\r
88 /* input_length, because of how the encoding is defined. */
\r