punycode.h

/*
punycode.c from RFC 3492
http://www.nicemice.net/idn/
Adam M. Costello
http://www.nicemice.net/amc/

This is ANSI C code (C89) implementing Punycode (RFC 3492).

*/
  10 \r
  11 \r
/************************************************************/
/* Public interface (would normally go in its own .h file): */
  14 \r
#include <limits.h>
  16 \r
  17 enum punycode_status {\r
  18   punycode_success,\r
  19   punycode_bad_input,   /* Input is invalid.                       */\r
  20   punycode_big_output,  /* Output would exceed the space provided. */\r
  21   punycode_overflow     /* Input needs wider integers to process.  */\r
  22 };\r
  23 \r
  24 #if UINT_MAX >= (1 << 26) - 1\r
  25 typedef unsigned int punycode_uint;\r
  26 #else\r
  27 typedef unsigned long punycode_uint;\r
  28 #endif\r
  29 \r
  30 enum punycode_status punycode_encode(\r
  31   punycode_uint input_length,\r
  32   const punycode_uint input[],\r
  33   const unsigned char case_flags[],\r
  34   punycode_uint *output_length,\r
  35   char output[] );\r
  36 \r
  37     /* punycode_encode() converts Unicode to Punycode.  The input     */\r
  38     /* is represented as an array of Unicode code points (not code    */\r
  39     /* units; surrogate pairs are not allowed), and the output        */\r
  40     /* will be represented as an array of ASCII code points.  The     */\r
  41     /* output string is *not* null-terminated; it will contain        */\r
  42     /* zeros if and only if the input contains zeros.  (Of course     */\r
  43     /* the caller can leave room for a terminator and add one if      */\r
  44     /* needed.)  The input_length is the number of code points in     */\r
  45     /* the input.  The output_length is an in/out argument: the       */\r
  46     /* caller passes in the maximum number of code points that it     */\r
  47     /* can receive, and on successful return it will contain the      */\r
  48     /* number of code points actually output.  The case_flags array   */\r
  49     /* holds input_length boolean values, where nonzero suggests that */\r
  50     /* the corresponding Unicode character be forced to uppercase     */\r
  51     /* after being decoded (if possible), and zero suggests that      */\r
  52     /* it be forced to lowercase (if possible).  ASCII code points    */\r
  53     /* are encoded literally, except that ASCII letters are forced    */\r
  54     /* to uppercase or lowercase according to the corresponding       */\r
  55     /* uppercase flags.  If case_flags is a null pointer then ASCII   */\r
  56     /* letters are left as they are, and other code points are        */\r
  57     /* treated as if their uppercase flags were zero.  The return     */\r
  58     /* value can be any of the punycode_status values defined above   */\r
  59     /* except punycode_bad_input; if not punycode_success, then       */\r
  60     /* output_size and output might contain garbage.                  */\r
  61 \r
  62 enum punycode_status punycode_decode(\r
  63   punycode_uint input_length,\r
  64   const char input[],\r
  65   punycode_uint *output_length,\r
  66   punycode_uint output[],\r
  67   unsigned char case_flags[] );\r
  68 \r
  69     /* punycode_decode() converts Punycode to Unicode.  The input is  */\r
  70     /* represented as an array of ASCII code points, and the output   */\r
  71     /* will be represented as an array of Unicode code points.  The   */\r
  72     /* input_length is the number of code points in the input.  The   */\r
  73     /* output_length is an in/out argument: the caller passes in      */\r
  74     /* the maximum number of code points that it can receive, and     */\r
  75     /* on successful return it will contain the actual number of      */\r
  76     /* code points output.  The case_flags array needs room for at    */\r
  77     /* least output_length values, or it can be a null pointer if the */\r
  78     /* case information is not needed.  A nonzero flag suggests that  */\r
  79     /* the corresponding Unicode character be forced to uppercase     */\r
  80     /* by the caller (if possible), while zero suggests that it be    */\r
  81     /* forced to lowercase (if possible).  ASCII code points are      */\r
  82     /* output already in the proper case, but their flags will be set */\r
  83     /* appropriately so that applying the flags would be harmless.    */\r
  84     /* The return value can be any of the punycode_status values      */\r
  85     /* defined above; if not punycode_success, then output_length,    */\r
  86     /* output, and case_flags might contain garbage.  On success, the */\r
  87     /* decoder will never need to write an output_length greater than */\r
  88     /* input_length, because of how the encoding is defined.          */\r
  89 \r