src/backend/parser/scansup.c

   1 /*-------------------------------------------------------------------------
   2  *
   3  * scansup.c
   4  *        support routines for the lex/flex scanner, used by both the normal
   5  * backend as well as the bootstrap backend
   6  *
   7  * Portions Copyright (c) 1996-2004, PostgreSQL Global Development Group
   8  * Portions Copyright (c) 1994, Regents of the University of California
   9  *
  10  *
  11  * IDENTIFICATION
  12  *        $PostgreSQL: pgsql/src/backend/parser/scansup.c,v 1.27 2004/08/29 04:12:42 momjian Exp $
  13  *
  14  *-------------------------------------------------------------------------
  15  */
  16 #include "postgres.h"
  17
  18 #include <ctype.h>
  19
  20 #include "miscadmin.h"
  21 #include "parser/scansup.h"
  22 #include "mb/pg_wchar.h"
  23
  24
  25 /* ----------------
  26  *              scanstr
  27  *
  28  * if the string passed in has escaped codes, map the escape codes to actual
  29  * chars
  30  *
  31  * the string returned is palloc'd and should eventually be pfree'd by the
  32  * caller!
  33  * ----------------
  34  */
  35
  36 char *
  37 scanstr(const char *s)
  38 {
  39         char       *newStr;
  40         int                     len,
  41                                 i,
  42                                 j;
  43
  44         if (s == NULL || s[0] == '\0')
  45                 return pstrdup("");
  46
  47         len = strlen(s);
  48
  49         newStr = palloc(len + 1);       /* string cannot get longer */
  50
  51         for (i = 0, j = 0; i < len; i++)
  52         {
  53                 if (s[i] == '\'')
  54                 {
  55                         /*
  56                          * Note: if scanner is working right, unescaped quotes can
  57                          * only appear in pairs, so there should be another character.
  58                          */
  59                         i++;
  60                         newStr[j] = s[i];
  61                 }
  62                 else if (s[i] == '\\')
  63                 {
  64                         i++;
  65                         switch (s[i])
  66                         {
  67                                 case 'b':
  68                                         newStr[j] = '\b';
  69                                         break;
  70                                 case 'f':
  71                                         newStr[j] = '\f';
  72                                         break;
  73                                 case 'n':
  74                                         newStr[j] = '\n';
  75                                         break;
  76                                 case 'r':
  77                                         newStr[j] = '\r';
  78                                         break;
  79                                 case 't':
  80                                         newStr[j] = '\t';
  81                                         break;
  82                                 case '0':
  83                                 case '1':
  84                                 case '2':
  85                                 case '3':
  86                                 case '4':
  87                                 case '5':
  88                                 case '6':
  89                                 case '7':
  90                                         {
  91                                                 int                     k;
  92                                                 long            octVal = 0;
  93
  94                                                 for (k = 0;
  95                                                          s[i + k] >= '0' && s[i + k] <= '7' && k < 3;
  96                                                          k++)
  97                                                         octVal = (octVal << 3) + (s[i + k] - '0');
  98                                                 i += k - 1;
  99                                                 newStr[j] = ((char) octVal);
 100                                         }
 101                                         break;
 102                                 default:
 103                                         newStr[j] = s[i];
 104                                         break;
 105                         }                                       /* switch */
 106                 }                                               /* s[i] == '\\' */
 107                 else
 108                         newStr[j] = s[i];
 109                 j++;
 110         }
 111         newStr[j] = '\0';
 112         return newStr;
 113 }
 114
 115
 116 /*
 117  * downcase_truncate_identifier() --- do appropriate downcasing and
 118  * truncation of an unquoted identifier.  Optionally warn of truncation.
 119  *
 120  * Returns a palloc'd string containing the adjusted identifier.
 121  *
 122  * Note: in some usages the passed string is not null-terminated.
 123  *
 124  * Note: the API of this function is designed to allow for downcasing
 125  * transformations that increase the string length, but we don't yet
 126  * support that.  If you want to implement it, you'll need to fix
 127  * SplitIdentifierString() in utils/adt/varlena.c.
 128  */
 129 char *
 130 downcase_truncate_identifier(const char *ident, int len, bool warn)
 131 {
 132         char       *result;
 133         int                     i;
 134
 135         result = palloc(len + 1);
 136         /*
 137          * SQL99 specifies Unicode-aware case normalization, which we don't yet
 138          * have the infrastructure for.  Instead we use tolower() to provide a
 139          * locale-aware translation.  However, there are some locales where this
 140          * is not right either (eg, Turkish may do strange things with 'i' and
 141          * 'I').  Our current compromise is to use tolower() for characters with
 142          * the high bit set, and use an ASCII-only downcasing for 7-bit
 143          * characters.
 144          */
 145         for (i = 0; i < len; i++)
 146         {
 147                 unsigned char   ch = (unsigned char) ident[i];
 148
 149                 if (ch >= 'A' && ch <= 'Z')
 150                         ch += 'a' - 'A';
 151                 else if (ch >= 0x80 && isupper(ch))
 152                         ch = tolower(ch);
 153                 result[i] = (char) ch;
 154         }
 155         result[i] = '\0';
 156
 157         if (i >= NAMEDATALEN)
 158                 truncate_identifier(result, i, warn);
 159
 160         return result;
 161 }
 162
 163 /*
 164  * truncate_identifier() --- truncate an identifier to NAMEDATALEN-1 bytes.
 165  *
 166  * The given string is modified in-place, if necessary.  A warning is
 167  * issued if requested.
 168  *
 169  * We require the caller to pass in the string length since this saves a
 170  * strlen() call in some common usages.
 171  */
 172 void
 173 truncate_identifier(char *ident, int len, bool warn)
 174 {
 175         if (len >= NAMEDATALEN)
 176         {
 177                 len = pg_mbcliplen(ident, len, NAMEDATALEN-1);
 178                 if (warn)
 179                         ereport(NOTICE,
 180                                         (errcode(ERRCODE_NAME_TOO_LONG),
 181                                          errmsg("identifier \"%s\" will be truncated to \"%.*s\"",
 182                                                         ident, len, ident)));
 183                 ident[len] = '\0';
 184         }
 185 }