Merge duplicate upper/lower/initcap() routines in oracle_compat.c and formatting.c

author Bruce Momjian <bruce@momjian.us>

Mon, 23 Jun 2008 19:27:19 +0000 (19:27 +0000)

committer Bruce Momjian <bruce@momjian.us>

Mon, 23 Jun 2008 19:27:19 +0000 (19:27 +0000)
author Bruce Momjian <bruce@momjian.us>
Mon, 23 Jun 2008 19:27:19 +0000 (19:27 +0000)
committer Bruce Momjian <bruce@momjian.us>
Mon, 23 Jun 2008 19:27:19 +0000 (19:27 +0000)
diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c

index 4e6ad07..c435d4b 100644 (file)
--- a/src/backend/utils/adt/formatting.c
+++ b/src/backend/utils/adt/formatting.c
@@ -1,7 +1,7 @@
  /* -----------------------------------------------------------------------
   * formatting.c
   *
- * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.142 2008/06/17 16:09:06 momjian Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/adt/formatting.c,v 1.143 2008/06/23 19:27:19 momjian Exp $
   *
   *
   *      Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group
@@ -925,9 +925,6 @@ static char *get_th(char *num, int type);
  static char *str_numth(char *dest, char *num, int type);
  static int     strspace_len(char *str);
  static int     strdigits_len(char *str);
-static char *str_toupper(char *buff);
-static char *str_tolower(char *buff);
-static char *str_initcap(char *buff);
  
  static int     seq_search(char *name, char **array, int type, int max, int *len);
  static void do_to_timestamp(text *date_txt, text *fmt,
@@ -1424,12 +1421,24 @@ str_numth(char *dest, char *num, int type)
         return dest;
  }
  
+/*
+ * If the system provides the needed functions for wide-character manipulation
+ * (which are all standardized by C99), then we implement upper/lower/initcap
+ * using wide-character functions, if necessary.  Otherwise we use the
+ * traditional <ctype.h> functions, which of course will not work as desired
+ * in multibyte character sets.  Note that in either case we are effectively
+ * assuming that the database character encoding matches the encoding implied
+ * by LC_CTYPE.
+ */
+
  /* ----------
- * Convert string to upper case. It is designed to be multibyte-aware.
+ * wide-character-aware lower function
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function.
   * ----------
   */
-static char *
-str_toupper(char *buff)
+char *
+str_tolower(char *buff, size_t nbytes)
  {
         char            *result;
  
@@ -1438,27 +1447,46 @@ str_toupper(char *buff)
  
  #ifdef USE_WIDE_UPPER_LOWER
         if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-               result = wstring_upper(buff);
+       {
+               wchar_t         *workspace;
+               int                     curr_char = 0;
+
+               /* Output workspace cannot have more codes than input bytes */
+               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+               char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
+
+               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+                       workspace[curr_char] = towlower(workspace[curr_char]);
+
+               /* Make result large enough; case change might change number of bytes */
+               result = palloc(curr_char * MB_CUR_MAX + 1);
+
+               wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
+               pfree(workspace);
+       }
         else
  #endif         /* USE_WIDE_UPPER_LOWER */
         {
                 char *p;
  
-               result = pstrdup(buff);
+               result = pnstrdup(buff, nbytes);
  
                 for (p = result; *p; p++)
-                       *p = pg_toupper((unsigned char) *p);
+                       *p = pg_tolower((unsigned char) *p);
         }
  
         return result;
  }
  
  /* ----------
- * Convert string to lower case. It is designed to be multibyte-aware.
+ * wide-character-aware upper function
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function.
   * ----------
   */
-static char *
-str_tolower(char *buff)
+char *
+str_toupper(char *buff, size_t nbytes)
  {
         char            *result;
  
@@ -1467,27 +1495,46 @@ str_tolower(char *buff)
  
  #ifdef USE_WIDE_UPPER_LOWER
         if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-               result = wstring_lower(buff);
+       {
+               wchar_t         *workspace;
+               int                     curr_char = 0;
+
+               /* Output workspace cannot have more codes than input bytes */
+               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
+
+               char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
+
+               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
+                       workspace[curr_char] = towupper(workspace[curr_char]);
+
+               /* Make result large enough; case change might change number of bytes */
+               result = palloc(curr_char * MB_CUR_MAX + 1);
+
+               wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
+               pfree(workspace);
+       }
         else
  #endif         /* USE_WIDE_UPPER_LOWER */
         {
                 char *p;
  
-               result = pstrdup(buff);
+               result = pnstrdup(buff, nbytes);
  
                 for (p = result; *p; p++)
-                       *p = pg_tolower((unsigned char) *p);
+                       *p = pg_toupper((unsigned char) *p);
         }
  
         return result;
  }
-  
+
  /* ----------
   * wide-character-aware initcap function
+ * We pass the number of bytes so we can pass varlena and char*
+ * to this function.
   * ----------
   */
-static char *
-str_initcap(char *buff)
+char *
+str_initcap(char *buff, size_t nbytes)
  {
         char            *result;
         bool            wasalnum = false;
@@ -1499,35 +1546,34 @@ str_initcap(char *buff)
         if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
         {
                 wchar_t         *workspace;
-               text            *in_text;
-               text            *out_text;
-               int                     i;
+               int                     curr_char = 0;
+
+               /* Output workspace cannot have more codes than input bytes */
+               workspace = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
  
-               in_text = cstring_to_text(buff);
-               workspace = texttowcs(in_text);
+               char2wchar(workspace, nbytes + 1, buff, nbytes + 1);
  
-               for (i = 0; workspace[i] != 0; i++)
+               for (curr_char = 0; workspace[curr_char] != 0; curr_char++)
                 {
                         if (wasalnum)
-                               workspace[i] = towlower(workspace[i]);
+                               workspace[curr_char] = towlower(workspace[curr_char]);
                         else
-                               workspace[i] = towupper(workspace[i]);
-                       wasalnum = iswalnum(workspace[i]);
+                               workspace[curr_char] = towupper(workspace[curr_char]);
+                       wasalnum = iswalnum(workspace[curr_char]);
                 }
  
-               out_text = wcstotext(workspace, i);
-               result = text_to_cstring(out_text);
+               /* Make result large enough; case change might change number of bytes */
+               result = palloc(curr_char * MB_CUR_MAX + 1);
  
+               wchar2char(result, workspace, curr_char * MB_CUR_MAX + 1);
                 pfree(workspace);
-               pfree(in_text);
-               pfree(out_text);
         }
         else
  #endif         /* USE_WIDE_UPPER_LOWER */
         {
                 char *p;
  
-               result = pstrdup(buff);
+               result = pnstrdup(buff, nbytes);
  
                 for (p = result; *p; p++)
                 {
@@ -1851,7 +1897,7 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 {
                                         char       *p = pstrdup(tmtcTzn(in));
  
-                                       strcpy(s, str_tolower(p));
+                                       strcpy(s, str_tolower(p, strlen(p)));
                                         pfree(p);
                                         s += strlen(s);
                                 }
@@ -1893,11 +1939,13 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 if (!tm->tm_mon)
                                         break;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1]));
+                                       strcpy(s, str_toupper(localized_full_months[tm->tm_mon - 1],
+                                                               strlen(localized_full_months[tm->tm_mon - 1])));
                                 else
                                 {
                                         strcpy(workbuff, months_full[tm->tm_mon - 1]);
-                                       sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
+                                       sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+                                                               str_toupper(workbuff, strlen(workbuff)));
                                 }
                                 s += strlen(s);
                                 break;
@@ -1906,7 +1954,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 if (!tm->tm_mon)
                                         break;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1]));
+                                       strcpy(s, str_initcap(localized_full_months[tm->tm_mon - 1],
+                                                               strlen(localized_full_months[tm->tm_mon - 1])));
                                 else
                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
                                 s += strlen(s);
@@ -1916,7 +1965,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 if (!tm->tm_mon)
                                         break;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1]));
+                                       strcpy(s, str_tolower(localized_full_months[tm->tm_mon - 1],
+                                                               strlen(localized_full_months[tm->tm_mon - 1])));
                                 else
                                 {
                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, months_full[tm->tm_mon - 1]);
@@ -1929,9 +1979,11 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 if (!tm->tm_mon)
                                         break;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1]));
+                                       strcpy(s, str_toupper(localized_abbrev_months[tm->tm_mon - 1],
+                                                               strlen(localized_abbrev_months[tm->tm_mon - 1])));
                                 else
-                                       strcpy(s, str_toupper(months[tm->tm_mon - 1]));
+                                       strcpy(s, str_toupper(months[tm->tm_mon - 1],
+                                                               strlen(months[tm->tm_mon - 1])));
                                 s += strlen(s);
                                 break;
                         case DCH_Mon:
@@ -1939,7 +1991,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 if (!tm->tm_mon)
                                         break;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1]));
+                                       strcpy(s, str_initcap(localized_abbrev_months[tm->tm_mon - 1],
+                                                               strlen(localized_abbrev_months[tm->tm_mon - 1])));
                                 else
                                         strcpy(s, months[tm->tm_mon - 1]);
                                 s += strlen(s);
@@ -1949,7 +2002,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                                 if (!tm->tm_mon)
                                         break;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1]));
+                                       strcpy(s, str_tolower(localized_abbrev_months[tm->tm_mon - 1],
+                                                               strlen(localized_abbrev_months[tm->tm_mon - 1])));
                                 else
                                 {
                                         strcpy(s, months[tm->tm_mon - 1]);
@@ -1966,18 +2020,21 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                         case DCH_DAY:
                                 INVALID_FOR_INTERVAL;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_toupper(localized_full_days[tm->tm_wday]));
+                                       strcpy(s, str_toupper(localized_full_days[tm->tm_wday],
+                                                               strlen(localized_full_days[tm->tm_wday])));
                                 else
                                 {
                                         strcpy(workbuff, days[tm->tm_wday]);
-                                       sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, str_toupper(workbuff));
+                                       sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9,
+                                                               str_toupper(workbuff, strlen(workbuff)));
                                 }
                                 s += strlen(s);
                                 break;
                         case DCH_Day:
                                 INVALID_FOR_INTERVAL;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_initcap(localized_full_days[tm->tm_wday]));
+                                       strcpy(s, str_initcap(localized_full_days[tm->tm_wday],
+                                                               strlen(localized_full_days[tm->tm_wday])));
                                 else
                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
                                 s += strlen(s);
@@ -1985,7 +2042,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                         case DCH_day:
                                 INVALID_FOR_INTERVAL;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_tolower(localized_full_days[tm->tm_wday]));
+                                       strcpy(s, str_tolower(localized_full_days[tm->tm_wday],
+                                                               strlen(localized_full_days[tm->tm_wday])));
                                 else
                                 {
                                         sprintf(s, "%*s", S_FM(n->suffix) ? 0 : -9, days[tm->tm_wday]);
@@ -1996,15 +2054,18 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                         case DCH_DY:
                                 INVALID_FOR_INTERVAL;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday]));
+                                       strcpy(s, str_toupper(localized_abbrev_days[tm->tm_wday],
+                                                               strlen(localized_abbrev_days[tm->tm_wday])));
                                 else
-                                       strcpy(s, str_toupper(days_short[tm->tm_wday]));
+                                       strcpy(s, str_toupper(days_short[tm->tm_wday],
+                                                               strlen(days_short[tm->tm_wday])));
                                 s += strlen(s);
                                 break;
                         case DCH_Dy:
                                 INVALID_FOR_INTERVAL;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday]));
+                                       strcpy(s, str_initcap(localized_abbrev_days[tm->tm_wday],
+                                                               strlen(localized_abbrev_days[tm->tm_wday])));
                                 else
                                         strcpy(s, days_short[tm->tm_wday]);
                                 s += strlen(s);
@@ -2012,7 +2073,8 @@ DCH_to_char(FormatNode *node, bool is_interval, TmToChar *in, char *out)
                         case DCH_dy:
                                 INVALID_FOR_INTERVAL;
                                 if (S_TM(n->suffix))
-                                       strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday]));
+                                       strcpy(s, str_tolower(localized_abbrev_days[tm->tm_wday],
+                                                               strlen(localized_abbrev_days[tm->tm_wday])));
                                 else
                                 {
                                         strcpy(s, days_short[tm->tm_wday]);
@@ -4277,12 +4339,14 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, char *number,
                                 case NUM_rn:
                                         if (IS_FILLMODE(Np->Num))
                                         {
-                                               strcpy(Np->inout_p, str_tolower(Np->number_p));
+                                               strcpy(Np->inout_p, str_tolower(Np->number_p,
+                                                               strlen(Np->number_p)));
                                                 Np->inout_p += strlen(Np->inout_p) - 1;
                                         }
                                         else
                                         {
-                                               sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p));
+                                               sprintf(Np->inout_p, "%15s", str_tolower(Np->number_p,
+                                                               strlen(Np->number_p)));
                                                 Np->inout_p += strlen(Np->inout_p) - 1;
                                         }
                                         break;
diff --git a/src/backend/utils/adt/oracle_compat.c b/src/backend/utils/adt/oracle_compat.c

index 372ff83..bf29e7d 100644 (file)
--- a/src/backend/utils/adt/oracle_compat.c
+++ b/src/backend/utils/adt/oracle_compat.c
@@ -9,7 +9,7 @@
   *
   *
   * IDENTIFICATION
- *     $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.80 2008/06/17 16:09:06 momjian Exp $
+ *     $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.81 2008/06/23 19:27:19 momjian Exp $
   *
   *-------------------------------------------------------------------------
   */
@@ -29,292 +29,16 @@
  #endif
  
  #include "utils/builtins.h"
+#include "utils/formatting.h"
  #include "utils/pg_locale.h"
  #include "mb/pg_wchar.h"
  
  
-/*
- * If the system provides the needed functions for wide-character manipulation
- * (which are all standardized by C99), then we implement upper/lower/initcap
- * using wide-character functions.     Otherwise we use the traditional <ctype.h>
- * functions, which of course will not work as desired in multibyte character
- * sets.  Note that in either case we are effectively assuming that the
- * database character encoding matches the encoding implied by LC_CTYPE.
- */
-#ifdef USE_WIDE_UPPER_LOWER
-char      *wstring_lower(char *str);
-char      *wstring_upper(char *str);
-wchar_t           *texttowcs(const text *txt);
-text      *wcstotext(const wchar_t *str, int ncodes);
-#endif
-
  static text *dotrim(const char *string, int stringlen,
            const char *set, int setlen,
            bool doltrim, bool dortrim);
  
  
-#ifdef USE_WIDE_UPPER_LOWER
-
-/*
- * Convert a TEXT value into a palloc'd wchar string.
- */
-wchar_t *
-texttowcs(const text *txt)
-{
-       int                     nbytes = VARSIZE_ANY_EXHDR(txt);
-       char       *workstr;
-       wchar_t    *result;
-       size_t          ncodes;
-
-       /* Overflow paranoia */
-       if (nbytes < 0 ||
-               nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OUT_OF_MEMORY),
-                                errmsg("out of memory")));
-
-       /* Need a null-terminated version of the input */
-       workstr = text_to_cstring(txt);
-
-       /* Output workspace cannot have more codes than input bytes */
-       result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-       /* Do the conversion */
-       ncodes = mbstowcs(result, workstr, nbytes + 1);
-
-       if (ncodes == (size_t) -1)
-       {
-               /*
-                * Invalid multibyte character encountered.  We try to give a useful
-                * error message by letting pg_verifymbstr check the string.  But it's
-                * possible that the string is OK to us, and not OK to mbstowcs ---
-                * this suggests that the LC_CTYPE locale is different from the
-                * database encoding.  Give a generic error message if verifymbstr
-                * can't find anything wrong.
-                */
-               pg_verifymbstr(workstr, nbytes, false);
-               ereport(ERROR,
-                               (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                                errmsg("invalid multibyte character for locale"),
-                                errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-       }
-
-       Assert(ncodes <= (size_t) nbytes);
-
-       return result;
-}
-
-
-/*
- * Convert a wchar string into a palloc'd TEXT value.  The wchar string
- * must be zero-terminated, but we also require the caller to pass the string
- * length, since it will know it anyway in current uses.
- */
-text *
-wcstotext(const wchar_t *str, int ncodes)
-{
-       text       *result;
-       size_t          nbytes;
-
-       /* Overflow paranoia */
-       if (ncodes < 0 ||
-               ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OUT_OF_MEMORY),
-                                errmsg("out of memory")));
-
-       /* Make workspace certainly large enough for result */
-       result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
-
-       /* Do the conversion */
-       nbytes = wcstombs((char *) VARDATA(result), str,
-                                         (ncodes + 1) * MB_CUR_MAX);
-
-       if (nbytes == (size_t) -1)
-       {
-               /* Invalid multibyte character encountered ... shouldn't happen */
-               ereport(ERROR,
-                               (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                                errmsg("invalid multibyte character for locale")));
-       }
-
-       Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
-
-       SET_VARSIZE(result, nbytes + VARHDRSZ);
-
-       return result;
-}
-#endif   /* USE_WIDE_UPPER_LOWER */
-
-
-/*
- * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding.
- * To make use of the upper/lower functionality, we need to map UTF8 to
- * UTF16, which for some reason mbstowcs and wcstombs won't do for us.
- * This conversion layer takes care of it.
- */
-
-#ifdef WIN32
-
-/* texttowcs for the case of UTF8 to UTF16 */
-static wchar_t *
-win32_utf8_texttowcs(const text *txt)
-{
-       int                     nbytes = VARSIZE_ANY_EXHDR(txt);
-       wchar_t    *result;
-       int                     r;
-
-       /* Overflow paranoia */
-       if (nbytes < 0 ||
-               nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
-               ereport(ERROR,
-                               (errcode(ERRCODE_OUT_OF_MEMORY),
-                                errmsg("out of memory")));
-
-       /* Output workspace cannot have more codes than input bytes */
-       result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
-
-       /* stupid Microsloth API does not work for zero-length input */
-       if (nbytes == 0)
-               r = 0;
-       else
-       {
-               /* Do the conversion */
-               r = MultiByteToWideChar(CP_UTF8, 0, VARDATA_ANY(txt), nbytes,
-                                                               result, nbytes);
-
-               if (r <= 0)                             /* assume it's NO_UNICODE_TRANSLATION */
-               {
-                       /* see notes above about error reporting */
-                       pg_verifymbstr(VARDATA_ANY(txt), nbytes, false);
-                       ereport(ERROR,
-                                       (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                                        errmsg("invalid multibyte character for locale"),
-                                        errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
-               }
-       }
-
-       /* Append trailing null wchar (MultiByteToWideChar won't have) */
-       Assert(r <= nbytes);
-       result[r] = 0;
-
-       return result;
-}
-
-/* wcstotext for the case of UTF16 to UTF8 */
-static text *
-win32_utf8_wcstotext(const wchar_t *str)
-{
-       text       *result;
-       int                     nbytes;
-       int                     r;
-
-       /* Compute size of output string (this *will* include trailing null) */
-       nbytes = WideCharToMultiByte(CP_UTF8, 0, str, -1, NULL, 0, NULL, NULL);
-       if (nbytes <= 0)                        /* shouldn't happen */
-               ereport(ERROR,
-                               (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                                errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                                               GetLastError())));
-
-       result = palloc(nbytes + VARHDRSZ);
-
-       r = WideCharToMultiByte(CP_UTF8, 0, str, -1, VARDATA(result), nbytes,
-                                                       NULL, NULL);
-       if (r != nbytes)                        /* shouldn't happen */
-               ereport(ERROR,
-                               (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
-                                errmsg("UTF-16 to UTF-8 translation failed: %lu",
-                                               GetLastError())));
-
-       SET_VARSIZE(result, nbytes + VARHDRSZ - 1); /* -1 to ignore null */
-
-       return result;
-}
-
-/* interface layer to check which encoding is in use */
-
-static wchar_t *
-win32_texttowcs(const text *txt)
-{
-       if (GetDatabaseEncoding() == PG_UTF8)
-               return win32_utf8_texttowcs(txt);
-       else
-               return texttowcs(txt);
-}
-
-static text *
-win32_wcstotext(const wchar_t *str, int ncodes)
-{
-       if (GetDatabaseEncoding() == PG_UTF8)
-               return win32_utf8_wcstotext(str);
-       else
-               return wcstotext(str, ncodes);
-}
-
-/* use macros to cause routines below to call interface layer */
-
-#define texttowcs      win32_texttowcs
-#define wcstotext      win32_wcstotext
-#endif   /* WIN32 */
-
-#ifdef USE_WIDE_UPPER_LOWER
-/*
- * string_upper and string_lower are used for correct multibyte upper/lower
- * transformations localized strings. Returns pointers to transformated
- * string.
- */
-char *
-wstring_upper(char *str)
-{
-       wchar_t    *workspace;
-       text       *in_text;
-       text       *out_text;
-       char       *result;
-       int                     i;
-
-       in_text = cstring_to_text(str);
-       workspace = texttowcs(in_text);
-
-       for (i = 0; workspace[i] != 0; i++)
-               workspace[i] = towupper(workspace[i]);
-
-       out_text = wcstotext(workspace, i);
-       result = text_to_cstring(out_text);
-
-       pfree(workspace);
-       pfree(in_text);
-       pfree(out_text);
-
-       return result;
-}
-
-char *
-wstring_lower(char *str)
-{
-       wchar_t    *workspace;
-       text       *in_text;
-       text       *out_text;
-       char       *result;
-       int                     i;
-
-       in_text = cstring_to_text(str);
-       workspace = texttowcs(in_text);
-
-       for (i = 0; workspace[i] != 0; i++)
-               workspace[i] = towlower(workspace[i]);
-
-       out_text = wcstotext(workspace, i);
-       result = text_to_cstring(out_text);
-
-       pfree(workspace);
-       pfree(in_text);
-       pfree(out_text);
-
-       return result;
-}
-#endif   /* USE_WIDE_UPPER_LOWER */
-
  /********************************************************************
   *
   * lower
@@ -332,52 +56,15 @@ wstring_lower(char *str)
  Datum
  lower(PG_FUNCTION_ARGS)
  {
-#ifdef USE_WIDE_UPPER_LOWER
+       text    *in_string = PG_GETARG_TEXT_PP(0);
+       char    *out_string;
+       text    *result;
  
-       /*
-        * Use wide char code only when max encoding length > 1 and ctype != C.
-        * Some operating systems fail with multi-byte encodings and a C locale.
-        * Also, for a C locale there is no need to process as multibyte.
-        */
-       if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-       {
-               text       *string = PG_GETARG_TEXT_PP(0);
-               text       *result;
-               wchar_t    *workspace;
-               int                     i;
-
-               workspace = texttowcs(string);
-
-               for (i = 0; workspace[i] != 0; i++)
-                       workspace[i] = towlower(workspace[i]);
-
-               result = wcstotext(workspace, i);
-
-               pfree(workspace);
-
-               PG_RETURN_TEXT_P(result);
-       }
-       else
-#endif   /* USE_WIDE_UPPER_LOWER */
-       {
-               text       *string = PG_GETARG_TEXT_P_COPY(0);
-               char       *ptr;
-               int                     m;
+       out_string = str_tolower(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
+       result = cstring_to_text(out_string);
+       pfree(out_string);
  
-               /*
-                * Since we copied the string, we can scribble directly on the value
-                */
-               ptr = VARDATA(string);
-               m = VARSIZE(string) - VARHDRSZ;
-
-               while (m-- > 0)
-               {
-                       *ptr = tolower((unsigned char) *ptr);
-                       ptr++;
-               }
-
-               PG_RETURN_TEXT_P(string);
-       }
+       PG_RETURN_TEXT_P(result);
  }
  
  
@@ -398,52 +85,15 @@ lower(PG_FUNCTION_ARGS)
  Datum
  upper(PG_FUNCTION_ARGS)
  {
-#ifdef USE_WIDE_UPPER_LOWER
+       text    *in_string = PG_GETARG_TEXT_PP(0);
+       char    *out_string;
+       text    *result;
  
-       /*
-        * Use wide char code only when max encoding length > 1 and ctype != C.
-        * Some operating systems fail with multi-byte encodings and a C locale.
-        * Also, for a C locale there is no need to process as multibyte.
-        */
-       if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-       {
-               text       *string = PG_GETARG_TEXT_PP(0);
-               text       *result;
-               wchar_t    *workspace;
-               int                     i;
-
-               workspace = texttowcs(string);
-
-               for (i = 0; workspace[i] != 0; i++)
-                       workspace[i] = towupper(workspace[i]);
-
-               result = wcstotext(workspace, i);
-
-               pfree(workspace);
+       out_string = str_toupper(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
+       result = cstring_to_text(out_string);
+       pfree(out_string);
  
-               PG_RETURN_TEXT_P(result);
-       }
-       else
-#endif   /* USE_WIDE_UPPER_LOWER */
-       {
-               text       *string = PG_GETARG_TEXT_P_COPY(0);
-               char       *ptr;
-               int                     m;
-
-               /*
-                * Since we copied the string, we can scribble directly on the value
-                */
-               ptr = VARDATA(string);
-               m = VARSIZE(string) - VARHDRSZ;
-
-               while (m-- > 0)
-               {
-                       *ptr = toupper((unsigned char) *ptr);
-                       ptr++;
-               }
-
-               PG_RETURN_TEXT_P(string);
-       }
+       PG_RETURN_TEXT_P(result);
  }
  
  
@@ -467,64 +117,15 @@ upper(PG_FUNCTION_ARGS)
  Datum
  initcap(PG_FUNCTION_ARGS)
  {
-#ifdef USE_WIDE_UPPER_LOWER
+       text    *in_string = PG_GETARG_TEXT_PP(0);
+       char    *out_string;
+       text    *result;
  
-       /*
-        * Use wide char code only when max encoding length > 1 and ctype != C.
-        * Some operating systems fail with multi-byte encodings and a C locale.
-        * Also, for a C locale there is no need to process as multibyte.
-        */
-       if (pg_database_encoding_max_length() > 1 && !lc_ctype_is_c())
-       {
-               text       *string = PG_GETARG_TEXT_PP(0);
-               text       *result;
-               wchar_t    *workspace;
-               int                     wasalnum = 0;
-               int                     i;
-
-               workspace = texttowcs(string);
-
-               for (i = 0; workspace[i] != 0; i++)
-               {
-                       if (wasalnum)
-                               workspace[i] = towlower(workspace[i]);
-                       else
-                               workspace[i] = towupper(workspace[i]);
-                       wasalnum = iswalnum(workspace[i]);
-               }
-
-               result = wcstotext(workspace, i);
-
-               pfree(workspace);
-
-               PG_RETURN_TEXT_P(result);
-       }
-       else
-#endif   /* USE_WIDE_UPPER_LOWER */
-       {
-               text       *string = PG_GETARG_TEXT_P_COPY(0);
-               int                     wasalnum = 0;
-               char       *ptr;
-               int                     m;
-
-               /*
-                * Since we copied the string, we can scribble directly on the value
-                */
-               ptr = VARDATA(string);
-               m = VARSIZE(string) - VARHDRSZ;
-
-               while (m-- > 0)
-               {
-                       if (wasalnum)
-                               *ptr = tolower((unsigned char) *ptr);
-                       else
-                               *ptr = toupper((unsigned char) *ptr);
-                       wasalnum = isalnum((unsigned char) *ptr);
-                       ptr++;
-               }
+       out_string = str_initcap(VARDATA_ANY(in_string), VARSIZE_ANY_EXHDR(in_string));
+       result = cstring_to_text(out_string);
+       pfree(out_string);
  
-               PG_RETURN_TEXT_P(string);
-       }
+       PG_RETURN_TEXT_P(result);
  }
  
  
diff --git a/src/include/utils/formatting.h b/src/include/utils/formatting.h

index 2474afb..0674adf 100644 (file)
--- a/src/include/utils/formatting.h
+++ b/src/include/utils/formatting.h
@@ -2,7 +2,7 @@
  /* -----------------------------------------------------------------------
   * formatting.h
   *
- * $PostgreSQL: pgsql/src/include/utils/formatting.h,v 1.18 2008/01/01 19:45:59 momjian Exp $
+ * $PostgreSQL: pgsql/src/include/utils/formatting.h,v 1.19 2008/06/23 19:27:19 momjian Exp $
   *
   *
   *      Portions Copyright (c) 1999-2008, PostgreSQL Global Development Group
@@ -21,6 +21,10 @@
  #include "fmgr.h"
  
  
+extern char *str_tolower(char *buff, size_t nbytes);
+extern char *str_toupper(char *buff, size_t nbytes);
+extern char *str_initcap(char *buff, size_t nbytes);
+
  extern Datum timestamp_to_char(PG_FUNCTION_ARGS);
  extern Datum timestamptz_to_char(PG_FUNCTION_ARGS);
  extern Datum interval_to_char(PG_FUNCTION_ARGS);
author	Bruce Momjian <bruce@momjian.us>
	Mon, 23 Jun 2008 19:27:19 +0000 (19:27 +0000)
committer	Bruce Momjian <bruce@momjian.us>
	Mon, 23 Jun 2008 19:27:19 +0000 (19:27 +0000)
src/backend/utils/adt/formatting.c		patch \| blob \| history
src/backend/utils/adt/oracle_compat.c		patch \| blob \| history
src/include/utils/formatting.h		patch \| blob \| history