OSDN Git Service

Improve implementation of btrim/ltrim/rtrim: provide a special case for
author: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 23 May 2003 22:33:23 +0000 (22:33 +0000)
committer: Tom Lane <tgl@sss.pgh.pa.us>
Fri, 23 May 2003 22:33:23 +0000 (22:33 +0000)
single-byte encodings, and a direct C implementation of the single-argument
forms (where spaces are always what gets trimmed).  This is in preparation
for using rtrim1() as the bpchar-to-text cast operator, but is a useful
performance improvement even if we decide not to do that.

src/backend/utils/adt/oracle_compat.c
src/include/catalog/catversion.h
src/include/catalog/pg_proc.h
src/include/utils/builtins.h

index 48453c4..54730de 100644 (file)
@@ -2,26 +2,30 @@
  * oracle_compat.c
  *     Oracle compatible functions.
  *
- * Copyright (c) 1996-2001, PostgreSQL Global Development Group
+ * Copyright (c) 1996-2003, PostgreSQL Global Development Group
  *
  *     Author: Edmund Mergl <E.Mergl@bawue.de>
  *     Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
  *
  *
  * IDENTIFICATION
- *     $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.43 2002/09/04 20:31:28 momjian Exp $
+ *     $Header: /cvsroot/pgsql/src/backend/utils/adt/oracle_compat.c,v 1.44 2003/05/23 22:33:20 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
-
 #include "postgres.h"
 
 #include <ctype.h>
 
 #include "utils/builtins.h"
-
 #include "mb/pg_wchar.h"
 
+
+static text *dotrim(const char *string, int stringlen,
+                                       const char *set, int setlen,
+                                       bool doltrim, bool dortrim);
+
+
 /********************************************************************
  *
  * lower
@@ -349,86 +353,192 @@ btrim(PG_FUNCTION_ARGS)
        text       *string = PG_GETARG_TEXT_P(0);
        text       *set = PG_GETARG_TEXT_P(1);
        text       *ret;
-       char       *ptr,
-                          *end,
-                          *ptr2,
-                          *end2;
-       int                     m;
 
-       char      **mp;
-       int                     mplen;
-       char       *p;
-       int                     mblen;
-       int                     len;
+       ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
+                                VARDATA(set), VARSIZE(set) - VARHDRSZ,
+                                true, true);
 
-       if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
-               (VARSIZE(set) - VARHDRSZ) <= 0)
-               PG_RETURN_TEXT_P(string);
+       PG_RETURN_TEXT_P(ret);
+}
 
-       ptr = VARDATA(string);
+/********************************************************************
+ *
+ * btrim1 --- btrim with set fixed as ' '
+ *
+ ********************************************************************/
 
-       len = m;
-       mp = (char **) palloc(len * sizeof(char *));
-       p = ptr;
-       mplen = 0;
+Datum
+btrim1(PG_FUNCTION_ARGS)
+{
+       text       *string = PG_GETARG_TEXT_P(0);
+       text       *ret;
 
-       /* build the mb pointer array */
-       while (len > 0)
-       {
-               mp[mplen++] = p;
-               mblen = pg_mblen(p);
-               p += mblen;
-               len -= mblen;
-       }
-       mplen--;
-       end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
+       ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
+                                " ", 1,
+                                true, true);
 
-       while (m > 0)
-       {
-               int                     str_len = pg_mblen(ptr);
+       PG_RETURN_TEXT_P(ret);
+}
 
-               ptr2 = VARDATA(set);
-               while (ptr2 <= end2)
+/*
+ * Common implementation for btrim, ltrim, rtrim
+ */
+static text *
+dotrim(const char *string, int stringlen,
+          const char *set, int setlen,
+          bool doltrim, bool dortrim)
+{
+       text       *result;
+       int                     i;
+
+       /* Nothing to do if either string or set is empty */
+       if (stringlen > 0 && setlen > 0)
+       {
+               if (pg_database_encoding_max_length() > 1)
                {
-                       int                     set_len = pg_mblen(ptr2);
+                       /*
+                        * In the multibyte-encoding case, build arrays of pointers to
+                        * character starts, so that we can avoid inefficient checks in
+                        * the inner loops.
+                        */
+                       const char **stringchars;
+                       const char **setchars;
+                       int                *stringmblen;
+                       int                *setmblen;
+                       int                     stringnchars;
+                       int                     setnchars;
+                       int                     resultndx;
+                       int                     resultnchars;
+                       const char *p;
+                       int                     len;
+                       int                     mblen;
+                       const char *str_pos;
+                       int                     str_len;
+
+                       stringchars = (const char **) palloc(stringlen * sizeof(char *));
+                       stringmblen = (int *) palloc(stringlen * sizeof(int));
+                       stringnchars = 0;
+                       p = string;
+                       len = stringlen;
+                       while (len > 0)
+                       {
+                               stringchars[stringnchars] = p;
+                               stringmblen[stringnchars] = mblen = pg_mblen(p);
+                               stringnchars++;
+                               p += mblen;
+                               len -= mblen;
+                       }
 
-                       if (str_len == set_len &&
-                               memcmp(ptr, ptr2, str_len) == 0)
-                               break;
-                       ptr2 += set_len;
-               }
-               if (ptr2 > end2)
-                       break;
-               ptr += str_len;
-               m -= str_len;
-       }
+                       setchars = (const char **) palloc(setlen * sizeof(char *));
+                       setmblen = (int *) palloc(setlen * sizeof(int));
+                       setnchars = 0;
+                       p = set;
+                       len = setlen;
+                       while (len > 0)
+                       {
+                               setchars[setnchars] = p;
+                               setmblen[setnchars] = mblen = pg_mblen(p);
+                               setnchars++;
+                               p += mblen;
+                               len -= mblen;
+                       }
 
-       while (m > 0)
-       {
-               int                     str_len;
+                       resultndx = 0;          /* index in stringchars[] */
+                       resultnchars = stringnchars;
 
-               end = mp[mplen--];
-               str_len = pg_mblen(end);
-               ptr2 = VARDATA(set);
-               while (ptr2 <= end2)
+                       if (doltrim)
+                       {
+                               while (resultnchars > 0)
+                               {
+                                       str_pos = stringchars[resultndx];
+                                       str_len = stringmblen[resultndx];
+                                       for (i = 0; i < setnchars; i++)
+                                       {
+                                               if (str_len == setmblen[i] &&
+                                                       memcmp(str_pos, setchars[i], str_len) == 0)
+                                                       break;
+                                       }
+                                       if (i >= setnchars)
+                                               break;  /* no match here */
+                                       string += str_len;
+                                       stringlen -= str_len;
+                                       resultndx++;
+                                       resultnchars--;
+                               }
+                       }
+
+                       if (dortrim)
+                       {
+                               while (resultnchars > 0)
+                               {
+                                       str_pos = stringchars[resultndx + resultnchars - 1];
+                                       str_len = stringmblen[resultndx + resultnchars - 1];
+                                       for (i = 0; i < setnchars; i++)
+                                       {
+                                               if (str_len == setmblen[i] &&
+                                                       memcmp(str_pos, setchars[i], str_len) == 0)
+                                                       break;
+                                       }
+                                       if (i >= setnchars)
+                                               break;  /* no match here */
+                                       stringlen -= str_len;
+                                       resultnchars--;
+                               }
+                       }
+
+                       pfree(stringchars);
+                       pfree(stringmblen);
+                       pfree(setchars);
+                       pfree(setmblen);
+               }
+               else
                {
-                       int                     set_len = pg_mblen(ptr2);
+                       /*
+                        * In the single-byte-encoding case, we don't need such overhead.
+                        */
+                       if (doltrim)
+                       {
+                               while (stringlen > 0)
+                               {
+                                       char    str_ch = *string;
+
+                                       for (i = 0; i < setlen; i++)
+                                       {
+                                               if (str_ch == set[i])
+                                                       break;
+                                       }
+                                       if (i >= setlen)
+                                               break;  /* no match here */
+                                       string++;
+                                       stringlen--;
+                               }
+                       }
 
-                       if (str_len == set_len &&
-                               memcmp(end, ptr2, str_len) == 0)
-                               break;
-                       ptr2 += set_len;
+                       if (dortrim)
+                       {
+                               while (stringlen > 0)
+                               {
+                                       char    str_ch = string[stringlen - 1];
+
+                                       for (i = 0; i < setlen; i++)
+                                       {
+                                               if (str_ch == set[i])
+                                                       break;
+                                       }
+                                       if (i >= setlen)
+                                               break;  /* no match here */
+                                       stringlen--;
+                               }
+                       }
                }
-               if (ptr2 > end2)
-                       break;
-               m -= str_len;
        }
-       pfree(mp);
-       ret = (text *) palloc(VARHDRSZ + m);
-       VARATT_SIZEP(ret) = VARHDRSZ + m;
-       memcpy(VARDATA(ret), ptr, m);
 
-       PG_RETURN_TEXT_P(ret);
+       /* Return selected portion of string */
+       result = (text *) palloc(VARHDRSZ + stringlen);
+       VARATT_SIZEP(result) = VARHDRSZ + stringlen;
+       memcpy(VARDATA(result), string, stringlen);
+
+       return result;
 }
 
 /********************************************************************
@@ -525,45 +635,33 @@ ltrim(PG_FUNCTION_ARGS)
        text       *string = PG_GETARG_TEXT_P(0);
        text       *set = PG_GETARG_TEXT_P(1);
        text       *ret;
-       char       *ptr,
-                          *ptr2,
-                          *end2;
-       int                     m;
 
-       if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
-               (VARSIZE(set) - VARHDRSZ) <= 0)
-               PG_RETURN_TEXT_P(string);
+       ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
+                                VARDATA(set), VARSIZE(set) - VARHDRSZ,
+                                true, false);
 
-       ptr = VARDATA(string);
-       end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
+       PG_RETURN_TEXT_P(ret);
+}
 
-       while (m > 0)
-       {
-               int                     str_len = pg_mblen(ptr);
+/********************************************************************
+ *
+ * ltrim1 --- ltrim with set fixed as ' '
+ *
+ ********************************************************************/
 
-               ptr2 = VARDATA(set);
-               while (ptr2 <= end2)
-               {
-                       int                     set_len = pg_mblen(ptr2);
+Datum
+ltrim1(PG_FUNCTION_ARGS)
+{
+       text       *string = PG_GETARG_TEXT_P(0);
+       text       *ret;
 
-                       if (str_len == set_len &&
-                               memcmp(ptr, ptr2, str_len) == 0)
-                               break;
-                       ptr2 += set_len;
-               }
-               if (ptr2 > end2)
-                       break;
-               ptr += str_len;
-               m -= str_len;
-       }
-       ret = (text *) palloc(VARHDRSZ + m);
-       VARATT_SIZEP(ret) = VARHDRSZ + m;
-       memcpy(VARDATA(ret), ptr, m);
+       ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
+                                " ", 1,
+                                true, false);
 
        PG_RETURN_TEXT_P(ret);
 }
 
-
 /********************************************************************
  *
  * rtrim
@@ -586,64 +684,28 @@ rtrim(PG_FUNCTION_ARGS)
        text       *set = PG_GETARG_TEXT_P(1);
        text       *ret;
 
-       char       *ptr,
-                          *end,
-                          *ptr2,
-                          *end2;
-       int                     m;
-
-       char      **mp;
-       int                     mplen;
-       char       *p;
-       int                     mblen;
-       int                     len;
-
-       if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
-               (VARSIZE(set) - VARHDRSZ) <= 0)
-               PG_RETURN_TEXT_P(string);
-
-       ptr = VARDATA(string);
+       ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
+                                VARDATA(set), VARSIZE(set) - VARHDRSZ,
+                                false, true);
 
-       len = m;
-       mp = (char **) palloc(len * sizeof(char *));
-       p = ptr;
-       mplen = 0;
-
-       /* build the mb pointer array */
-       while (len > 0)
-       {
-               mp[mplen++] = p;
-               mblen = pg_mblen(p);
-               p += mblen;
-               len -= mblen;
-       }
-       mplen--;
-       end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
+       PG_RETURN_TEXT_P(ret);
+}
 
-       while (m > 0)
-       {
-               int                     str_len;
+/********************************************************************
+ *
+ * rtrim1 --- rtrim with set fixed as ' '
+ *
+ ********************************************************************/
 
-               end = mp[mplen--];
-               str_len = pg_mblen(end);
-               ptr2 = VARDATA(set);
-               while (ptr2 <= end2)
-               {
-                       int                     set_len = pg_mblen(ptr2);
+Datum
+rtrim1(PG_FUNCTION_ARGS)
+{
+       text       *string = PG_GETARG_TEXT_P(0);
+       text       *ret;
 
-                       if (str_len == set_len &&
-                               memcmp(end, ptr2, str_len) == 0)
-                               break;
-                       ptr2 += set_len;
-               }
-               if (ptr2 > end2)
-                       break;
-               m -= str_len;
-       }
-       pfree(mp);
-       ret = (text *) palloc(VARHDRSZ + m);
-       VARATT_SIZEP(ret) = VARHDRSZ + m;
-       memcpy(VARDATA(ret), ptr, m);
+       ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
+                                " ", 1,
+                                false, true);
 
        PG_RETURN_TEXT_P(ret);
 }
index 5accd1d..c7848af 100644 (file)
@@ -37,7 +37,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: catversion.h,v 1.194 2003/05/15 15:50:19 petere Exp $
+ * $Id: catversion.h,v 1.195 2003/05/23 22:33:22 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -53,6 +53,6 @@
  */
 
 /*                                                     yyyymmddN */
-#define CATALOG_VERSION_NO     200305151
+#define CATALOG_VERSION_NO     200305231
 
 #endif
index f53af27..f2f3e3e 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: pg_proc.h,v 1.300 2003/05/15 15:50:19 petere Exp $
+ * $Id: pg_proc.h,v 1.301 2003/05/23 22:33:22 tgl Exp $
  *
  * NOTES
  *       The script catalog/genbki.sh reads this file and generates .bki
@@ -2134,9 +2134,9 @@ DESCR("left-pad string to length");
 DATA(insert OID =  874 (  rpad            PGNSP PGUID 12 f f t f i 3 25 "25 23 25"  rpad - _null_ ));
 DESCR("right-pad string to length");
 DATA(insert OID =  875 (  ltrim                   PGNSP PGUID 12 f f t f i 2 25 "25 25"        ltrim - _null_ ));
-DESCR("left-pad string to length");
+DESCR("trim selected characters from left end of string");
 DATA(insert OID =  876 (  rtrim                   PGNSP PGUID 12 f f t f i 2 25 "25 25"        rtrim - _null_ ));
-DESCR("right-pad string to length");
+DESCR("trim selected characters from right end of string");
 DATA(insert OID =  877 (  substr          PGNSP PGUID 12 f f t f i 3 25 "25 23 23"  text_substr - _null_ ));
 DESCR("return portion of string");
 DATA(insert OID =  878 (  translate    PGNSP PGUID 12 f f t f i 3 25 "25 25 25"  translate - _null_ ));
@@ -2145,16 +2145,16 @@ DATA(insert OID =  879 (  lpad             PGNSP PGUID 14 f f t f i 2 25 "25 23"        "select
 DESCR("left-pad string to length");
 DATA(insert OID =  880 (  rpad            PGNSP PGUID 14 f f t f i 2 25 "25 23"        "select rpad($1, $2, \' \')" - _null_ ));
 DESCR("right-pad string to length");
-DATA(insert OID =  881 (  ltrim                   PGNSP PGUID 14 f f t f i 1 25 "25"  "select ltrim($1, \' \')" - _null_ ));
-DESCR("remove initial characters from string");
-DATA(insert OID =  882 (  rtrim                   PGNSP PGUID 14 f f t f i 1 25 "25"  "select rtrim($1, \' \')" - _null_ ));
-DESCR("remove trailing characters from string");
+DATA(insert OID =  881 (  ltrim                   PGNSP PGUID 12 f f t f i 1 25 "25"  ltrim1 - _null_ ));
+DESCR("trim spaces from left end of string");
+DATA(insert OID =  882 (  rtrim                   PGNSP PGUID 12 f f t f i 1 25 "25"  rtrim1 - _null_ ));
+DESCR("trim spaces from right end of string");
 DATA(insert OID =  883 (  substr          PGNSP PGUID 12 f f t f i 2 25 "25 23"        text_substr_no_len - _null_ ));
 DESCR("return portion of string");
 DATA(insert OID =  884 (  btrim                   PGNSP PGUID 12 f f t f i 2 25 "25 25"        btrim - _null_ ));
-DESCR("trim both ends of string");
-DATA(insert OID =  885 (  btrim                   PGNSP PGUID 14 f f t f i 1 25 "25"  "select btrim($1, \' \')" - _null_ ));
-DESCR("trim both ends of string");
+DESCR("trim selected characters from both ends of string");
+DATA(insert OID =  885 (  btrim                   PGNSP PGUID 12 f f t f i 1 25 "25"  btrim1 - _null_ ));
+DESCR("trim spaces from both ends of string");
 
 DATA(insert OID =  936 (  substring    PGNSP PGUID 12 f f t f i 3 25 "25 23 23"  text_substr - _null_ ));
 DESCR("return portion of string");
index 9620483..539dfbc 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2002, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $Id: builtins.h,v 1.217 2003/05/15 15:50:20 petere Exp $
+ * $Id: builtins.h,v 1.218 2003/05/23 22:33:23 tgl Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -608,9 +608,12 @@ extern Datum initcap(PG_FUNCTION_ARGS);
 extern Datum lpad(PG_FUNCTION_ARGS);
 extern Datum rpad(PG_FUNCTION_ARGS);
 extern Datum btrim(PG_FUNCTION_ARGS);
+extern Datum btrim1(PG_FUNCTION_ARGS);
 extern Datum byteatrim(PG_FUNCTION_ARGS);
 extern Datum ltrim(PG_FUNCTION_ARGS);
+extern Datum ltrim1(PG_FUNCTION_ARGS);
 extern Datum rtrim(PG_FUNCTION_ARGS);
+extern Datum rtrim1(PG_FUNCTION_ARGS);
 extern Datum translate(PG_FUNCTION_ARGS);
 extern Datum chr(PG_FUNCTION_ARGS);
 extern Datum repeat(PG_FUNCTION_ARGS);