OSDN Git Service

f24203de35216aecc094018259df774552bd5df1
[pg-rex/syncrep.git] / src / backend / utils / adt / oracle_compat.c
1 /*-------------------------------------------------------------------------
2  * oracle_compat.c
3  *      Oracle compatible functions.
4  *
5  * Copyright (c) 1996-2004, PostgreSQL Global Development Group
6  *
7  *      Author: Edmund Mergl <E.Mergl@bawue.de>
8  *      Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
9  *
10  *
11  * IDENTIFICATION
12  *      $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.54 2004/08/29 04:12:52 momjian Exp $
13  *
14  *-------------------------------------------------------------------------
15  */
16 #include "postgres.h"
17
18 #include <ctype.h>
19 #include <limits.h>
20 /*
21  * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
22  * declare them in <wchar.h>.
23  */
24 #ifdef HAVE_WCHAR_H
25 #include <wchar.h>
26 #endif
27 #ifdef HAVE_WCTYPE_H
28 #include <wctype.h>
29 #endif
30
31 #include "utils/builtins.h"
32 #include "mb/pg_wchar.h"
33
34
35 /*
36  * If the system provides the needed functions for wide-character manipulation
37  * (which are all standardized by C99), then we implement upper/lower/initcap
38  * using wide-character functions.  Otherwise we use the traditional <ctype.h>
39  * functions, which of course will not work as desired in multibyte character
40  * sets.  Note that in either case we are effectively assuming that the
41  * database character encoding matches the encoding implied by LC_CTYPE.
42  *
43  * We assume if we have these two functions, we have their friends too, and
44  * can use the wide-character method.
45  */
46 #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
47 #define USE_WIDE_UPPER_LOWER
48 #endif
49
50 static text *dotrim(const char *string, int stringlen,
51            const char *set, int setlen,
52            bool doltrim, bool dortrim);
53
54
55 #ifdef USE_WIDE_UPPER_LOWER
56
57 /*
58  * Convert a TEXT value into a palloc'd wchar string.
59  */
60 static wchar_t *
61 texttowcs(const text *txt)
62 {
63         int                     nbytes = VARSIZE(txt) - VARHDRSZ;
64         char       *workstr;
65         wchar_t    *result;
66         size_t          ncodes;
67
68         /* Overflow paranoia */
69         if (nbytes < 0 ||
70                 nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
71                 ereport(ERROR,
72                                 (errcode(ERRCODE_OUT_OF_MEMORY),
73                                  errmsg("out of memory")));
74
75         /* Need a null-terminated version of the input */
76         workstr = (char *) palloc(nbytes + 1);
77         memcpy(workstr, VARDATA(txt), nbytes);
78         workstr[nbytes] = '\0';
79
80         /* Output workspace cannot have more codes than input bytes */
81         result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
82
83         /* Do the conversion */
84         ncodes = mbstowcs(result, workstr, nbytes + 1);
85
86         if (ncodes == (size_t) -1)
87         {
88                 /*
89                  * Invalid multibyte character encountered.  We try to give a useful
90                  * error message by letting pg_verifymbstr check the string.  But
91                  * it's possible that the string is OK to us, and not OK to mbstowcs
92                  * --- this suggests that the LC_CTYPE locale is different from the
93                  * database encoding.  Give a generic error message if verifymbstr
94                  * can't find anything wrong.
95                  */
96                 pg_verifymbstr(workstr, nbytes, false);
97                 ereport(ERROR,
98                                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
99                                  errmsg("invalid multibyte character for locale")));
100         }
101
102         Assert(ncodes <= (size_t) nbytes);
103
104         return result;
105 }
106
107
108 /*
109  * Convert a wchar string into a palloc'd TEXT value.  The wchar string
110  * must be zero-terminated, but we also require the caller to pass the string
111  * length, since it will know it anyway in current uses.
112  */
113 static text *
114 wcstotext(const wchar_t *str, int ncodes)
115 {
116         text       *result;
117         size_t          nbytes;
118
119         /* Overflow paranoia */
120         if (ncodes < 0 ||
121                 ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
122                 ereport(ERROR,
123                                 (errcode(ERRCODE_OUT_OF_MEMORY),
124                                  errmsg("out of memory")));
125
126         /* Make workspace certainly large enough for result */
127         result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
128
129         /* Do the conversion */
130         nbytes = wcstombs((char *) VARDATA(result), str,
131                                           (ncodes + 1) * MB_CUR_MAX);
132
133         if (nbytes == (size_t) -1)
134         {
135                 /* Invalid multibyte character encountered ... shouldn't happen */
136                 ereport(ERROR,
137                                 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
138                                  errmsg("invalid multibyte character for locale")));
139         }
140
141         Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
142
143         VARATT_SIZEP(result) = nbytes + VARHDRSZ;
144
145         return result;
146 }
147
148 #endif /* USE_WIDE_UPPER_LOWER */
149
150
151 /********************************************************************
152  *
153  * lower
154  *
155  * Syntax:
156  *
157  *       text lower(text string)
158  *
159  * Purpose:
160  *
161  *       Returns string, with all letters forced to lowercase.
162  *
163  ********************************************************************/
164
165 Datum
166 lower(PG_FUNCTION_ARGS)
167 {
168 #ifdef USE_WIDE_UPPER_LOWER
169         /* use wide char code only when max encoding length > one */
170         if (pg_database_encoding_max_length() > 1)
171         {
172                 text       *string = PG_GETARG_TEXT_P(0);
173                 text       *result;
174                 wchar_t    *workspace;
175                 int                     i;
176
177                 workspace = texttowcs(string);
178
179                 for (i = 0; workspace[i] != 0; i++)
180                         workspace[i] = towlower(workspace[i]);
181
182                 result = wcstotext(workspace, i);
183
184                 pfree(workspace);
185
186                 PG_RETURN_TEXT_P(result);
187         }
188         else
189 #endif /* USE_WIDE_UPPER_LOWER */
190         {
191                 text       *string = PG_GETARG_TEXT_P_COPY(0);
192                 char       *ptr;
193                 int                     m;
194
195                 /* Since we copied the string, we can scribble directly on the value */
196                 ptr = VARDATA(string);
197                 m = VARSIZE(string) - VARHDRSZ;
198
199                 while (m-- > 0)
200                 {
201                         *ptr = tolower((unsigned char) *ptr);
202                         ptr++;
203                 }
204
205                 PG_RETURN_TEXT_P(string);
206         }
207 }
208
209
210 /********************************************************************
211  *
212  * upper
213  *
214  * Syntax:
215  *
216  *       text upper(text string)
217  *
218  * Purpose:
219  *
220  *       Returns string, with all letters forced to uppercase.
221  *
222  ********************************************************************/
223
224 Datum
225 upper(PG_FUNCTION_ARGS)
226 {
227 #ifdef USE_WIDE_UPPER_LOWER
228         /* use wide char code only when max encoding length > one */
229         if (pg_database_encoding_max_length() > 1)
230         {
231                 text       *string = PG_GETARG_TEXT_P(0);
232                 text       *result;
233                 wchar_t    *workspace;
234                 int                     i;
235
236                 workspace = texttowcs(string);
237
238                 for (i = 0; workspace[i] != 0; i++)
239                         workspace[i] = towupper(workspace[i]);
240
241                 result = wcstotext(workspace, i);
242
243                 pfree(workspace);
244
245                 PG_RETURN_TEXT_P(result);
246         }
247         else
248 #endif /* USE_WIDE_UPPER_LOWER */
249         {
250                 text       *string = PG_GETARG_TEXT_P_COPY(0);
251                 char       *ptr;
252                 int                     m;
253
254                 /* Since we copied the string, we can scribble directly on the value */
255                 ptr = VARDATA(string);
256                 m = VARSIZE(string) - VARHDRSZ;
257
258                 while (m-- > 0)
259                 {
260                         *ptr = toupper((unsigned char) *ptr);
261                         ptr++;
262                 }
263
264                 PG_RETURN_TEXT_P(string);
265         }
266 }
267
268
269 /********************************************************************
270  *
271  * initcap
272  *
273  * Syntax:
274  *
275  *       text initcap(text string)
276  *
277  * Purpose:
278  *
279  *       Returns string, with first letter of each word in uppercase, all
280  *       other letters in lowercase. A word is defined as a sequence of
281  *       alphanumeric characters, delimited by non-alphanumeric
282  *       characters.
283  *
284  ********************************************************************/
285
286 Datum
287 initcap(PG_FUNCTION_ARGS)
288 {
289 #ifdef USE_WIDE_UPPER_LOWER
290         /* use wide char code only when max encoding length > one */
291         if (pg_database_encoding_max_length() > 1)
292         {
293                 text       *string = PG_GETARG_TEXT_P(0);
294                 text       *result;
295                 wchar_t    *workspace;
296                 int                     wasalnum = 0;
297                 int                     i;
298
299                 workspace = texttowcs(string);
300
301                 for (i = 0; workspace[i] != 0; i++)
302                 {
303                         if (wasalnum)
304                                 workspace[i] = towlower(workspace[i]);
305                         else
306                                 workspace[i] = towupper(workspace[i]);
307                         wasalnum = iswalnum(workspace[i]);
308                 }
309
310                 result = wcstotext(workspace, i);
311
312                 pfree(workspace);
313
314                 PG_RETURN_TEXT_P(result);
315         }
316         else
317 #endif /* USE_WIDE_UPPER_LOWER */
318         {
319                 text       *string = PG_GETARG_TEXT_P_COPY(0);
320                 int                     wasalnum = 0;
321                 char       *ptr;
322                 int                     m;
323
324                 /* Since we copied the string, we can scribble directly on the value */
325                 ptr = VARDATA(string);
326                 m = VARSIZE(string) - VARHDRSZ;
327
328                 while (m-- > 0)
329                 {
330                         if (wasalnum)
331                                 *ptr = tolower((unsigned char) *ptr);
332                         else
333                                 *ptr = toupper((unsigned char) *ptr);
334                         wasalnum = isalnum((unsigned char) *ptr);
335                         ptr++;
336                 }
337
338                 PG_RETURN_TEXT_P(string);
339         }
340 }
341
342
343 /********************************************************************
344  *
345  * lpad
346  *
347  * Syntax:
348  *
349  *       text lpad(text string1, int4 len, text string2)
350  *
351  * Purpose:
352  *
353  *       Returns string1, left-padded to length len with the sequence of
354  *       characters in string2.  If len is less than the length of string1,
355  *       instead truncate (on the right) to len.
356  *
357  ********************************************************************/
358
359 Datum
360 lpad(PG_FUNCTION_ARGS)
361 {
362         text       *string1 = PG_GETARG_TEXT_P(0);
363         int32           len = PG_GETARG_INT32(1);
364         text       *string2 = PG_GETARG_TEXT_P(2);
365         text       *ret;
366         char       *ptr1,
367                            *ptr2,
368                            *ptr2end,
369                            *ptr_ret;
370         int                     m,
371                                 s1len,
372                                 s2len;
373
374         int                     bytelen;
375
376         /* Negative len is silently taken as zero */
377         if (len < 0)
378                 len = 0;
379
380         s1len = VARSIZE(string1) - VARHDRSZ;
381         if (s1len < 0)
382                 s1len = 0;                              /* shouldn't happen */
383
384         s2len = VARSIZE(string2) - VARHDRSZ;
385         if (s2len < 0)
386                 s2len = 0;                              /* shouldn't happen */
387
388         s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
389
390         if (s1len > len)
391                 s1len = len;                    /* truncate string1 to len chars */
392
393         if (s2len <= 0)
394                 len = s1len;                    /* nothing to pad with, so don't pad */
395
396         bytelen = pg_database_encoding_max_length() * len;
397
398         /* check for integer overflow */
399         if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
400                 ereport(ERROR,
401                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
402                                  errmsg("requested length too large")));
403
404         ret = (text *) palloc(VARHDRSZ + bytelen);
405
406         m = len - s1len;
407
408         ptr2 = VARDATA(string2);
409         ptr2end = ptr2 + s2len;
410         ptr_ret = VARDATA(ret);
411
412         while (m--)
413         {
414                 int                     mlen = pg_mblen(ptr2);
415
416                 memcpy(ptr_ret, ptr2, mlen);
417                 ptr_ret += mlen;
418                 ptr2 += mlen;
419                 if (ptr2 == ptr2end)    /* wrap around at end of s2 */
420                         ptr2 = VARDATA(string2);
421         }
422
423         ptr1 = VARDATA(string1);
424
425         while (s1len--)
426         {
427                 int                     mlen = pg_mblen(ptr1);
428
429                 memcpy(ptr_ret, ptr1, mlen);
430                 ptr_ret += mlen;
431                 ptr1 += mlen;
432         }
433
434         VARATT_SIZEP(ret) = ptr_ret - (char *) ret;
435
436         PG_RETURN_TEXT_P(ret);
437 }
438
439
440 /********************************************************************
441  *
442  * rpad
443  *
444  * Syntax:
445  *
446  *       text rpad(text string1, int4 len, text string2)
447  *
448  * Purpose:
449  *
450  *       Returns string1, right-padded to length len with the sequence of
451  *       characters in string2.  If len is less than the length of string1,
452  *       instead truncate (on the right) to len.
453  *
454  ********************************************************************/
455
456 Datum
457 rpad(PG_FUNCTION_ARGS)
458 {
459         text       *string1 = PG_GETARG_TEXT_P(0);
460         int32           len = PG_GETARG_INT32(1);
461         text       *string2 = PG_GETARG_TEXT_P(2);
462         text       *ret;
463         char       *ptr1,
464                            *ptr2,
465                            *ptr2end,
466                            *ptr_ret;
467         int                     m,
468                                 s1len,
469                                 s2len;
470
471         int                     bytelen;
472
473         /* Negative len is silently taken as zero */
474         if (len < 0)
475                 len = 0;
476
477         s1len = VARSIZE(string1) - VARHDRSZ;
478         if (s1len < 0)
479                 s1len = 0;                              /* shouldn't happen */
480
481         s2len = VARSIZE(string2) - VARHDRSZ;
482         if (s2len < 0)
483                 s2len = 0;                              /* shouldn't happen */
484
485         s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
486
487         if (s1len > len)
488                 s1len = len;                    /* truncate string1 to len chars */
489
490         if (s2len <= 0)
491                 len = s1len;                    /* nothing to pad with, so don't pad */
492
493         bytelen = pg_database_encoding_max_length() * len;
494
495         /* Check for integer overflow */
496         if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
497                 ereport(ERROR,
498                                 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
499                                  errmsg("requested length too large")));
500
501         ret = (text *) palloc(VARHDRSZ + bytelen);
502         m = len - s1len;
503
504         ptr1 = VARDATA(string1);
505         ptr_ret = VARDATA(ret);
506
507         while (s1len--)
508         {
509                 int                     mlen = pg_mblen(ptr1);
510
511                 memcpy(ptr_ret, ptr1, mlen);
512                 ptr_ret += mlen;
513                 ptr1 += mlen;
514         }
515
516         ptr2 = VARDATA(string2);
517         ptr2end = ptr2 + s2len;
518
519         while (m--)
520         {
521                 int                     mlen = pg_mblen(ptr2);
522
523                 memcpy(ptr_ret, ptr2, mlen);
524                 ptr_ret += mlen;
525                 ptr2 += mlen;
526                 if (ptr2 == ptr2end)    /* wrap around at end of s2 */
527                         ptr2 = VARDATA(string2);
528         }
529
530         VARATT_SIZEP(ret) = ptr_ret - (char *) ret;
531
532         PG_RETURN_TEXT_P(ret);
533 }
534
535
536 /********************************************************************
537  *
538  * btrim
539  *
540  * Syntax:
541  *
542  *       text btrim(text string, text set)
543  *
544  * Purpose:
545  *
546  *       Returns string with characters removed from the front and back
547  *       up to the first character not in set.
548  *
549  ********************************************************************/
550
551 Datum
552 btrim(PG_FUNCTION_ARGS)
553 {
554         text       *string = PG_GETARG_TEXT_P(0);
555         text       *set = PG_GETARG_TEXT_P(1);
556         text       *ret;
557
558         ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
559                                  VARDATA(set), VARSIZE(set) - VARHDRSZ,
560                                  true, true);
561
562         PG_RETURN_TEXT_P(ret);
563 }
564
565 /********************************************************************
566  *
567  * btrim1 --- btrim with set fixed as ' '
568  *
569  ********************************************************************/
570
571 Datum
572 btrim1(PG_FUNCTION_ARGS)
573 {
574         text       *string = PG_GETARG_TEXT_P(0);
575         text       *ret;
576
577         ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
578                                  " ", 1,
579                                  true, true);
580
581         PG_RETURN_TEXT_P(ret);
582 }
583
584 /*
585  * Common implementation for btrim, ltrim, rtrim
586  */
587 static text *
588 dotrim(const char *string, int stringlen,
589            const char *set, int setlen,
590            bool doltrim, bool dortrim)
591 {
592         text       *result;
593         int                     i;
594
595         /* Nothing to do if either string or set is empty */
596         if (stringlen > 0 && setlen > 0)
597         {
598                 if (pg_database_encoding_max_length() > 1)
599                 {
600                         /*
601                          * In the multibyte-encoding case, build arrays of pointers to
602                          * character starts, so that we can avoid inefficient checks
603                          * in the inner loops.
604                          */
605                         const char **stringchars;
606                         const char **setchars;
607                         int                *stringmblen;
608                         int                *setmblen;
609                         int                     stringnchars;
610                         int                     setnchars;
611                         int                     resultndx;
612                         int                     resultnchars;
613                         const char *p;
614                         int                     len;
615                         int                     mblen;
616                         const char *str_pos;
617                         int                     str_len;
618
619                         stringchars = (const char **) palloc(stringlen * sizeof(char *));
620                         stringmblen = (int *) palloc(stringlen * sizeof(int));
621                         stringnchars = 0;
622                         p = string;
623                         len = stringlen;
624                         while (len > 0)
625                         {
626                                 stringchars[stringnchars] = p;
627                                 stringmblen[stringnchars] = mblen = pg_mblen(p);
628                                 stringnchars++;
629                                 p += mblen;
630                                 len -= mblen;
631                         }
632
633                         setchars = (const char **) palloc(setlen * sizeof(char *));
634                         setmblen = (int *) palloc(setlen * sizeof(int));
635                         setnchars = 0;
636                         p = set;
637                         len = setlen;
638                         while (len > 0)
639                         {
640                                 setchars[setnchars] = p;
641                                 setmblen[setnchars] = mblen = pg_mblen(p);
642                                 setnchars++;
643                                 p += mblen;
644                                 len -= mblen;
645                         }
646
647                         resultndx = 0;          /* index in stringchars[] */
648                         resultnchars = stringnchars;
649
650                         if (doltrim)
651                         {
652                                 while (resultnchars > 0)
653                                 {
654                                         str_pos = stringchars[resultndx];
655                                         str_len = stringmblen[resultndx];
656                                         for (i = 0; i < setnchars; i++)
657                                         {
658                                                 if (str_len == setmblen[i] &&
659                                                         memcmp(str_pos, setchars[i], str_len) == 0)
660                                                         break;
661                                         }
662                                         if (i >= setnchars)
663                                                 break;  /* no match here */
664                                         string += str_len;
665                                         stringlen -= str_len;
666                                         resultndx++;
667                                         resultnchars--;
668                                 }
669                         }
670
671                         if (dortrim)
672                         {
673                                 while (resultnchars > 0)
674                                 {
675                                         str_pos = stringchars[resultndx + resultnchars - 1];
676                                         str_len = stringmblen[resultndx + resultnchars - 1];
677                                         for (i = 0; i < setnchars; i++)
678                                         {
679                                                 if (str_len == setmblen[i] &&
680                                                         memcmp(str_pos, setchars[i], str_len) == 0)
681                                                         break;
682                                         }
683                                         if (i >= setnchars)
684                                                 break;  /* no match here */
685                                         stringlen -= str_len;
686                                         resultnchars--;
687                                 }
688                         }
689
690                         pfree(stringchars);
691                         pfree(stringmblen);
692                         pfree(setchars);
693                         pfree(setmblen);
694                 }
695                 else
696                 {
697                         /*
698                          * In the single-byte-encoding case, we don't need such
699                          * overhead.
700                          */
701                         if (doltrim)
702                         {
703                                 while (stringlen > 0)
704                                 {
705                                         char            str_ch = *string;
706
707                                         for (i = 0; i < setlen; i++)
708                                         {
709                                                 if (str_ch == set[i])
710                                                         break;
711                                         }
712                                         if (i >= setlen)
713                                                 break;  /* no match here */
714                                         string++;
715                                         stringlen--;
716                                 }
717                         }
718
719                         if (dortrim)
720                         {
721                                 while (stringlen > 0)
722                                 {
723                                         char            str_ch = string[stringlen - 1];
724
725                                         for (i = 0; i < setlen; i++)
726                                         {
727                                                 if (str_ch == set[i])
728                                                         break;
729                                         }
730                                         if (i >= setlen)
731                                                 break;  /* no match here */
732                                         stringlen--;
733                                 }
734                         }
735                 }
736         }
737
738         /* Return selected portion of string */
739         result = (text *) palloc(VARHDRSZ + stringlen);
740         VARATT_SIZEP(result) = VARHDRSZ + stringlen;
741         memcpy(VARDATA(result), string, stringlen);
742
743         return result;
744 }
745
746 /********************************************************************
747  *
748  * byteatrim
749  *
750  * Syntax:
751  *
752  *       bytea byteatrim(byta string, bytea set)
753  *
754  * Purpose:
755  *
756  *       Returns string with characters removed from the front and back
757  *       up to the first character not in set.
758  *
759  * Cloned from btrim and modified as required.
760  ********************************************************************/
761
762 Datum
763 byteatrim(PG_FUNCTION_ARGS)
764 {
765         bytea      *string = PG_GETARG_BYTEA_P(0);
766         bytea      *set = PG_GETARG_BYTEA_P(1);
767         bytea      *ret;
768         char       *ptr,
769                            *end,
770                            *ptr2,
771                            *end2;
772         int                     m;
773
774         if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
775                 (VARSIZE(set) - VARHDRSZ) <= 0)
776                 PG_RETURN_BYTEA_P(string);
777
778         ptr = VARDATA(string);
779         end = VARDATA(string) + VARSIZE(string) - VARHDRSZ - 1;
780         end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
781
782         while (m > 0)
783         {
784                 ptr2 = VARDATA(set);
785                 while (ptr2 <= end2)
786                 {
787                         if (*ptr == *ptr2)
788                                 break;
789                         ++ptr2;
790                 }
791                 if (ptr2 > end2)
792                         break;
793                 ptr++;
794                 m--;
795         }
796
797         while (m > 0)
798         {
799                 ptr2 = VARDATA(set);
800                 while (ptr2 <= end2)
801                 {
802                         if (*end == *ptr2)
803                                 break;
804                         ++ptr2;
805                 }
806                 if (ptr2 > end2)
807                         break;
808                 end--;
809                 m--;
810         }
811
812         ret = (bytea *) palloc(VARHDRSZ + m);
813         VARATT_SIZEP(ret) = VARHDRSZ + m;
814         memcpy(VARDATA(ret), ptr, m);
815
816         PG_RETURN_BYTEA_P(ret);
817 }
818
819 /********************************************************************
820  *
821  * ltrim
822  *
823  * Syntax:
824  *
825  *       text ltrim(text string, text set)
826  *
827  * Purpose:
828  *
829  *       Returns string with initial characters removed up to the first
830  *       character not in set.
831  *
832  ********************************************************************/
833
834 Datum
835 ltrim(PG_FUNCTION_ARGS)
836 {
837         text       *string = PG_GETARG_TEXT_P(0);
838         text       *set = PG_GETARG_TEXT_P(1);
839         text       *ret;
840
841         ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
842                                  VARDATA(set), VARSIZE(set) - VARHDRSZ,
843                                  true, false);
844
845         PG_RETURN_TEXT_P(ret);
846 }
847
848 /********************************************************************
849  *
850  * ltrim1 --- ltrim with set fixed as ' '
851  *
852  ********************************************************************/
853
854 Datum
855 ltrim1(PG_FUNCTION_ARGS)
856 {
857         text       *string = PG_GETARG_TEXT_P(0);
858         text       *ret;
859
860         ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
861                                  " ", 1,
862                                  true, false);
863
864         PG_RETURN_TEXT_P(ret);
865 }
866
867 /********************************************************************
868  *
869  * rtrim
870  *
871  * Syntax:
872  *
873  *       text rtrim(text string, text set)
874  *
875  * Purpose:
876  *
877  *       Returns string with final characters removed after the last
878  *       character not in set.
879  *
880  ********************************************************************/
881
882 Datum
883 rtrim(PG_FUNCTION_ARGS)
884 {
885         text       *string = PG_GETARG_TEXT_P(0);
886         text       *set = PG_GETARG_TEXT_P(1);
887         text       *ret;
888
889         ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
890                                  VARDATA(set), VARSIZE(set) - VARHDRSZ,
891                                  false, true);
892
893         PG_RETURN_TEXT_P(ret);
894 }
895
896 /********************************************************************
897  *
898  * rtrim1 --- rtrim with set fixed as ' '
899  *
900  ********************************************************************/
901
902 Datum
903 rtrim1(PG_FUNCTION_ARGS)
904 {
905         text       *string = PG_GETARG_TEXT_P(0);
906         text       *ret;
907
908         ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
909                                  " ", 1,
910                                  false, true);
911
912         PG_RETURN_TEXT_P(ret);
913 }
914
915
916 /********************************************************************
917  *
918  * translate
919  *
920  * Syntax:
921  *
922  *       text translate(text string, text from, text to)
923  *
924  * Purpose:
925  *
926  *       Returns string after replacing all occurrences of characters in from
927  *       with the corresponding character in to.  If from is longer than to,
928  *       occurrences of the extra characters in from are deleted.
929  *       Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
930  *
931  ********************************************************************/
932
933 Datum
934 translate(PG_FUNCTION_ARGS)
935 {
936         text       *string = PG_GETARG_TEXT_P(0);
937         text       *from = PG_GETARG_TEXT_P(1);
938         text       *to = PG_GETARG_TEXT_P(2);
939         text       *result;
940         char       *from_ptr,
941                            *to_ptr;
942         char       *source,
943                            *target;
944         int                     m,
945                                 fromlen,
946                                 tolen,
947                                 retlen,
948                                 i;
949
950         int                     str_len;
951         int                     estimate_len;
952         int                     len;
953         int                     source_len;
954         int                     from_index;
955
956         if ((m = VARSIZE(string) - VARHDRSZ) <= 0)
957                 PG_RETURN_TEXT_P(string);
958
959         fromlen = VARSIZE(from) - VARHDRSZ;
960         from_ptr = VARDATA(from);
961         tolen = VARSIZE(to) - VARHDRSZ;
962         to_ptr = VARDATA(to);
963
964         str_len = VARSIZE(string);
965         estimate_len = (tolen * 1.0 / fromlen + 0.5) * str_len;
966         estimate_len = estimate_len > str_len ? estimate_len : str_len;
967         result = (text *) palloc(estimate_len);
968
969         source = VARDATA(string);
970         target = VARDATA(result);
971         retlen = 0;
972
973         while (m > 0)
974         {
975                 source_len = pg_mblen(source);
976                 from_index = 0;
977
978                 for (i = 0; i < fromlen; i += len)
979                 {
980                         len = pg_mblen(&from_ptr[i]);
981                         if (len == source_len &&
982                                 memcmp(source, &from_ptr[i], len) == 0)
983                                 break;
984
985                         from_index++;
986                 }
987                 if (i < fromlen)
988                 {
989                         /* substitute */
990                         char       *p = to_ptr;
991
992                         for (i = 0; i < from_index; i++)
993                         {
994                                 p += pg_mblen(p);
995                                 if (p >= (to_ptr + tolen))
996                                         break;
997                         }
998                         if (p < (to_ptr + tolen))
999                         {
1000                                 len = pg_mblen(p);
1001                                 memcpy(target, p, len);
1002                                 target += len;
1003                                 retlen += len;
1004                         }
1005
1006                 }
1007                 else
1008                 {
1009                         /* no match, so copy */
1010                         memcpy(target, source, source_len);
1011                         target += source_len;
1012                         retlen += source_len;
1013                 }
1014
1015                 source += source_len;
1016                 m -= source_len;
1017         }
1018
1019         VARATT_SIZEP(result) = retlen + VARHDRSZ;
1020
1021         /*
1022          * There may be some wasted space in the result if deletions occurred,
1023          * but it's not worth reallocating it; the function result probably
1024          * won't live long anyway.
1025          */
1026
1027         PG_RETURN_TEXT_P(result);
1028 }
1029
1030 /********************************************************************
1031  *
1032  * ascii
1033  *
1034  * Syntax:
1035  *
1036  *       int ascii(text string)
1037  *
1038  * Purpose:
1039  *
1040  *       Returns the decimal representation of the first character from
1041  *       string.
1042  *
1043  ********************************************************************/
1044
1045 Datum
1046 ascii(PG_FUNCTION_ARGS)
1047 {
1048         text       *string = PG_GETARG_TEXT_P(0);
1049
1050         if (VARSIZE(string) <= VARHDRSZ)
1051                 PG_RETURN_INT32(0);
1052
1053         PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string)));
1054 }
1055
1056 /********************************************************************
1057  *
1058  * chr
1059  *
1060  * Syntax:
1061  *
1062  *       text chr(int val)
1063  *
1064  * Purpose:
1065  *
1066  *      Returns the character having the binary equivalent to val
1067  *
1068  ********************************************************************/
1069
1070 Datum
1071 chr(PG_FUNCTION_ARGS)
1072 {
1073         int32           cvalue = PG_GETARG_INT32(0);
1074         text       *result;
1075
1076         result = (text *) palloc(VARHDRSZ + 1);
1077         VARATT_SIZEP(result) = VARHDRSZ + 1;
1078         *VARDATA(result) = (char) cvalue;
1079
1080         PG_RETURN_TEXT_P(result);
1081 }
1082
1083 /********************************************************************
1084  *
1085  * repeat
1086  *
1087  * Syntax:
1088  *
1089  *       text repeat(text string, int val)
1090  *
1091  * Purpose:
1092  *
1093  *      Repeat string by val.
1094  *
1095  ********************************************************************/
1096
1097 Datum
1098 repeat(PG_FUNCTION_ARGS)
1099 {
1100         text       *string = PG_GETARG_TEXT_P(0);
1101         int32           count = PG_GETARG_INT32(1);
1102         text       *result;
1103         int                     slen,
1104                                 tlen;
1105         int                     i;
1106         char       *cp;
1107
1108         if (count < 0)
1109                 count = 0;
1110
1111         slen = (VARSIZE(string) - VARHDRSZ);
1112         tlen = (VARHDRSZ + (count * slen));
1113
1114         /* Check for integer overflow */
1115         if (slen != 0 && count != 0)
1116         {
1117                 int                     check = count * slen;
1118                 int                     check2 = check + VARHDRSZ;
1119
1120                 if ((check / slen) != count || check2 <= check)
1121                         ereport(ERROR,
1122                                         (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1123                                          errmsg("requested length too large")));
1124         }
1125
1126         result = (text *) palloc(tlen);
1127
1128         VARATT_SIZEP(result) = tlen;
1129         cp = VARDATA(result);
1130         for (i = 0; i < count; i++)
1131         {
1132                 memcpy(cp, VARDATA(string), slen);
1133                 cp += slen;
1134         }
1135
1136         PG_RETURN_TEXT_P(result);
1137 }