/*-------------------------------------------------------------------------
 *
 * Oracle compatible functions.
 *
 * Copyright (c) 1996-2004, PostgreSQL Global Development Group
 *
 * Author: Edmund Mergl <E.Mergl@bawue.de>
 * Multibyte enhancement: Tatsuo Ishii <ishii@postgresql.org>
 *
 * $PostgreSQL: pgsql/src/backend/utils/adt/oracle_compat.c,v 1.54 2004/08/29 04:12:52 momjian Exp $
 *
 *-------------------------------------------------------------------------
 */
/*
 * towlower() and friends should be in <wctype.h>, but some pre-C99 systems
 * declare them in <wchar.h>.
 */
31 #include "utils/builtins.h"
32 #include "mb/pg_wchar.h"
/*
 * If the system provides the needed functions for wide-character manipulation
 * (which are all standardized by C99), then we implement upper/lower/initcap
 * using wide-character functions.  Otherwise we use the traditional <ctype.h>
 * functions, which of course will not work as desired in multibyte character
 * sets.  Note that in either case we are effectively assuming that the
 * database character encoding matches the encoding implied by LC_CTYPE.
 *
 * We assume that if we have these two functions, we have their friends too,
 * and can use the wide-character method.
 */
46 #if defined(HAVE_WCSTOMBS) && defined(HAVE_TOWLOWER)
47 #define USE_WIDE_UPPER_LOWER
50 static text *dotrim(const char *string, int stringlen,
51 const char *set, int setlen,
52 bool doltrim, bool dortrim);
55 #ifdef USE_WIDE_UPPER_LOWER
58 * Convert a TEXT value into a palloc'd wchar string.
61 texttowcs(const text *txt)
63 int nbytes = VARSIZE(txt) - VARHDRSZ;
68 /* Overflow paranoia */
70 nbytes > (int) (INT_MAX / sizeof(wchar_t)) - 1)
72 (errcode(ERRCODE_OUT_OF_MEMORY),
73 errmsg("out of memory")));
75 /* Need a null-terminated version of the input */
76 workstr = (char *) palloc(nbytes + 1);
77 memcpy(workstr, VARDATA(txt), nbytes);
78 workstr[nbytes] = '\0';
80 /* Output workspace cannot have more codes than input bytes */
81 result = (wchar_t *) palloc((nbytes + 1) * sizeof(wchar_t));
83 /* Do the conversion */
84 ncodes = mbstowcs(result, workstr, nbytes + 1);
86 if (ncodes == (size_t) -1)
89 * Invalid multibyte character encountered. We try to give a useful
90 * error message by letting pg_verifymbstr check the string. But
91 * it's possible that the string is OK to us, and not OK to mbstowcs
92 * --- this suggests that the LC_CTYPE locale is different from the
93 * database encoding. Give a generic error message if verifymbstr
94 * can't find anything wrong.
96 pg_verifymbstr(workstr, nbytes, false);
98 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
99 errmsg("invalid multibyte character for locale")));
102 Assert(ncodes <= (size_t) nbytes);
109 * Convert a wchar string into a palloc'd TEXT value. The wchar string
110 * must be zero-terminated, but we also require the caller to pass the string
111 * length, since it will know it anyway in current uses.
114 wcstotext(const wchar_t *str, int ncodes)
119 /* Overflow paranoia */
121 ncodes > (int) ((INT_MAX - VARHDRSZ) / MB_CUR_MAX) - 1)
123 (errcode(ERRCODE_OUT_OF_MEMORY),
124 errmsg("out of memory")));
126 /* Make workspace certainly large enough for result */
127 result = (text *) palloc((ncodes + 1) * MB_CUR_MAX + VARHDRSZ);
129 /* Do the conversion */
130 nbytes = wcstombs((char *) VARDATA(result), str,
131 (ncodes + 1) * MB_CUR_MAX);
133 if (nbytes == (size_t) -1)
135 /* Invalid multibyte character encountered ... shouldn't happen */
137 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
138 errmsg("invalid multibyte character for locale")));
141 Assert(nbytes <= (size_t) (ncodes * MB_CUR_MAX));
143 VARATT_SIZEP(result) = nbytes + VARHDRSZ;
148 #endif /* USE_WIDE_UPPER_LOWER */
151 /********************************************************************
157 * text lower(text string)
161 * Returns string, with all letters forced to lowercase.
163 ********************************************************************/
166 lower(PG_FUNCTION_ARGS)
168 #ifdef USE_WIDE_UPPER_LOWER
169 /* use wide char code only when max encoding length > one */
170 if (pg_database_encoding_max_length() > 1)
172 text *string = PG_GETARG_TEXT_P(0);
177 workspace = texttowcs(string);
179 for (i = 0; workspace[i] != 0; i++)
180 workspace[i] = towlower(workspace[i]);
182 result = wcstotext(workspace, i);
186 PG_RETURN_TEXT_P(result);
189 #endif /* USE_WIDE_UPPER_LOWER */
191 text *string = PG_GETARG_TEXT_P_COPY(0);
195 /* Since we copied the string, we can scribble directly on the value */
196 ptr = VARDATA(string);
197 m = VARSIZE(string) - VARHDRSZ;
201 *ptr = tolower((unsigned char) *ptr);
205 PG_RETURN_TEXT_P(string);
210 /********************************************************************
216 * text upper(text string)
220 * Returns string, with all letters forced to uppercase.
222 ********************************************************************/
225 upper(PG_FUNCTION_ARGS)
227 #ifdef USE_WIDE_UPPER_LOWER
228 /* use wide char code only when max encoding length > one */
229 if (pg_database_encoding_max_length() > 1)
231 text *string = PG_GETARG_TEXT_P(0);
236 workspace = texttowcs(string);
238 for (i = 0; workspace[i] != 0; i++)
239 workspace[i] = towupper(workspace[i]);
241 result = wcstotext(workspace, i);
245 PG_RETURN_TEXT_P(result);
248 #endif /* USE_WIDE_UPPER_LOWER */
250 text *string = PG_GETARG_TEXT_P_COPY(0);
254 /* Since we copied the string, we can scribble directly on the value */
255 ptr = VARDATA(string);
256 m = VARSIZE(string) - VARHDRSZ;
260 *ptr = toupper((unsigned char) *ptr);
264 PG_RETURN_TEXT_P(string);
269 /********************************************************************
275 * text initcap(text string)
279 * Returns string, with first letter of each word in uppercase, all
280 * other letters in lowercase. A word is defined as a sequence of
281 * alphanumeric characters, delimited by non-alphanumeric
284 ********************************************************************/
287 initcap(PG_FUNCTION_ARGS)
289 #ifdef USE_WIDE_UPPER_LOWER
290 /* use wide char code only when max encoding length > one */
291 if (pg_database_encoding_max_length() > 1)
293 text *string = PG_GETARG_TEXT_P(0);
299 workspace = texttowcs(string);
301 for (i = 0; workspace[i] != 0; i++)
304 workspace[i] = towlower(workspace[i]);
306 workspace[i] = towupper(workspace[i]);
307 wasalnum = iswalnum(workspace[i]);
310 result = wcstotext(workspace, i);
314 PG_RETURN_TEXT_P(result);
317 #endif /* USE_WIDE_UPPER_LOWER */
319 text *string = PG_GETARG_TEXT_P_COPY(0);
324 /* Since we copied the string, we can scribble directly on the value */
325 ptr = VARDATA(string);
326 m = VARSIZE(string) - VARHDRSZ;
331 *ptr = tolower((unsigned char) *ptr);
333 *ptr = toupper((unsigned char) *ptr);
334 wasalnum = isalnum((unsigned char) *ptr);
338 PG_RETURN_TEXT_P(string);
343 /********************************************************************
349 * text lpad(text string1, int4 len, text string2)
353 * Returns string1, left-padded to length len with the sequence of
354 * characters in string2. If len is less than the length of string1,
355 * instead truncate (on the right) to len.
357 ********************************************************************/
360 lpad(PG_FUNCTION_ARGS)
362 text *string1 = PG_GETARG_TEXT_P(0);
363 int32 len = PG_GETARG_INT32(1);
364 text *string2 = PG_GETARG_TEXT_P(2);
376 /* Negative len is silently taken as zero */
380 s1len = VARSIZE(string1) - VARHDRSZ;
382 s1len = 0; /* shouldn't happen */
384 s2len = VARSIZE(string2) - VARHDRSZ;
386 s2len = 0; /* shouldn't happen */
388 s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
391 s1len = len; /* truncate string1 to len chars */
394 len = s1len; /* nothing to pad with, so don't pad */
396 bytelen = pg_database_encoding_max_length() * len;
398 /* check for integer overflow */
399 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
401 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
402 errmsg("requested length too large")));
404 ret = (text *) palloc(VARHDRSZ + bytelen);
408 ptr2 = VARDATA(string2);
409 ptr2end = ptr2 + s2len;
410 ptr_ret = VARDATA(ret);
414 int mlen = pg_mblen(ptr2);
416 memcpy(ptr_ret, ptr2, mlen);
419 if (ptr2 == ptr2end) /* wrap around at end of s2 */
420 ptr2 = VARDATA(string2);
423 ptr1 = VARDATA(string1);
427 int mlen = pg_mblen(ptr1);
429 memcpy(ptr_ret, ptr1, mlen);
434 VARATT_SIZEP(ret) = ptr_ret - (char *) ret;
436 PG_RETURN_TEXT_P(ret);
440 /********************************************************************
446 * text rpad(text string1, int4 len, text string2)
450 * Returns string1, right-padded to length len with the sequence of
451 * characters in string2. If len is less than the length of string1,
452 * instead truncate (on the right) to len.
454 ********************************************************************/
457 rpad(PG_FUNCTION_ARGS)
459 text *string1 = PG_GETARG_TEXT_P(0);
460 int32 len = PG_GETARG_INT32(1);
461 text *string2 = PG_GETARG_TEXT_P(2);
473 /* Negative len is silently taken as zero */
477 s1len = VARSIZE(string1) - VARHDRSZ;
479 s1len = 0; /* shouldn't happen */
481 s2len = VARSIZE(string2) - VARHDRSZ;
483 s2len = 0; /* shouldn't happen */
485 s1len = pg_mbstrlen_with_len(VARDATA(string1), s1len);
488 s1len = len; /* truncate string1 to len chars */
491 len = s1len; /* nothing to pad with, so don't pad */
493 bytelen = pg_database_encoding_max_length() * len;
495 /* Check for integer overflow */
496 if (len != 0 && bytelen / pg_database_encoding_max_length() != len)
498 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
499 errmsg("requested length too large")));
501 ret = (text *) palloc(VARHDRSZ + bytelen);
504 ptr1 = VARDATA(string1);
505 ptr_ret = VARDATA(ret);
509 int mlen = pg_mblen(ptr1);
511 memcpy(ptr_ret, ptr1, mlen);
516 ptr2 = VARDATA(string2);
517 ptr2end = ptr2 + s2len;
521 int mlen = pg_mblen(ptr2);
523 memcpy(ptr_ret, ptr2, mlen);
526 if (ptr2 == ptr2end) /* wrap around at end of s2 */
527 ptr2 = VARDATA(string2);
530 VARATT_SIZEP(ret) = ptr_ret - (char *) ret;
532 PG_RETURN_TEXT_P(ret);
536 /********************************************************************
542 * text btrim(text string, text set)
546 * Returns string with characters removed from the front and back
547 * up to the first character not in set.
549 ********************************************************************/
552 btrim(PG_FUNCTION_ARGS)
554 text *string = PG_GETARG_TEXT_P(0);
555 text *set = PG_GETARG_TEXT_P(1);
558 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
559 VARDATA(set), VARSIZE(set) - VARHDRSZ,
562 PG_RETURN_TEXT_P(ret);
565 /********************************************************************
567 * btrim1 --- btrim with set fixed as ' '
569 ********************************************************************/
572 btrim1(PG_FUNCTION_ARGS)
574 text *string = PG_GETARG_TEXT_P(0);
577 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
581 PG_RETURN_TEXT_P(ret);
585 * Common implementation for btrim, ltrim, rtrim
588 dotrim(const char *string, int stringlen,
589 const char *set, int setlen,
590 bool doltrim, bool dortrim)
595 /* Nothing to do if either string or set is empty */
596 if (stringlen > 0 && setlen > 0)
598 if (pg_database_encoding_max_length() > 1)
601 * In the multibyte-encoding case, build arrays of pointers to
602 * character starts, so that we can avoid inefficient checks
603 * in the inner loops.
605 const char **stringchars;
606 const char **setchars;
619 stringchars = (const char **) palloc(stringlen * sizeof(char *));
620 stringmblen = (int *) palloc(stringlen * sizeof(int));
626 stringchars[stringnchars] = p;
627 stringmblen[stringnchars] = mblen = pg_mblen(p);
633 setchars = (const char **) palloc(setlen * sizeof(char *));
634 setmblen = (int *) palloc(setlen * sizeof(int));
640 setchars[setnchars] = p;
641 setmblen[setnchars] = mblen = pg_mblen(p);
647 resultndx = 0; /* index in stringchars[] */
648 resultnchars = stringnchars;
652 while (resultnchars > 0)
654 str_pos = stringchars[resultndx];
655 str_len = stringmblen[resultndx];
656 for (i = 0; i < setnchars; i++)
658 if (str_len == setmblen[i] &&
659 memcmp(str_pos, setchars[i], str_len) == 0)
663 break; /* no match here */
665 stringlen -= str_len;
673 while (resultnchars > 0)
675 str_pos = stringchars[resultndx + resultnchars - 1];
676 str_len = stringmblen[resultndx + resultnchars - 1];
677 for (i = 0; i < setnchars; i++)
679 if (str_len == setmblen[i] &&
680 memcmp(str_pos, setchars[i], str_len) == 0)
684 break; /* no match here */
685 stringlen -= str_len;
698 * In the single-byte-encoding case, we don't need such
703 while (stringlen > 0)
705 char str_ch = *string;
707 for (i = 0; i < setlen; i++)
709 if (str_ch == set[i])
713 break; /* no match here */
721 while (stringlen > 0)
723 char str_ch = string[stringlen - 1];
725 for (i = 0; i < setlen; i++)
727 if (str_ch == set[i])
731 break; /* no match here */
738 /* Return selected portion of string */
739 result = (text *) palloc(VARHDRSZ + stringlen);
740 VARATT_SIZEP(result) = VARHDRSZ + stringlen;
741 memcpy(VARDATA(result), string, stringlen);
746 /********************************************************************
752 * bytea byteatrim(byta string, bytea set)
756 * Returns string with characters removed from the front and back
757 * up to the first character not in set.
759 * Cloned from btrim and modified as required.
760 ********************************************************************/
763 byteatrim(PG_FUNCTION_ARGS)
765 bytea *string = PG_GETARG_BYTEA_P(0);
766 bytea *set = PG_GETARG_BYTEA_P(1);
774 if ((m = VARSIZE(string) - VARHDRSZ) <= 0 ||
775 (VARSIZE(set) - VARHDRSZ) <= 0)
776 PG_RETURN_BYTEA_P(string);
778 ptr = VARDATA(string);
779 end = VARDATA(string) + VARSIZE(string) - VARHDRSZ - 1;
780 end2 = VARDATA(set) + VARSIZE(set) - VARHDRSZ - 1;
812 ret = (bytea *) palloc(VARHDRSZ + m);
813 VARATT_SIZEP(ret) = VARHDRSZ + m;
814 memcpy(VARDATA(ret), ptr, m);
816 PG_RETURN_BYTEA_P(ret);
819 /********************************************************************
825 * text ltrim(text string, text set)
829 * Returns string with initial characters removed up to the first
830 * character not in set.
832 ********************************************************************/
835 ltrim(PG_FUNCTION_ARGS)
837 text *string = PG_GETARG_TEXT_P(0);
838 text *set = PG_GETARG_TEXT_P(1);
841 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
842 VARDATA(set), VARSIZE(set) - VARHDRSZ,
845 PG_RETURN_TEXT_P(ret);
848 /********************************************************************
850 * ltrim1 --- ltrim with set fixed as ' '
852 ********************************************************************/
855 ltrim1(PG_FUNCTION_ARGS)
857 text *string = PG_GETARG_TEXT_P(0);
860 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
864 PG_RETURN_TEXT_P(ret);
867 /********************************************************************
873 * text rtrim(text string, text set)
877 * Returns string with final characters removed after the last
878 * character not in set.
880 ********************************************************************/
883 rtrim(PG_FUNCTION_ARGS)
885 text *string = PG_GETARG_TEXT_P(0);
886 text *set = PG_GETARG_TEXT_P(1);
889 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
890 VARDATA(set), VARSIZE(set) - VARHDRSZ,
893 PG_RETURN_TEXT_P(ret);
896 /********************************************************************
898 * rtrim1 --- rtrim with set fixed as ' '
900 ********************************************************************/
903 rtrim1(PG_FUNCTION_ARGS)
905 text *string = PG_GETARG_TEXT_P(0);
908 ret = dotrim(VARDATA(string), VARSIZE(string) - VARHDRSZ,
912 PG_RETURN_TEXT_P(ret);
916 /********************************************************************
922 * text translate(text string, text from, text to)
926 * Returns string after replacing all occurrences of characters in from
927 * with the corresponding character in to. If from is longer than to,
928 * occurrences of the extra characters in from are deleted.
929 * Improved by Edwin Ramirez <ramirez@doc.mssm.edu>.
931 ********************************************************************/
934 translate(PG_FUNCTION_ARGS)
936 text *string = PG_GETARG_TEXT_P(0);
937 text *from = PG_GETARG_TEXT_P(1);
938 text *to = PG_GETARG_TEXT_P(2);
956 if ((m = VARSIZE(string) - VARHDRSZ) <= 0)
957 PG_RETURN_TEXT_P(string);
959 fromlen = VARSIZE(from) - VARHDRSZ;
960 from_ptr = VARDATA(from);
961 tolen = VARSIZE(to) - VARHDRSZ;
962 to_ptr = VARDATA(to);
964 str_len = VARSIZE(string);
965 estimate_len = (tolen * 1.0 / fromlen + 0.5) * str_len;
966 estimate_len = estimate_len > str_len ? estimate_len : str_len;
967 result = (text *) palloc(estimate_len);
969 source = VARDATA(string);
970 target = VARDATA(result);
975 source_len = pg_mblen(source);
978 for (i = 0; i < fromlen; i += len)
980 len = pg_mblen(&from_ptr[i]);
981 if (len == source_len &&
982 memcmp(source, &from_ptr[i], len) == 0)
992 for (i = 0; i < from_index; i++)
995 if (p >= (to_ptr + tolen))
998 if (p < (to_ptr + tolen))
1001 memcpy(target, p, len);
1009 /* no match, so copy */
1010 memcpy(target, source, source_len);
1011 target += source_len;
1012 retlen += source_len;
1015 source += source_len;
1019 VARATT_SIZEP(result) = retlen + VARHDRSZ;
1022 * There may be some wasted space in the result if deletions occurred,
1023 * but it's not worth reallocating it; the function result probably
1024 * won't live long anyway.
1027 PG_RETURN_TEXT_P(result);
1030 /********************************************************************
1036 * int ascii(text string)
1040 * Returns the decimal representation of the first character from
1043 ********************************************************************/
1046 ascii(PG_FUNCTION_ARGS)
1048 text *string = PG_GETARG_TEXT_P(0);
1050 if (VARSIZE(string) <= VARHDRSZ)
1053 PG_RETURN_INT32((int32) *((unsigned char *) VARDATA(string)));
1056 /********************************************************************
1066 * Returns the character having the binary equivalent to val
1068 ********************************************************************/
1071 chr(PG_FUNCTION_ARGS)
1073 int32 cvalue = PG_GETARG_INT32(0);
1076 result = (text *) palloc(VARHDRSZ + 1);
1077 VARATT_SIZEP(result) = VARHDRSZ + 1;
1078 *VARDATA(result) = (char) cvalue;
1080 PG_RETURN_TEXT_P(result);
1083 /********************************************************************
1089 * text repeat(text string, int val)
1093 * Repeat string by val.
1095 ********************************************************************/
1098 repeat(PG_FUNCTION_ARGS)
1100 text *string = PG_GETARG_TEXT_P(0);
1101 int32 count = PG_GETARG_INT32(1);
1111 slen = (VARSIZE(string) - VARHDRSZ);
1112 tlen = (VARHDRSZ + (count * slen));
1114 /* Check for integer overflow */
1115 if (slen != 0 && count != 0)
1117 int check = count * slen;
1118 int check2 = check + VARHDRSZ;
1120 if ((check / slen) != count || check2 <= check)
1122 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
1123 errmsg("requested length too large")));
1126 result = (text *) palloc(tlen);
1128 VARATT_SIZEP(result) = tlen;
1129 cp = VARDATA(result);
1130 for (i = 0; i < count; i++)
1132 memcpy(cp, VARDATA(string), slen);
1136 PG_RETURN_TEXT_P(result);