OSDN Git Service

Add PQdsplen() which returns the "display length" of a character.
authorTatsuo Ishii <ishii@postgresql.org>
Mon, 15 Mar 2004 10:41:26 +0000 (10:41 +0000)
committerTatsuo Ishii <ishii@postgresql.org>
Mon, 15 Mar 2004 10:41:26 +0000 (10:41 +0000)
Some work is still needed:
- UTF-8 and MULE_INTERNAL always return 1

src/backend/utils/mb/mbutils.c
src/backend/utils/mb/wchar.c
src/bin/psql/common.c
src/include/mb/pg_wchar.h
src/interfaces/libpq/fe-misc.c
src/interfaces/libpq/libpq-fe.h

index 3838a0a..96dd563 100644 (file)
@@ -4,7 +4,7 @@
  * (currently mule internal code (mic) is used)
  * Tatsuo Ishii
  *
- * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.45 2003/11/29 19:52:02 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/mbutils.c,v 1.46 2004/03/15 10:41:25 ishii Exp $
  */
 #include "postgres.h"
 
@@ -463,6 +463,13 @@ pg_mblen(const unsigned char *mbstr)
        return ((*pg_wchar_table[DatabaseEncoding->encoding].mblen) (mbstr));
 }
 
+/* returns the display length of a multibyte word */
+int
+pg_dsplen(const unsigned char *mbstr)
+{
+       return ((*pg_wchar_table[DatabaseEncoding->encoding].dsplen) (mbstr));
+}
+
 /* returns the length (counted as a wchar) of a multibyte string */
 int
 pg_mbstrlen(const unsigned char *mbstr)
index b2d48c9..f08cffa 100644 (file)
@@ -1,7 +1,7 @@
 /*
  * conversion functions between pg_wchar and multibyte streams.
  * Tatsuo Ishii
- * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.35 2003/11/29 22:39:59 pgsql Exp $
+ * $PostgreSQL: pgsql/src/backend/utils/mb/wchar.c,v 1.36 2004/03/15 10:41:25 ishii Exp $
  *
  * WIN1250 client encoding updated by Pavel Behal
  *
@@ -49,6 +49,12 @@ pg_ascii_mblen(const unsigned char *s)
        return (1);
 }
 
+static int
+pg_ascii_dsplen(const unsigned char *s)
+{
+       return (1);
+}
+
 /*
  * EUC
  */
@@ -107,6 +113,22 @@ pg_euc_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_euc_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s == SS2)
+               len = 2;
+       else if (*s == SS3)
+               len = 2;
+       else if (*s & 0x80)
+               len = 2;
+       else
+               len = 1;
+       return (len);
+}
+
 /*
  * EUC_JP
  */
@@ -122,6 +144,22 @@ pg_eucjp_mblen(const unsigned char *s)
        return (pg_euc_mblen(s));
 }
 
+static int
+pg_eucjp_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s == SS2)
+               len = 1;
+       else if (*s == SS3)
+               len = 2;
+       else if (*s & 0x80)
+               len = 2;
+       else
+               len = 1;
+       return (len);
+}
+
 /*
  * EUC_KR
  */
@@ -137,6 +175,12 @@ pg_euckr_mblen(const unsigned char *s)
        return (pg_euc_mblen(s));
 }
 
+static int
+pg_euckr_dsplen(const unsigned char *s)
+{
+       return (pg_euc_dsplen(s));
+}
+
 /*
  * EUC_CN
  */
@@ -191,6 +235,18 @@ pg_euccn_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_euccn_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s & 0x80)
+               len = 2;
+       else
+               len = 1;
+       return (len);
+}
+
 /*
  * EUC_TW
  */
@@ -250,6 +306,22 @@ pg_euctw_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_euctw_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s == SS2)
+               len = 2;
+       else if (*s == SS3)
+               len = 2;
+       else if (*s & 0x80)
+               len = 2;
+       else
+               len = 1;
+       return (len);
+}
+
 /*
  * JOHAB
  */
@@ -265,6 +337,12 @@ pg_johab_mblen(const unsigned char *s)
        return (pg_euc_mblen(s));
 }
 
+static int
+pg_johab_dsplen(const unsigned char *s)
+{
+       return (pg_euc_dsplen(s));
+}
+
 /*
  * convert UTF-8 string to pg_wchar (UCS-2)
  * caller should allocate enough space for "to"
@@ -333,6 +411,12 @@ pg_utf_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_utf_dsplen(const unsigned char *s)
+{
+  return 1;    /* XXX fix me! */
+}
+
 /*
  * convert mule internal code to pg_wchar
  * caller should allocate enough space for "to"
@@ -406,6 +490,12 @@ pg_mule_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_mule_dsplen(const unsigned char *s)
+{
+  return 1;    /* XXX fix me! */
+}
+
 /*
  * ISO8859-1
  */
@@ -430,6 +520,12 @@ pg_latin1_mblen(const unsigned char *s)
        return (1);
 }
 
+static int
+pg_latin1_dsplen(const unsigned char *s)
+{
+       return (1);
+}
+
 /*
  * SJIS
  */
@@ -453,6 +549,26 @@ pg_sjis_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_sjis_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s >= 0xa1 && *s <= 0xdf)
+       {                                                       /* 1 byte kana? */
+               len = 1;
+       }
+       else if (*s > 0x7f)
+       {                                                       /* kanji? */
+               len = 2;
+       }
+       else
+       {                                                       /* should be ASCII */
+               len = 1;
+       }
+       return (len);
+}
+
 /*
  * Big5
  */
@@ -472,6 +588,22 @@ pg_big5_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_big5_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s > 0x7f)
+       {                                                       /* kanji? */
+               len = 2;
+       }
+       else
+       {                                                       /* should be ASCII */
+               len = 1;
+       }
+       return (len);
+}
+
 /*
  * GBK
  */
@@ -491,6 +623,22 @@ pg_gbk_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_gbk_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s > 0x7f)
+       {                                                       /* kanji? */
+               len = 2;
+       }
+       else
+       {                                                       /* should be ASCII */
+               len = 1;
+       }
+       return (len);
+}
+
 /*
  * UHC
  */
@@ -510,6 +658,22 @@ pg_uhc_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_uhc_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s > 0x7f)
+       {                                                       /* 2byte? */
+               len = 2;
+       }
+       else
+       {                                                       /* should be ASCII */
+               len = 1;
+       }
+       return (len);
+}
+
 /*
  *     * GB18030
  *      * Added by Bill Huang <bhuang@redhat.com>,<bill_huanghb@ybb.ne.jp>
@@ -535,42 +699,58 @@ pg_gb18030_mblen(const unsigned char *s)
        return (len);
 }
 
+static int
+pg_gb18030_dsplen(const unsigned char *s)
+{
+       int                     len;
+
+       if (*s <= 0x7f)
+       {                                                       /* ASCII */
+               len = 1;
+       }
+       else
+       {
+               len = 2;
+       }
+       return (len);
+}
+
 
 pg_wchar_tbl pg_wchar_table[] = {
-       {pg_ascii2wchar_with_len, pg_ascii_mblen, 1},           /* 0; PG_SQL_ASCII      */
-       {pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3},           /* 1; PG_EUC_JP */
-       {pg_euccn2wchar_with_len, pg_euccn_mblen, 3},           /* 2; PG_EUC_CN */
-       {pg_euckr2wchar_with_len, pg_euckr_mblen, 3},           /* 3; PG_EUC_KR */
-       {pg_euctw2wchar_with_len, pg_euctw_mblen, 3},           /* 4; PG_EUC_TW */
-       {pg_johab2wchar_with_len, pg_johab_mblen, 3},           /* 5; PG_JOHAB */
-       {pg_utf2wchar_with_len, pg_utf_mblen, 3},       /* 6; PG_UNICODE */
-       {pg_mule2wchar_with_len, pg_mule_mblen, 3}, /* 7; PG_MULE_INTERNAL */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 8; PG_LATIN1 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 9; PG_LATIN2 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 10; PG_LATIN3 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 11; PG_LATIN4 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 12; PG_LATIN5 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 13; PG_LATIN6 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 14; PG_LATIN7 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 15; PG_LATIN8 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 16; PG_LATIN9 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 17; PG_LATIN10 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 18; PG_WIN1256 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 19; PG_TCVN */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 20; PG_WIN874 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 21; PG_KOI8 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 22; PG_WIN1251 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 23; PG_ALT */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 24; ISO-8859-5 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 25; ISO-8859-6 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 26; ISO-8859-7 */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 27; ISO-8859-8 */
-       {0, pg_sjis_mblen, 2},          /* 28; PG_SJIS */
-       {0, pg_big5_mblen, 2},          /* 29; PG_BIG5 */
-       {0, pg_gbk_mblen, 2},           /* 30; PG_GBK */
-       {0, pg_uhc_mblen, 2},           /* 31; PG_UHC */
-       {pg_latin12wchar_with_len, pg_latin1_mblen, 1},         /* 32; PG_WIN1250 */
-       {0, pg_gb18030_mblen, 2}        /* 33; PG_GB18030 */
+       {pg_ascii2wchar_with_len, pg_ascii_mblen, pg_ascii_dsplen, 1},          /* 0; PG_SQL_ASCII      */
+       {pg_eucjp2wchar_with_len, pg_eucjp_mblen, pg_eucjp_dsplen, 3},          /* 1; PG_EUC_JP */
+       {pg_euccn2wchar_with_len, pg_euccn_mblen, pg_euccn_dsplen, 3},          /* 2; PG_EUC_CN */
+       {pg_euckr2wchar_with_len, pg_euckr_mblen, pg_euckr_dsplen, 3},          /* 3; PG_EUC_KR */
+       {pg_euctw2wchar_with_len, pg_euctw_mblen, pg_euctw_dsplen, 3},          /* 4; PG_EUC_TW */
+       {pg_johab2wchar_with_len, pg_johab_mblen, pg_johab_dsplen, 3},          /* 5; PG_JOHAB */
+       {pg_utf2wchar_with_len, pg_utf_mblen, pg_utf_dsplen, 3},        /* 6; PG_UNICODE */
+       {pg_mule2wchar_with_len, pg_mule_mblen, pg_mule_dsplen, 3}, /* 7; PG_MULE_INTERNAL */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 8; PG_LATIN1 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 9; PG_LATIN2 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 10; PG_LATIN3 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 11; PG_LATIN4 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 12; PG_LATIN5 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 13; PG_LATIN6 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 14; PG_LATIN7 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 15; PG_LATIN8 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 16; PG_LATIN9 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 17; PG_LATIN10 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 18; PG_WIN1256 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 19; PG_TCVN */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 20; PG_WIN874 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 21; PG_KOI8 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 22; PG_WIN1251 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 23; PG_ALT */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 24; ISO-8859-5 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 25; ISO-8859-6 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 26; ISO-8859-7 */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 27; ISO-8859-8 */
+       {0, pg_sjis_mblen, pg_sjis_dsplen, 2},          /* 28; PG_SJIS */
+       {0, pg_big5_mblen, pg_big5_dsplen,2},           /* 29; PG_BIG5 */
+       {0, pg_gbk_mblen, pg_gbk_dsplen, 2},            /* 30; PG_GBK */
+       {0, pg_uhc_mblen, pg_uhc_dsplen, 2},            /* 31; PG_UHC */
+       {pg_latin12wchar_with_len, pg_latin1_mblen, pg_latin1_dsplen, 1},               /* 32; PG_WIN1250 */
+       {0, pg_gb18030_mblen, pg_gb18030_dsplen, 2}     /* 33; PG_GB18030 */
 };
 
 /* returns the byte length of a word for mule internal code */
@@ -595,6 +775,20 @@ pg_encoding_mblen(int encoding, const unsigned char *mbstr)
 }
 
 /*
+ * Returns the display length of a multibyte word.
+ */
+int
+pg_encoding_dsplen(int encoding, const unsigned char *mbstr)
+{
+       Assert(PG_VALID_ENCODING(encoding));
+
+       return ((encoding >= 0 &&
+                        encoding < sizeof(pg_wchar_table) / sizeof(pg_wchar_tbl)) ?
+                       ((*pg_wchar_table[encoding].dsplen) (mbstr)) :
+                       ((*pg_wchar_table[PG_SQL_ASCII].dsplen) (mbstr)));
+}
+
+/*
  * fetch maximum length of a char encoding
  */
 int
@@ -688,6 +882,3 @@ pg_database_encoding_max_length(void)
 }
 
 #endif
-
-
-
index 511107b..2a6be54 100644 (file)
@@ -3,7 +3,7 @@
  *
  * Copyright (c) 2000-2003, PostgreSQL Global Development Group
  *
- * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.83 2004/03/14 04:25:17 tgl Exp $
+ * $PostgreSQL: pgsql/src/bin/psql/common.c,v 1.84 2004/03/15 10:41:26 ishii Exp $
  */
 #include "postgres_fe.h"
 #include "common.h"
@@ -410,7 +410,7 @@ ReportSyntaxErrorPosition(const PGresult *result, const char *query)
        {
                qidx[i] = qoffset;
                scridx[i] = scroffset;
-               scroffset += 1;         /* XXX fix me when we have screen width info */
+               scroffset += PQdsplen(&query[qoffset], pset.encoding);
                qoffset += PQmblen(&query[qoffset], pset.encoding);
        }
        qidx[i] = qoffset;
@@ -526,7 +526,7 @@ ReportSyntaxErrorPosition(const PGresult *result, const char *query)
                scroffset = 0;
                for (i = 0; i < msg.len; i += PQmblen(&msg.data[i], pset.encoding))
                {
-                       scroffset += 1;         /* XXX fix me when we have screen width info */
+                       scroffset += PQdsplen(&msg.data[i], pset.encoding);
                }
 
                /* Finish and emit the message. */
index 4273da9..c4fd6f5 100644 (file)
@@ -1,4 +1,4 @@
-/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.49 2003/11/29 22:41:04 pgsql Exp $ */
+/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.50 2004/03/15 10:41:26 ishii Exp $ */
 
 #ifndef PG_WCHAR_H
 #define PG_WCHAR_H
@@ -248,11 +248,14 @@ typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
                                                                                                                int len);
 typedef int (*mblen_converter) (const unsigned char *mbstr);
 
+typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
+
 typedef struct
 {
        mb2wchar_with_len_converter mb2wchar_with_len;          /* convert a multibyte
                                                                                                                 * string to a wchar */
        mblen_converter mblen;          /* returns the length of a multibyte char */
+       mbdisplaylen_converter  dsplen; /* returns the display length of a multibyte char */
        int                     maxmblen;               /* max bytes for a char in this charset */
 } pg_wchar_tbl;
 
@@ -283,7 +286,9 @@ extern int  pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
 extern int     pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
 extern size_t pg_wchar_strlen(const pg_wchar *wstr);
 extern int     pg_mblen(const unsigned char *mbstr);
+extern int     pg_dsplen(const unsigned char *mbstr);
 extern int     pg_encoding_mblen(int encoding, const unsigned char *mbstr);
+extern int     pg_encoding_dsplen(int encoding, const unsigned char *mbstr);
 extern int     pg_mule_mblen(const unsigned char *mbstr);
 extern int     pg_mic_mblen(const unsigned char *mbstr);
 extern int     pg_mbstrlen(const unsigned char *mbstr);
index 3717998..d484747 100644 (file)
@@ -23,7 +23,7 @@
  * Portions Copyright (c) 1994, Regents of the University of California
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.104 2003/11/29 19:52:12 pgsql Exp $
+ *       $PostgreSQL: pgsql/src/interfaces/libpq/fe-misc.c,v 1.105 2004/03/15 10:41:26 ishii Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -1096,6 +1096,16 @@ PQmblen(const unsigned char *s, int encoding)
 }
 
 /*
+ * returns the display length of the word beginning at s, using the
+ * specified encoding.
+ */
+int
+PQdsplen(const unsigned char *s, int encoding)
+{
+       return (pg_encoding_dsplen(encoding, s));
+}
+
+/*
  * Get encoding id from environment variable PGCLIENTENCODING.
  */
 int
index f6ea1f4..293d50e 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2003, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.102 2004/01/09 02:02:43 momjian Exp $
+ * $PostgreSQL: pgsql/src/interfaces/libpq/libpq-fe.h,v 1.103 2004/03/15 10:41:26 ishii Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -447,6 +447,9 @@ extern int  lo_export(PGconn *conn, Oid lobjId, const char *filename);
 /* Determine length of multibyte encoded char at *s */
 extern int     PQmblen(const unsigned char *s, int encoding);
 
+/* Determine display length of multibyte encoded char at *s */
+extern int     PQdsplen(const unsigned char *s, int encoding);
+
 /* Get encoding id from environment variable PGCLIENTENCODING */
 extern int     PQenv2encoding(void);