OSDN Git Service

Fix/improve bytea and boolean support in PL/Python
author: Peter Eisentraut <peter_e@gmx.net>
Wed, 9 Sep 2009 19:00:09 +0000 (19:00 +0000)
committer: Peter Eisentraut <peter_e@gmx.net>
Wed, 9 Sep 2009 19:00:09 +0000 (19:00 +0000)
Before, PL/Python converted data between SQL and Python by going
through a C string representation.  This broke for bytea in two ways:

- On input (function parameters), you would get a Python string that
  contains bytea's particular external representation with backslashes
  etc., instead of a sequence of bytes, which is what you would expect
  in a Python environment.  This problem is exacerbated by the new
  bytea output format.

- On output (function return value), null bytes in the Python string
  would cause truncation before the data gets stored into a bytea
  datum.

This is now fixed by converting directly between the PostgreSQL datum
and the Python representation.

The required generalized infrastructure also allows for other
improvements in passing:

- When returning a boolean value, the SQL datum is now true if and
  only if Python considers the value that was passed out of the
  PL/Python function to be true.  Previously, this determination was
  left to the boolean data type input function.  So, now returning
  'foo' results in true, because Python considers it true, rather than
  false because PostgreSQL considers it false.

- On input, we can convert the integer and float types directly to
  their Python equivalents without having to go through an
  intermediate string representation.

original patch by Caleb Welton, with updates by myself

src/backend/utils/adt/domains.c
src/include/utils/builtins.h
src/pl/plpython/expected/plpython_types.out
src/pl/plpython/plpython.c
src/pl/plpython/sql/plpython_types.sql

index 0aaa9b8..5b58ee8 100644 (file)
@@ -25,7 +25,7 @@
  *
  *
  * IDENTIFICATION
- *       $PostgreSQL: pgsql/src/backend/utils/adt/domains.c,v 1.8 2009/01/01 17:23:49 momjian Exp $
+ *       $PostgreSQL: pgsql/src/backend/utils/adt/domains.c,v 1.9 2009/09/09 19:00:09 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -302,3 +302,40 @@ domain_recv(PG_FUNCTION_ARGS)
        else
                PG_RETURN_DATUM(value);
 }
+
+/*
+ * domain_check - check value (or NULL, if isnull) against the constraints
+ * of domainType.  If extra is non-NULL, *extra caches the domain setup
+ * between calls (e.g. a FmgrInfo's fn_extra with its fn_mcxt); if extra is
+ * NULL the setup is repeated on each call.  mcxt NULL = CurrentMemoryContext.
+ */
+void
+domain_check(Datum value, bool isnull, Oid domainType, void **extra, MemoryContext mcxt)
+{
+       DomainIOData *my_extra = NULL;
+
+       if (mcxt == NULL)
+               mcxt = CurrentMemoryContext;
+
+       /*
+        * Reuse the state cached in *extra if present; else build it in mcxt and
+        * save it in *extra.  NOTE(review): mcxt presumably must outlive *extra.
+        */
+       if (extra)
+               my_extra = (DomainIOData *) *extra;
+       if (my_extra == NULL)
+       {
+               my_extra = (DomainIOData *) MemoryContextAlloc(mcxt,
+                                                                                                          sizeof(DomainIOData));
+               domain_state_setup(my_extra, domainType, true, mcxt);
+               if (extra)
+                       *extra = (void *) my_extra;
+       }
+       else if (my_extra->domain_type != domainType)   /* cached state is for another domain: redo setup */
+               domain_state_setup(my_extra, domainType, true, mcxt);
+
+       /*
+        * Do the necessary checks to ensure it's a valid domain value.
+        */
+       domain_check_input(value, isnull, my_extra);
+}
index b664799..bcf027c 100644 (file)
@@ -7,7 +7,7 @@
  * Portions Copyright (c) 1996-2009, PostgreSQL Global Development Group
  * Portions Copyright (c) 1994, Regents of the University of California
  *
- * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.338 2009/08/04 16:08:36 tgl Exp $
+ * $PostgreSQL: pgsql/src/include/utils/builtins.h,v 1.339 2009/09/09 19:00:09 petere Exp $
  *
  *-------------------------------------------------------------------------
  */
@@ -137,6 +137,7 @@ extern Datum char_text(PG_FUNCTION_ARGS);
 /* domains.c */
 extern Datum domain_in(PG_FUNCTION_ARGS);
 extern Datum domain_recv(PG_FUNCTION_ARGS);
+extern void domain_check(Datum value, bool isnull, Oid domainType, void **extra, MemoryContext mcxt);
 
 /* encode.c */
 extern Datum binary_encode(PG_FUNCTION_ARGS);
index 19b3c9e..2dd498c 100644 (file)
@@ -32,6 +32,74 @@ CONTEXT:  PL/Python function "test_type_conversion_bool"
  
 (1 row)
 
+-- test various other ways to express Booleans in Python
+CREATE FUNCTION test_type_conversion_bool_other(n int) RETURNS bool AS $$
+# numbers
+if n == 0:
+   ret = 0
+elif n == 1:
+   ret = 5
+# strings
+elif n == 2:
+   ret = ''
+elif n == 3:
+   ret = 'fa' # true in Python, false in PostgreSQL
+# containers
+elif n == 4:
+   ret = []
+elif n == 5:
+   ret = [0]
+plpy.info(ret, not not ret)
+return ret
+$$ LANGUAGE plpythonu;
+SELECT * FROM test_type_conversion_bool_other(0);
+INFO:  (0, False)
+CONTEXT:  PL/Python function "test_type_conversion_bool_other"
+ test_type_conversion_bool_other 
+---------------------------------
+ f
+(1 row)
+
+SELECT * FROM test_type_conversion_bool_other(1);
+INFO:  (5, True)
+CONTEXT:  PL/Python function "test_type_conversion_bool_other"
+ test_type_conversion_bool_other 
+---------------------------------
+ t
+(1 row)
+
+SELECT * FROM test_type_conversion_bool_other(2);
+INFO:  ('', False)
+CONTEXT:  PL/Python function "test_type_conversion_bool_other"
+ test_type_conversion_bool_other 
+---------------------------------
+ f
+(1 row)
+
+SELECT * FROM test_type_conversion_bool_other(3);
+INFO:  ('fa', True)
+CONTEXT:  PL/Python function "test_type_conversion_bool_other"
+ test_type_conversion_bool_other 
+---------------------------------
+ t
+(1 row)
+
+SELECT * FROM test_type_conversion_bool_other(4);
+INFO:  ([], False)
+CONTEXT:  PL/Python function "test_type_conversion_bool_other"
+ test_type_conversion_bool_other 
+---------------------------------
+ f
+(1 row)
+
+SELECT * FROM test_type_conversion_bool_other(5);
+INFO:  ([0], True)
+CONTEXT:  PL/Python function "test_type_conversion_bool_other"
+ test_type_conversion_bool_other 
+---------------------------------
+ t
+(1 row)
+
 CREATE FUNCTION test_type_conversion_char(x char) RETURNS char AS $$
 plpy.info(x, type(x))
 return x
@@ -278,13 +346,21 @@ plpy.info(x, type(x))
 return x
 $$ LANGUAGE plpythonu;
 SELECT * FROM test_type_conversion_bytea('hello world');
-INFO:  ('\\x68656c6c6f20776f726c64', <type 'str'>)
+INFO:  ('hello world', <type 'str'>)
 CONTEXT:  PL/Python function "test_type_conversion_bytea"
  test_type_conversion_bytea 
 ----------------------------
  \x68656c6c6f20776f726c64
 (1 row)
 
+SELECT * FROM test_type_conversion_bytea(E'null\\000byte');
+INFO:  ('null\x00byte', <type 'str'>)
+CONTEXT:  PL/Python function "test_type_conversion_bytea"
+ test_type_conversion_bytea 
+----------------------------
+ \x6e756c6c0062797465
+(1 row)
+
 SELECT * FROM test_type_conversion_bytea(null);
 INFO:  (None, <type 'NoneType'>)
 CONTEXT:  PL/Python function "test_type_conversion_bytea"
@@ -304,17 +380,31 @@ try:
 except ValueError, e:
     return 'FAILED: ' + str(e)
 $$ LANGUAGE plpythonu;
-/* This will currently fail because the bytea datum is presented to
-   Python as a string in bytea-encoding, which Python doesn't understand. */
 SELECT test_type_unmarshal(x) FROM test_type_marshal() x;
-   test_type_unmarshal    
---------------------------
- FAILED: bad marshal data
+ test_type_unmarshal 
+---------------------
+ hello world
 (1 row)
 
 --
 -- Domains
 --
+CREATE DOMAIN booltrue AS bool CHECK (VALUE IS TRUE OR VALUE IS NULL);
+CREATE FUNCTION test_type_conversion_booltrue(x booltrue, y bool) RETURNS booltrue AS $$
+return y
+$$ LANGUAGE plpythonu;
+SELECT * FROM test_type_conversion_booltrue(true, true);
+ test_type_conversion_booltrue 
+-------------------------------
+ t
+(1 row)
+
+SELECT * FROM test_type_conversion_booltrue(false, true);
+ERROR:  value for domain booltrue violates check constraint "booltrue_check"
+SELECT * FROM test_type_conversion_booltrue(true, false);
+ERROR:  value for domain booltrue violates check constraint "booltrue_check"
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_booltrue"
 CREATE DOMAIN uint2 AS int2 CHECK (VALUE >= 0);
 CREATE FUNCTION test_type_conversion_uint2(x uint2, y int) RETURNS uint2 AS $$
 plpy.info(x, type(x))
@@ -342,13 +432,29 @@ CONTEXT:  PL/Python function "test_type_conversion_uint2"
                           1
 (1 row)
 
+CREATE DOMAIN nnint AS int CHECK (VALUE IS NOT NULL);
+CREATE FUNCTION test_type_conversion_nnint(x nnint, y int) RETURNS nnint AS $$
+return y
+$$ LANGUAGE plpythonu;
+SELECT * FROM test_type_conversion_nnint(10, 20);
+ test_type_conversion_nnint 
+----------------------------
+                         20
+(1 row)
+
+SELECT * FROM test_type_conversion_nnint(null, 20);
+ERROR:  value for domain nnint violates check constraint "nnint_check"
+SELECT * FROM test_type_conversion_nnint(10, null);
+ERROR:  value for domain nnint violates check constraint "nnint_check"
+CONTEXT:  while creating return value
+PL/Python function "test_type_conversion_nnint"
 CREATE DOMAIN bytea10 AS bytea CHECK (octet_length(VALUE) = 10 AND VALUE IS NOT NULL);
 CREATE FUNCTION test_type_conversion_bytea10(x bytea10, y bytea) RETURNS bytea10 AS $$
 plpy.info(x, type(x))
 return y
 $$ LANGUAGE plpythonu;
 SELECT * FROM test_type_conversion_bytea10('hello wold', 'hello wold');
-INFO:  ('\\x68656c6c6f20776f6c64', <type 'str'>)
+INFO:  ('hello wold', <type 'str'>)
 CONTEXT:  PL/Python function "test_type_conversion_bytea10"
  test_type_conversion_bytea10 
 ------------------------------
@@ -358,7 +464,7 @@ CONTEXT:  PL/Python function "test_type_conversion_bytea10"
 SELECT * FROM test_type_conversion_bytea10('hello world', 'hello wold');
 ERROR:  value for domain bytea10 violates check constraint "bytea10_check"
 SELECT * FROM test_type_conversion_bytea10('hello word', 'hello world');
-INFO:  ('\\x68656c6c6f20776f7264', <type 'str'>)
+INFO:  ('hello word', <type 'str'>)
 CONTEXT:  PL/Python function "test_type_conversion_bytea10"
 ERROR:  value for domain bytea10 violates check constraint "bytea10_check"
 CONTEXT:  while creating return value
@@ -366,7 +472,7 @@ PL/Python function "test_type_conversion_bytea10"
 SELECT * FROM test_type_conversion_bytea10(null, 'hello word');
 ERROR:  value for domain bytea10 violates check constraint "bytea10_check"
 SELECT * FROM test_type_conversion_bytea10('hello word', null);
-INFO:  ('\\x68656c6c6f20776f7264', <type 'str'>)
+INFO:  ('hello word', <type 'str'>)
 CONTEXT:  PL/Python function "test_type_conversion_bytea10"
 ERROR:  value for domain bytea10 violates check constraint "bytea10_check"
 CONTEXT:  while creating return value
index 4aba4ba..909eab0 100644 (file)
@@ -1,7 +1,7 @@
 /**********************************************************************
  * plpython.c - python as a procedural language for PostgreSQL
  *
- *     $PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.127 2009/08/25 12:44:59 petere Exp $
+ *     $PostgreSQL: pgsql/src/pl/plpython/plpython.c,v 1.128 2009/09/09 19:00:09 petere Exp $
  *
  *********************************************************************
  */
@@ -78,7 +78,8 @@ PG_MODULE_MAGIC;
  * objects.
  */
 
-typedef PyObject *(*PLyDatumToObFunc) (const char *);
+struct PLyDatumToOb;
+typedef PyObject *(*PLyDatumToObFunc) (struct PLyDatumToOb*, Datum);
 
 typedef struct PLyDatumToOb
 {
@@ -104,8 +105,16 @@ typedef union PLyTypeInput
 /* convert PyObject to a Postgresql Datum or tuple.
  * output from Python
  */
+
+struct PLyObToDatum;
+struct PLyTypeInfo;
+typedef Datum (*PLyObToDatumFunc) (struct PLyTypeInfo*,
+                                                                  struct PLyObToDatum*,
+                                                                  PyObject *);
+
 typedef struct PLyObToDatum
 {
+       PLyObToDatumFunc func;
        FmgrInfo        typfunc;                /* The type's input function */
        Oid                     typoid;                 /* The OID of the type */
        Oid                     typioparam;
@@ -131,12 +140,11 @@ typedef struct PLyTypeInfo
 {
        PLyTypeInput in;
        PLyTypeOutput out;
-       int                     is_rowtype;
-
        /*
-        * is_rowtype can be: -1  not known yet (initial state) 0  scalar datatype
-        * 1  rowtype 2  rowtype, but I/O functions not set up yet
+        * is_rowtype can be: -1 = not known yet (initial state); 0 = scalar datatype;
+        * 1 = rowtype; 2 = rowtype, but I/O functions not set up yet
         */
+       int                     is_rowtype;
 } PLyTypeInfo;
 
 
@@ -263,12 +271,24 @@ static void PLy_output_tuple_funcs(PLyTypeInfo *, TupleDesc);
 static void PLy_input_tuple_funcs(PLyTypeInfo *, TupleDesc);
 
 /* conversion functions */
+static PyObject *PLyBool_FromBool(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyFloat_FromFloat4(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyFloat_FromFloat8(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyFloat_FromNumeric(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyInt_FromInt16(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyInt_FromInt32(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyLong_FromInt64(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyString_FromBytea(PLyDatumToOb *arg, Datum d);
+static PyObject *PLyString_FromDatum(PLyDatumToOb *arg, Datum d);
+
 static PyObject *PLyDict_FromTuple(PLyTypeInfo *, HeapTuple, TupleDesc);
-static PyObject *PLyBool_FromString(const char *);
-static PyObject *PLyFloat_FromString(const char *);
-static PyObject *PLyInt_FromString(const char *);
-static PyObject *PLyLong_FromString(const char *);
-static PyObject *PLyString_FromString(const char *);
+
+static Datum PLyObject_ToBool(PLyTypeInfo *, PLyObToDatum *,
+                                                         PyObject *);
+static Datum PLyObject_ToBytea(PLyTypeInfo *, PLyObToDatum *,
+                                                          PyObject *);
+static Datum PLyObject_ToDatum(PLyTypeInfo *, PLyObToDatum *,
+                                                          PyObject *);
 
 static HeapTuple PLyMapping_ToTuple(PLyTypeInfo *, PyObject *);
 static HeapTuple PLySequence_ToTuple(PLyTypeInfo *, PyObject *);
@@ -552,8 +572,6 @@ PLy_modify_tuple(PLyProcedure *proc, PyObject *pltd, TriggerData *tdata,
 
                for (i = 0; i < natts; i++)
                {
-                       char       *src;
-
                        platt = PyList_GetItem(plkeys, i);
                        if (!PyString_Check(platt))
                                ereport(ERROR,
@@ -580,20 +598,9 @@ PLy_modify_tuple(PLyProcedure *proc, PyObject *pltd, TriggerData *tdata,
                        }
                        else if (plval != Py_None)
                        {
-                               plstr = PyObject_Str(plval);
-                               if (!plstr)
-                                       PLy_elog(ERROR, "could not create string representation of Python object");
-                               src = PyString_AsString(plstr);
-
-                               modvalues[i] =
-                                       InputFunctionCall(&proc->result.out.r.atts[atti].typfunc,
-                                                                         src,
-                                                                       proc->result.out.r.atts[atti].typioparam,
-                                                                         tupdesc->attrs[atti]->atttypmod);
+                               PLyObToDatum *att = &proc->result.out.r.atts[atti];
+                               modvalues[i] = (att->func) (&proc->result, att, plval);
                                modnulls[i] = ' ';
-
-                               Py_DECREF(plstr);
-                               plstr = NULL;
                        }
                        else
                        {
@@ -830,8 +837,6 @@ PLy_function_handler(FunctionCallInfo fcinfo, PLyProcedure *proc)
        Datum           rv;
        PyObject   *volatile plargs = NULL;
        PyObject   *volatile plrv = NULL;
-       PyObject   *volatile plrv_so = NULL;
-       char       *plrv_sc;
        ErrorContextCallback plerrcontext;
 
        PG_TRY();
@@ -909,7 +914,6 @@ PLy_function_handler(FunctionCallInfo fcinfo, PLyProcedure *proc)
 
                                Py_XDECREF(plargs);
                                Py_XDECREF(plrv);
-                               Py_XDECREF(plrv_so);
 
                                PLy_function_delete_args(proc);
 
@@ -983,21 +987,15 @@ PLy_function_handler(FunctionCallInfo fcinfo, PLyProcedure *proc)
                else
                {
                        fcinfo->isnull = false;
-                       plrv_so = PyObject_Str(plrv);
-                       if (!plrv_so)
-                               PLy_elog(ERROR, "could not create string representation of Python object");
-                       plrv_sc = PyString_AsString(plrv_so);
-                       rv = InputFunctionCall(&proc->result.out.d.typfunc,
-                                                                  plrv_sc,
-                                                                  proc->result.out.d.typioparam,
-                                                                  -1);
+                       rv = (proc->result.out.d.func) (&proc->result,
+                                                                                       &proc->result.out.d,
+                                                                                       plrv);
                }
        }
        PG_CATCH();
        {
                Py_XDECREF(plargs);
                Py_XDECREF(plrv);
-               Py_XDECREF(plrv_so);
 
                PG_RE_THROW();
        }
@@ -1007,7 +1005,6 @@ PLy_function_handler(FunctionCallInfo fcinfo, PLyProcedure *proc)
 
        Py_XDECREF(plargs);
        Py_DECREF(plrv);
-       Py_XDECREF(plrv_so);
 
        return rv;
 }
@@ -1090,12 +1087,8 @@ PLy_function_build_args(FunctionCallInfo fcinfo, PLyProcedure *proc)
                                        arg = NULL;
                                else
                                {
-                                       char       *ct;
-
-                                       ct = OutputFunctionCall(&(proc->args[i].in.d.typfunc),
-                                                                                       fcinfo->arg[i]);
-                                       arg = (proc->args[i].in.d.func) (ct);
-                                       pfree(ct);
+                                       arg = (proc->args[i].in.d.func) (&(proc->args[i].in.d),
+                                                                                                        fcinfo->arg[i]);
                                }
                        }
 
@@ -1646,6 +1639,24 @@ PLy_output_datum_func2(PLyObToDatum *arg, HeapTuple typeTup)
        arg->typoid = HeapTupleGetOid(typeTup);
        arg->typioparam = getTypeIOParam(typeTup);
        arg->typbyval = typeStruct->typbyval;
+
+       /*
+        * Select a conversion function to convert Python objects to
+        * PostgreSQL datums.  Most data types can go through the generic
+        * function.
+        */
+       switch (getBaseType(arg->typoid))
+       {
+               case BOOLOID:
+                       arg->func = PLyObject_ToBool;
+                       break;
+               case BYTEAOID:
+                       arg->func = PLyObject_ToBytea;
+                       break;
+               default:
+                       arg->func = PLyObject_ToDatum;
+                       break;
+       }
 }
 
 static void
@@ -1672,22 +1683,31 @@ PLy_input_datum_func2(PLyDatumToOb *arg, Oid typeOid, HeapTuple typeTup)
        switch (getBaseType(typeOid))
        {
                case BOOLOID:
-                       arg->func = PLyBool_FromString;
+                       arg->func = PLyBool_FromBool;
                        break;
                case FLOAT4OID:
+                       arg->func = PLyFloat_FromFloat4;
+                       break;
                case FLOAT8OID:
+                       arg->func = PLyFloat_FromFloat8;
+                       break;
                case NUMERICOID:
-                       arg->func = PLyFloat_FromString;
+                       arg->func = PLyFloat_FromNumeric;
                        break;
                case INT2OID:
+                       arg->func = PLyInt_FromInt16;
+                       break;
                case INT4OID:
-                       arg->func = PLyInt_FromString;
+                       arg->func = PLyInt_FromInt32;
                        break;
                case INT8OID:
-                       arg->func = PLyLong_FromString;
+                       arg->func = PLyLong_FromInt64;
+                       break;
+               case BYTEAOID:
+                       arg->func = PLyString_FromBytea;
                        break;
                default:
-                       arg->func = PLyString_FromString;
+                       arg->func = PLyString_FromDatum;
                        break;
        }
 }
@@ -1713,9 +1733,8 @@ PLy_typeinfo_dealloc(PLyTypeInfo *arg)
        }
 }
 
-/* assumes that a bool is always returned as a 't' or 'f' */
 static PyObject *
-PLyBool_FromString(const char *src)
+PLyBool_FromBool(PLyDatumToOb *arg, Datum d)
 {
        /*
         * We would like to use Py_RETURN_TRUE and Py_RETURN_FALSE here for
@@ -1723,47 +1742,75 @@ PLyBool_FromString(const char *src)
         * Python >= 2.3, and we support older versions.
         * http://docs.python.org/api/boolObjects.html
         */
-       if (src[0] == 't')
+       if (DatumGetBool(d))
                return PyBool_FromLong(1);
        return PyBool_FromLong(0);
 }
 
 static PyObject *
-PLyFloat_FromString(const char *src)
+PLyFloat_FromFloat4(PLyDatumToOb *arg, Datum d)
 {
-       double          v;
-       char       *eptr;
+       return PyFloat_FromDouble(DatumGetFloat4(d));
+}
 
-       errno = 0;
-       v = strtod(src, &eptr);
-       if (*eptr != '\0' || errno)
-               return NULL;
-       return PyFloat_FromDouble(v);
+static PyObject *
+PLyFloat_FromFloat8(PLyDatumToOb *arg, Datum d)
+{
+       return PyFloat_FromDouble(DatumGetFloat8(d));
 }
 
 static PyObject *
-PLyInt_FromString(const char *src)
+PLyFloat_FromNumeric(PLyDatumToOb *arg, Datum d)
 {
-       long            v;
-       char       *eptr;
+       /*
+        * Numeric is cast to a PyFloat:
+        *   This results in a loss of precision
+        *   Would it be better to cast to PyString?
+        */
+       Datum  f = DirectFunctionCall1(numeric_float8, d);
+       double x = DatumGetFloat8(f);
+       return PyFloat_FromDouble(x);
+}
 
-       errno = 0;
-       v = strtol(src, &eptr, 0);
-       if (*eptr != '\0' || errno)
-               return NULL;
-       return PyInt_FromLong(v);
+static PyObject *
+PLyInt_FromInt16(PLyDatumToOb *arg, Datum d)
+{
+       return PyInt_FromLong(DatumGetInt16(d));
 }
 
 static PyObject *
-PLyLong_FromString(const char *src)
+PLyInt_FromInt32(PLyDatumToOb *arg, Datum d)
 {
-       return PyLong_FromString((char *) src, NULL, 0);
+       return PyInt_FromLong(DatumGetInt32(d));
 }
 
 static PyObject *
-PLyString_FromString(const char *src)
+PLyLong_FromInt64(PLyDatumToOb *arg, Datum d)
 {
-       return PyString_FromString(src);
+       /* on 32 bit platforms "long" may be too small */
+       if (sizeof(int64) > sizeof(long))
+               return PyLong_FromLongLong(DatumGetInt64(d));
+       else
+               return PyLong_FromLong(DatumGetInt64(d));
+}
+
+static PyObject *
+PLyString_FromBytea(PLyDatumToOb *arg, Datum d)       /* bytea datum -> Python string; arg is unused */
+{
+       text     *txt = DatumGetByteaP(d);     /* NOTE(review): bytea datum held in a text pointer -- confirm intended */
+       char     *str = VARDATA(txt);
+       size_t    size = VARSIZE(txt) - VARHDRSZ;      /* data length, i.e. total size minus the header */
+
+       return PyString_FromStringAndSize(str, size);  /* explicit length, so it does not stop at a null byte */
+}
+
+static PyObject *
+PLyString_FromDatum(PLyDatumToOb *arg, Datum d)       /* generic case: datum -> cstring -> Python string */
+{
+       char     *x = OutputFunctionCall(&arg->typfunc, d);    /* cstring form of d via arg->typfunc */
+       PyObject *r = PyString_FromString(x);
+       pfree(x);       /* NOTE(review): presumably safe because PyString_FromString copies x -- confirm */
+       return r;
 }
 
 static PyObject *
@@ -1783,8 +1830,7 @@ PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
        {
                for (i = 0; i < info->in.r.natts; i++)
                {
-                       char       *key,
-                                          *vsrc;
+                       char       *key;
                        Datum           vattr;
                        bool            is_null;
                        PyObject   *value;
@@ -1799,14 +1845,7 @@ PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
                                PyDict_SetItemString(dict, key, Py_None);
                        else
                        {
-                               vsrc = OutputFunctionCall(&info->in.r.atts[i].typfunc,
-                                                                                 vattr);
-
-                               /*
-                                * no exceptions allowed
-                                */
-                               value = info->in.r.atts[i].func(vsrc);
-                               pfree(vsrc);
+                               value = (info->in.r.atts[i].func) (&info->in.r.atts[i], vattr);
                                PyDict_SetItemString(dict, key, value);
                                Py_DECREF(value);
                        }
@@ -1822,6 +1861,116 @@ PLyDict_FromTuple(PLyTypeInfo *info, HeapTuple tuple, TupleDesc desc)
        return dict;
 }
 
+/*
+ * Convert a Python object to a PostgreSQL bool datum using Python's own
+ * truth test, because Python attaches a Boolean value to everything, more
+ * things than the PostgreSQL bool type can parse.  plrv must not be None;
+ * info is unused.  No input function is called, so domains are checked here.
+ */
+static Datum
+PLyObject_ToBool(PLyTypeInfo *info,
+                                PLyObToDatum *arg,
+                                PyObject *plrv)
+{
+       Datum           rv;
+
+       Assert(plrv != Py_None);
+       rv = BoolGetDatum(PyObject_IsTrue(plrv));       /* NOTE(review): PyObject_IsTrue() error return (-1) is not distinguished; presumably becomes true -- confirm */
+
+       if (get_typtype(arg->typoid) == TYPTYPE_DOMAIN)
+               domain_check(rv, false, arg->typoid, &arg->typfunc.fn_extra, arg->typfunc.fn_mcxt);
+
+       return rv;
+}
+
+/*
+ * Convert a Python object to a PostgreSQL bytea datum by copying the bytes
+ * of its str() form, using the length Python reports so that embedded null
+ * bytes are kept.  No input function is called; domains are checked below.
+ */
+static Datum
+PLyObject_ToBytea(PLyTypeInfo *info,
+                                 PLyObToDatum *arg,
+                                 PyObject *plrv)
+{
+       PyObject   *volatile plrv_so = NULL;
+       Datum       rv;
+
+       Assert(plrv != Py_None);
+
+       plrv_so = PyObject_Str(plrv);
+       if (!plrv_so)
+               PLy_elog(ERROR, "could not create string representation of Python object");
+
+       PG_TRY();       /* so that the reference held in plrv_so is released on error too */
+       {
+               char *plrv_sc = PyString_AsString(plrv_so);
+               size_t len = PyString_Size(plrv_so);
+               size_t size = len + VARHDRSZ;   /* total datum size: data plus header */
+               bytea *result = palloc(size);
+
+               SET_VARSIZE(result, size);
+               memcpy(VARDATA(result), plrv_sc, len);  /* length-based copy, not strcpy */
+               rv = PointerGetDatum(result);
+       }
+       PG_CATCH();
+       {
+               Py_XDECREF(plrv_so);
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
+
+       Py_XDECREF(plrv_so);
+
+       if (get_typtype(arg->typoid) == TYPTYPE_DOMAIN)
+               domain_check(rv, false, arg->typoid, &arg->typfunc.fn_extra, arg->typfunc.fn_mcxt);
+
+       return rv;
+}
+
+/*
+ * Generic conversion function: pass the object's str() form, as a cstring,
+ * to arg->typfunc.  Raises an error if that string contains null bytes.
+ */
+static Datum
+PLyObject_ToDatum(PLyTypeInfo *info,
+                                 PLyObToDatum *arg,
+                                 PyObject *plrv)
+{
+       PyObject *volatile plrv_so = NULL;
+       Datum     rv;
+
+       Assert(plrv != Py_None);
+
+       plrv_so = PyObject_Str(plrv);
+       if (!plrv_so)
+               PLy_elog(ERROR, "could not create string representation of Python object");
+
+       PG_TRY();       /* so that the reference held in plrv_so is released on error too */
+       {
+               char *plrv_sc = PyString_AsString(plrv_so);
+               size_t plen = PyString_Size(plrv_so);   /* length as reported by Python */
+               size_t slen = strlen(plrv_sc);  /* length up to the first null byte */
+
+               if (slen < plen)
+                       ereport(ERROR,
+                                       (errcode(ERRCODE_DATATYPE_MISMATCH),
+                                        errmsg("could not convert Python object into cstring: Python string representation appears to contain null bytes")));
+               else if (slen > plen)   /* sanity check on the Python string */
+                       elog(ERROR, "could not convert Python object into cstring: Python string longer than reported length");
+               rv = InputFunctionCall(&arg->typfunc, plrv_sc, arg->typioparam, -1);
+       }
+       PG_CATCH();
+       {
+               Py_XDECREF(plrv_so);
+               PG_RE_THROW();
+       }
+       PG_END_TRY();
+
+       Py_XDECREF(plrv_so);
+
+       return rv;
+}
 
 static HeapTuple
 PLyMapping_ToTuple(PLyTypeInfo *info, PyObject *mapping)
@@ -1845,11 +1994,12 @@ PLyMapping_ToTuple(PLyTypeInfo *info, PyObject *mapping)
        for (i = 0; i < desc->natts; ++i)
        {
                char       *key;
-               PyObject   *volatile value,
-                                  *volatile so;
+               PyObject   *volatile value;
+               PLyObToDatum *att;
 
                key = NameStr(desc->attrs[i]->attname);
-               value = so = NULL;
+               value = NULL;
+               att = &info->out.r.atts[i];
                PG_TRY();
                {
                        value = PyMapping_GetItemString(mapping, key);
@@ -1860,19 +2010,7 @@ PLyMapping_ToTuple(PLyTypeInfo *info, PyObject *mapping)
                        }
                        else if (value)
                        {
-                               char       *valuestr;
-
-                               so = PyObject_Str(value);
-                               if (so == NULL)
-                                       PLy_elog(ERROR, "could not compute string representation of Python object");
-                               valuestr = PyString_AsString(so);
-
-                               values[i] = InputFunctionCall(&info->out.r.atts[i].typfunc
-                                                                                         ,valuestr
-                                                                                         ,info->out.r.atts[i].typioparam
-                                                                                         ,-1);
-                               Py_DECREF(so);
-                               so = NULL;
+                               values[i] = (att->func) (info, att, value);
                                nulls[i] = false;
                        }
                        else
@@ -1887,7 +2025,6 @@ PLyMapping_ToTuple(PLyTypeInfo *info, PyObject *mapping)
                }
                PG_CATCH();
                {
-                       Py_XDECREF(so);
                        Py_XDECREF(value);
                        PG_RE_THROW();
                }
@@ -1934,10 +2071,11 @@ PLySequence_ToTuple(PLyTypeInfo *info, PyObject *sequence)
        nulls = palloc(sizeof(bool) * desc->natts);
        for (i = 0; i < desc->natts; ++i)
        {
-               PyObject   *volatile value,
-                                  *volatile so;
+               PyObject   *volatile value;
+               PLyObToDatum *att;
 
-               value = so = NULL;
+               value = NULL;
+               att = &info->out.r.atts[i];
                PG_TRY();
                {
                        value = PySequence_GetItem(sequence, i);
@@ -1949,18 +2087,7 @@ PLySequence_ToTuple(PLyTypeInfo *info, PyObject *sequence)
                        }
                        else if (value)
                        {
-                               char       *valuestr;
-
-                               so = PyObject_Str(value);
-                               if (so == NULL)
-                                       PLy_elog(ERROR, "could not compute string representation of Python object");
-                               valuestr = PyString_AsString(so);
-                               values[i] = InputFunctionCall(&info->out.r.atts[i].typfunc
-                                                                                         ,valuestr
-                                                                                         ,info->out.r.atts[i].typioparam
-                                                                                         ,-1);
-                               Py_DECREF(so);
-                               so = NULL;
+                               values[i] = (att->func) (info, att, value);
                                nulls[i] = false;
                        }
 
@@ -1969,7 +2096,6 @@ PLySequence_ToTuple(PLyTypeInfo *info, PyObject *sequence)
                }
                PG_CATCH();
                {
-                       Py_XDECREF(so);
                        Py_XDECREF(value);
                        PG_RE_THROW();
                }
@@ -2005,11 +2131,12 @@ PLyObject_ToTuple(PLyTypeInfo *info, PyObject *object)
        for (i = 0; i < desc->natts; ++i)
        {
                char       *key;
-               PyObject   *volatile value,
-                                  *volatile so;
+               PyObject   *volatile value;
+               PLyObToDatum *att;
 
                key = NameStr(desc->attrs[i]->attname);
-               value = so = NULL;
+               value = NULL;
+               att = &info->out.r.atts[i];
                PG_TRY();
                {
                        value = PyObject_GetAttrString(object, key);
@@ -2020,18 +2147,7 @@ PLyObject_ToTuple(PLyTypeInfo *info, PyObject *object)
                        }
                        else if (value)
                        {
-                               char       *valuestr;
-
-                               so = PyObject_Str(value);
-                               if (so == NULL)
-                                       PLy_elog(ERROR, "could not compute string representation of Python object");
-                               valuestr = PyString_AsString(so);
-                               values[i] = InputFunctionCall(&info->out.r.atts[i].typfunc
-                                                                                         ,valuestr
-                                                                                         ,info->out.r.atts[i].typioparam
-                                                                                         ,-1);
-                               Py_DECREF(so);
-                               so = NULL;
+                               values[i] = (att->func) (info, att, value);
                                nulls[i] = false;
                        }
                        else
@@ -2047,7 +2163,6 @@ PLyObject_ToTuple(PLyTypeInfo *info, PyObject *object)
                }
                PG_CATCH();
                {
-                       Py_XDECREF(so);
                        Py_XDECREF(value);
                        PG_RE_THROW();
                }
index 79fbbb9..becf5cf 100644 (file)
@@ -16,6 +16,35 @@ SELECT * FROM test_type_conversion_bool(false);
 SELECT * FROM test_type_conversion_bool(null);
 
 
+-- Return non-bool Python values (numbers, strings, lists) from a bool function; the SQL result is expected to follow Python truthiness.
+CREATE FUNCTION test_type_conversion_bool_other(n int) RETURNS bool AS $$
+# numbers
+if n == 0:
+   ret = 0
+elif n == 1:
+   ret = 5
+# strings
+elif n == 2:
+   ret = ''
+elif n == 3:
+   ret = 'fa' # true in Python, false in PostgreSQL
+# containers
+elif n == 4:
+   ret = []
+elif n == 5:
+   ret = [0]
+plpy.info(ret, not not ret)
+return ret
+$$ LANGUAGE plpythonu;
+
+SELECT * FROM test_type_conversion_bool_other(0);  -- number 0: falsy in Python
+SELECT * FROM test_type_conversion_bool_other(1);  -- number 5: truthy in Python
+SELECT * FROM test_type_conversion_bool_other(2);  -- empty string: falsy in Python
+SELECT * FROM test_type_conversion_bool_other(3);  -- non-empty string fa: truthy in Python
+SELECT * FROM test_type_conversion_bool_other(4);  -- empty list: falsy in Python
+SELECT * FROM test_type_conversion_bool_other(5);  -- non-empty list: truthy in Python
+
+
 CREATE FUNCTION test_type_conversion_char(x char) RETURNS char AS $$
 plpy.info(x, type(x))
 return x
@@ -105,6 +134,7 @@ return x
 $$ LANGUAGE plpythonu;
 
 SELECT * FROM test_type_conversion_bytea('hello world');
+SELECT * FROM test_type_conversion_bytea(E'null\\000byte');  -- bytea with an embedded zero byte; checks the value is not truncated at that byte
 SELECT * FROM test_type_conversion_bytea(null);
 
 
@@ -121,8 +151,6 @@ except ValueError, e:
     return 'FAILED: ' + str(e)
 $$ LANGUAGE plpythonu;
 
-/* This will currently fail because the bytea datum is presented to
-   Python as a string in bytea-encoding, which Python doesn't understand. */
 SELECT test_type_unmarshal(x) FROM test_type_marshal() x;
 
 
@@ -130,6 +158,17 @@ SELECT test_type_unmarshal(x) FROM test_type_marshal() x;
 -- Domains
 --
 
+CREATE DOMAIN booltrue AS bool CHECK (VALUE IS TRUE OR VALUE IS NULL);  -- bool domain whose CHECK rejects false
+
+CREATE FUNCTION test_type_conversion_booltrue(x booltrue, y bool) RETURNS booltrue AS $$
+return y
+$$ LANGUAGE plpythonu;  -- returns the plain bool y as a booltrue result; x is never read by the body
+
+SELECT * FROM test_type_conversion_booltrue(true, true);  -- argument and result both satisfy the CHECK
+SELECT * FROM test_type_conversion_booltrue(false, true);  -- x = false violates the CHECK; presumably rejected before the function runs (confirm in expected output)
+SELECT * FROM test_type_conversion_booltrue(true, false);  -- returned false violates the CHECK; presumably rejected on return (confirm in expected output)
+
+
 CREATE DOMAIN uint2 AS int2 CHECK (VALUE >= 0);
 
 CREATE FUNCTION test_type_conversion_uint2(x uint2, y int) RETURNS uint2 AS $$
@@ -142,6 +181,17 @@ SELECT * FROM test_type_conversion_uint2(100::uint2, -50);
 SELECT * FROM test_type_conversion_uint2(null, 1);
 
 
+CREATE DOMAIN nnint AS int CHECK (VALUE IS NOT NULL);  -- int domain whose CHECK rejects NULL
+
+CREATE FUNCTION test_type_conversion_nnint(x nnint, y int) RETURNS nnint AS $$
+return y
+$$ LANGUAGE plpythonu;  -- returns the plain int y as an nnint result; x is never read by the body
+
+SELECT * FROM test_type_conversion_nnint(10, 20);  -- argument and result both satisfy the CHECK
+SELECT * FROM test_type_conversion_nnint(null, 20);  -- x = NULL violates the CHECK; presumably rejected before the function runs (confirm in expected output)
+SELECT * FROM test_type_conversion_nnint(10, null);  -- returned NULL violates the CHECK; presumably rejected on return (confirm in expected output)
+
+
 CREATE DOMAIN bytea10 AS bytea CHECK (octet_length(VALUE) = 10 AND VALUE IS NOT NULL);
 
 CREATE FUNCTION test_type_conversion_bytea10(x bytea10, y bytea) RETURNS bytea10 AS $$