doc/src/sgml/extend.sgml

   1 <!-- $PostgreSQL: pgsql/doc/src/sgml/extend.sgml,v 1.44 2010/06/03 14:41:25 momjian Exp $ -->
   2
   3  <chapter id="extend">
   4   <title>Extending <acronym>SQL</acronym></title>
   5
   6   <indexterm zone="extend">
   7    <primary>extending SQL</primary>
   8   </indexterm>
   9
  10   <para>
  11    In  the  sections  that follow, we will discuss how you
  12    can extend the <productname>PostgreSQL</productname>
  13    <acronym>SQL</acronym> query language by adding:
  14
  15    <itemizedlist spacing="compact" mark="bullet">
  16     <listitem>
  17      <para>
  18       functions (starting in <xref linkend="xfunc">)
  19      </para>
  20     </listitem>
  21     <listitem>
  22      <para>
  23       aggregates (starting in <xref linkend="xaggr">)
  24      </para>
  25     </listitem>
  26     <listitem>
  27      <para>
  28       data types (starting in <xref linkend="xtypes">)
  29      </para>
  30     </listitem>
  31     <listitem>
  32      <para>
  33       operators (starting in <xref linkend="xoper">)
  34      </para>
  35     </listitem>
  36     <listitem>
  37      <para>
  38       operator classes for indexes (starting in <xref linkend="xindex">)
  39      </para>
  40     </listitem>
  41    </itemizedlist>
  42   </para>
  43
  44   <sect1 id="extend-how">
  45    <title>How Extensibility Works</title>
  46
  47    <para>
  48     <productname>PostgreSQL</productname> is extensible because its operation  is
  49     catalog-driven.   If  you  are familiar with standard
  50     relational database systems, you know that  they  store  information
  51     about  databases,  tables,  columns,  etc., in what are
  52     commonly known as system catalogs.  (Some systems  call
  53     this  the data dictionary.)  The catalogs appear to the
  54     user as tables like any other, but  the  <acronym>DBMS</acronym>  stores
  55     its  internal  bookkeeping in them.  One key difference
  56     between <productname>PostgreSQL</productname> and  standard  relational database systems  is
  57     that <productname>PostgreSQL</productname> stores much more information in its
  58     catalogs: not only information about tables and  columns,
  59     but also information about data types, functions, access
  60     methods, and so on.  These tables can be  modified  by
  61     the  user, and since <productname>PostgreSQL</productname> bases its operation
  62     on these tables, this means that <productname>PostgreSQL</productname> can  be
  63     extended   by   users.    By  comparison,  conventional
  64     database systems can only be extended by changing hardcoded
  65     procedures in the source code or by loading modules
  66     specially written by the <acronym>DBMS</acronym> vendor.
  67    </para>
  68
  69    <para>
  70     The <productname>PostgreSQL</productname> server can moreover
  71     incorporate user-written code into itself through dynamic loading.
  72     That is, the user can specify an object code file (e.g., a shared
  73     library) that implements a new type or function, and
  74     <productname>PostgreSQL</productname> will load it as required.
  75     Code written in <acronym>SQL</acronym> is even more trivial to add
  76     to the server.  This ability to modify its operation <quote>on the
  77     fly</quote> makes <productname>PostgreSQL</productname> uniquely
  78     suited for rapid prototyping of new applications and storage
  79     structures.
  80    </para>
  81   </sect1>
  82
  83   <sect1 id="extend-type-system">
  84    <title>The <productname>PostgreSQL</productname> Type System</title>
  85
  86    <indexterm zone="extend-type-system">
  87     <primary>base type</primary>
  88    </indexterm>
  89
  90    <indexterm zone="extend-type-system">
  91     <primary>data type</primary>
  92     <secondary>base</secondary>
  93    </indexterm>
  94
  95    <indexterm zone="extend-type-system">
  96     <primary>composite type</primary>
  97    </indexterm>
  98
  99    <indexterm zone="extend-type-system">
 100     <primary>data type</primary>
 101     <secondary>composite</secondary>
 102    </indexterm>
 103
 104    <para>
 105     <productname>PostgreSQL</productname> data types are divided into base
 106     types, composite types, domains, and pseudo-types.
 107    </para>
 108
 109    <sect2>
 110     <title>Base Types</title>
 111
 112     <para>
 113      Base types are those, like <type>int4</type>, that are
 114      implemented below the level of the <acronym>SQL</> language
 115      (typically in a low-level language such as C).  They generally
 116      correspond to what are often known as abstract data types.
 117      <productname>PostgreSQL</productname> can only operate on such
 118      types through functions provided by the user and only understands
 119      the behavior of such types to the extent that the user describes
 120      them.  Base types are further subdivided into scalar and array
 121      types.  For each scalar type, a corresponding array type is
 122      automatically created that can hold variable-size arrays of that
 123      scalar type.
 124     </para>
 125    </sect2>
 126
 127    <sect2>
 128     <title>Composite Types</title>
 129
 130     <para>
 131      Composite types, or row types, are created whenever the user
 132      creates a table. It is also possible to use <xref
 133      linkend="sql-createtype"> to
 134      define a <quote>stand-alone</> composite type with no associated
 135      table.  A composite type is simply a list of types with
 136      associated field names.  A value of a composite type is a row or
 137      record of field values.  The user can access the component fields
 138      from <acronym>SQL</> queries. Refer to <xref linkend="rowtypes">
 139      for more information on composite types.
 140     </para>
 141    </sect2>
 142
 143    <sect2>
 144     <title>Domains</title>
 145
 146     <para>
 147      A domain is based on a particular base type and for many purposes
 148      is interchangeable with its base type.  However, a domain can
 149      have constraints that restrict its valid values to a subset of
 150      what the underlying base type would allow.
 151     </para>
 152
 153     <para>
 154      Domains can be created using the <acronym>SQL</> command
 155      <xref linkend="sql-createdomain">.
 156      Their creation and use are not discussed in this chapter.
 157     </para>
 158    </sect2>
 159
 160    <sect2>
 161     <title>Pseudo-Types</title>
 162
 163     <para>
 164      There are a few <quote>pseudo-types</> for special purposes.
 165      Pseudo-types cannot appear as columns of tables or attributes of
 166      composite types, but they can be used to declare the argument and
 167      result types of functions.  This provides a mechanism within the
 168      type system to identify special classes of functions.  <xref
 169      linkend="datatype-pseudotypes-table"> lists the existing
 170      pseudo-types.
 171     </para>
 172    </sect2>
 173
 174    <sect2 id="extend-types-polymorphic">
 175     <title>Polymorphic Types</title>
 176
 177    <indexterm zone="extend-types-polymorphic">
 178     <primary>polymorphic type</primary>
 179    </indexterm>
 180
 181    <indexterm zone="extend-types-polymorphic">
 182     <primary>polymorphic function</primary>
 183    </indexterm>
 184
 185    <indexterm zone="extend-types-polymorphic">
 186     <primary>type</primary>
 187     <secondary>polymorphic</secondary>
 188    </indexterm>
 189
 190    <indexterm zone="extend-types-polymorphic">
 191     <primary>function</primary>
 192     <secondary>polymorphic</secondary>
 193    </indexterm>
 194
 195     <para>
 196      Four pseudo-types of special interest are <type>anyelement</>,
 197      <type>anyarray</>, <type>anynonarray</>, and <type>anyenum</>,
 198      which are collectively called <firstterm>polymorphic types</>.
 199      Any function declared using these types is said to be
 200      a <firstterm>polymorphic function</>.  A polymorphic function can
 201      operate on many different data types, with the specific data type(s)
 202      being determined by the data types actually passed to it in a particular
 203      call.
 204     </para>
 205
 206     <para>
 207      Polymorphic arguments and results are tied to each other and are resolved
 208      to a specific data type when a query calling a polymorphic function is
 209      parsed.  Each position (either argument or return value) declared as
 210      <type>anyelement</type> is allowed to have any specific actual
 211      data type, but in any given call they must all be the
 212      <emphasis>same</emphasis> actual type. Each
 213      position declared as <type>anyarray</type> can have any array data type,
 214      but similarly they must all be the same type. If there are
 215      positions declared <type>anyarray</type> and others declared
 216      <type>anyelement</type>, the actual array type in the
 217      <type>anyarray</type> positions must be an array whose elements are
 218      the same type appearing in the <type>anyelement</type> positions.
 219      <type>anynonarray</> is treated exactly the same as <type>anyelement</>,
 220      but adds the additional constraint that the actual type must not be
 221      an array type.
 222      <type>anyenum</> is treated exactly the same as <type>anyelement</>,
 223      but adds the additional constraint that the actual type must
 224      be an enum type.
 225     </para>
 226
 227     <para>
 228      Thus, when more than one argument position is declared with a polymorphic
 229      type, the net effect is that only certain combinations of actual argument
 230      types are allowed.  For example, a function declared as
 231      <literal>equal(anyelement, anyelement)</> will take any two input values,
 232      so long as they are of the same data type.
 233     </para>
 234
 235     <para>
 236      When the return value of a function is declared as a polymorphic type,
 237      there must be at least one argument position that is also polymorphic,
 238      and the actual data type supplied as the argument determines the actual
 239      result type for that call.  For example, if there were not already
 240      an array subscripting mechanism, one could define a function that
 241      implements subscripting as <literal>subscript(anyarray, integer)
 242      returns anyelement</>.  This declaration constrains the actual first
 243      argument to be an array type, and allows the parser to infer the correct
 244      result type from the actual first argument's type.  Another example
 245      is that a function declared as <literal>f(anyarray) returns anyenum</>
 246      will only accept arrays of enum types.
 247     </para>
 248
 249     <para>
 250      Note that <type>anynonarray</> and <type>anyenum</> do not represent
 251      separate type variables; they are the same type as
 252      <type>anyelement</type>, just with an additional constraint.  For
 253      example, declaring a function as <literal>f(anyelement, anyenum)</>
 254      is equivalent to declaring it as <literal>f(anyenum, anyenum)</>:
 255      both actual arguments have to be the same enum type.
 256     </para>
 257
 258     <para>
 259      A variadic function (one taking a variable number of arguments, as in
 260      <xref linkend="xfunc-sql-variadic-functions">) can be
 261      polymorphic: this is accomplished by declaring its last parameter as
 262      <literal>VARIADIC</> <type>anyarray</>.  For purposes of argument
 263      matching and determining the actual result type, such a function behaves
 264      the same as if you had written the appropriate number of
 265      <type>anynonarray</> parameters.
 266     </para>
 267    </sect2>
 268   </sect1>
 269
 270   &xfunc;
 271   &xaggr;
 272   &xtypes;
 273   &xoper;
 274   &xindex;
 275
 276   <sect1 id="extend-Cpp">
 277    <title>Using C++ for Extensibility</title>
 278
 279    <indexterm zone="extend-Cpp">
 280     <primary>C++</primary>
 281    </indexterm>
 282
 283    <para>
 284     It is possible to use a compiler in C++ mode to build
 285     <productname>PostgreSQL</productname> extensions by following these
 286     guidelines:
 287
 288     <itemizedlist>
 289      <listitem>
 290       <para>
 291         All functions accessed by the backend must present a C interface
 292         to the backend;  these C functions can then call C++ functions.
 293         For example, <literal>extern "C"</> linkage is required for
 294         backend-accessed functions.  This is also necessary for any
 295         functions that are passed as pointers between the backend and
 296         C++ code.
 297       </para>
 298      </listitem>
 299      <listitem>
 300       <para>
 301        Free memory using the appropriate deallocation method.  For example,
 302        most backend memory is allocated using <function>palloc()</>, so use
 303        <function>pfree()</> to free it.  Using the C++
 304        <literal>delete</> operator in such cases will fail.
 305       </para>
 306      </listitem>
 307      <listitem>
 308       <para>
 309        Prevent exceptions from propagating into the C code (use a
 310        catch-all block at the top level of all <literal>extern "C"</>
 311        functions).  This is necessary even if the C++ code does not
 312        explicitly throw any exceptions, because events like out-of-memory
 313        can still throw exceptions.  Any exceptions must be caught and appropriate
 314        errors passed back to the C interface.  If possible, compile C++
 315        with <option>-fno-exceptions</> to eliminate exceptions entirely;
 316        in such cases, you must check for failures in your C++ code, e.g.,
 317        check for NULL returned by the <literal>new</> operator.
 318       </para>
 319      </listitem>
 320      <listitem>
 321       <para>
 322        If calling backend functions from C++ code, be sure that the
 323        C++ call stack contains only plain old data structures
 324        (<acronym>POD</>).  This is necessary because backend errors
 325        generate a distant <function>longjmp()</> that does not properly
 326        unwind a C++ call stack with non-POD objects.
 327       </para>
 328      </listitem>
 329     </itemizedlist>
 330    </para>
 331
 332    <para>
 333     In summary, it is best to place C++ code behind a wall of
 334     <literal>extern "C"</> functions that interface to the backend,
 335     and avoid exception, memory, and call stack leakage.
 336    </para>
 337   </sect1>
 338
 339  </chapter>