doc/src/sgml/xfunc.sgml

   1 <!--
   2 $PostgreSQL: pgsql/doc/src/sgml/xfunc.sgml,v 1.93 2005/01/07 22:40:46 tgl Exp $
   3 -->
   4
   5  <sect1 id="xfunc">
   6   <title>User-Defined Functions</title>
   7
   8   <indexterm zone="xfunc">
   9    <primary>function</primary>
  10    <secondary>user-defined</secondary>
  11   </indexterm>
  12
  13   <para>
  14    <productname>PostgreSQL</productname> provides four kinds of
  15    functions:
  16
  17    <itemizedlist>
  18     <listitem>
  19      <para>
  20       query language functions (functions written in
  21       <acronym>SQL</acronym>) (<xref linkend="xfunc-sql">)
  22      </para>
  23     </listitem>
  24     <listitem>
  25      <para>
  26       procedural language functions (functions written in, for
  27       example, <application>PL/pgSQL</> or <application>PL/Tcl</>)
  28       (<xref linkend="xfunc-pl">)
  29      </para>
  30     </listitem>
  31     <listitem>
  32      <para>
  33       internal functions (<xref linkend="xfunc-internal">)
  34      </para>
  35     </listitem>
  36     <listitem>
  37      <para>
  38       C-language functions (<xref linkend="xfunc-c">)
  39      </para>
  40     </listitem>
  41    </itemizedlist>
  42   </para>
  43
  44   <para>
  45    Every kind
  46    of  function  can take base types, composite types, or
  47    combinations of these as arguments (parameters). In addition,
  48    every kind of function can return a base type or
  49    a composite type.  Functions may also be defined to return
  50    sets of base or composite values.
  51   </para>
  52
  53   <para>
  54    Many kinds of functions can take or return certain pseudo-types
  55    (such as polymorphic types), but the available facilities vary.
  56    Consult the description of each kind of function for more details.
  57   </para>
  58
  59   <para>
  60    It's easiest to define <acronym>SQL</acronym>
  61    functions, so we'll start by discussing those.
  62    Most of the concepts presented for <acronym>SQL</acronym> functions
  63    will carry over to the other types of functions.
  64   </para>
  65
  66   <para>
  67    Throughout this chapter, it can be useful to look at the reference
  68    page of the <xref linkend="sql-createfunction"
  69    endterm="sql-createfunction-title"> command to
  70    understand the examples better.  Some examples from this chapter
  71    can be found in <filename>funcs.sql</filename> and
  72    <filename>funcs.c</filename> in the <filename>src/tutorial</>
  73    directory in the <productname>PostgreSQL</productname> source
  74    distribution.
  75   </para>
  76   </sect1>
  77
  78   <sect1 id="xfunc-sql">
  79    <title>Query Language (<acronym>SQL</acronym>) Functions</title>
  80
  81    <indexterm zone="xfunc-sql">
  82     <primary>function</primary>
  83     <secondary>user-defined</secondary>
  84     <tertiary>in SQL</tertiary>
  85    </indexterm>
  86
  87    <para>
  88     SQL functions execute an arbitrary list of SQL statements, returning
  89     the result of the last query in the list.
  90     In the simple (non-set)
  91     case, the first row of the last query's result will be returned.
  92     (Bear in mind that <quote>the first row</quote> of a multirow
  93     result is not well-defined unless you use <literal>ORDER BY</>.)
  94     If the last query happens
  95     to return no rows at all, the null value will be returned.
  96    </para>
  97
  98    <para>
  99     <indexterm><primary>SETOF</><seealso>function</></> Alternatively,
 100     an SQL function may be declared to return a set, by specifying the
 101     function's return type as <literal>SETOF
 102     <replaceable>sometype</></literal>.<indexterm><primary>SETOF</></>
 103     In this case all rows of the last query's result are returned.
 104     Further details appear below.
 105    </para>
 106
 107    <para>
 108     The body of an SQL function must be a list of SQL
 109     statements separated by semicolons.  A semicolon after the last
 110     statement is optional.  Unless the function is declared to return
 111     <type>void</>, the last statement must be a <command>SELECT</>.
 112    </para>
 113
 114     <para>
 115      Any collection of commands in the  <acronym>SQL</acronym>
 116      language can be packaged together and defined as a function.
 117      Besides <command>SELECT</command> queries, the commands can include data
 118      modification queries (<command>INSERT</command>,
 119      <command>UPDATE</command>, and <command>DELETE</command>), as well as
 120      other SQL commands. (The only exception is that you can't put
 121      <command>BEGIN</>, <command>COMMIT</>, <command>ROLLBACK</>, or
 122      <command>SAVEPOINT</> commands into an <acronym>SQL</acronym> function.)
 123      However, the final command
 124      must be a <command>SELECT</command> that returns whatever is
 125      specified as the function's return type.  Alternatively, if you
 126      want to define an SQL function that performs actions but has no
 127      useful value to return, you can define it as returning <type>void</>.
 128      In that case, the function body must not end with a <command>SELECT</command>.
 129      For example, this function removes rows with negative salaries from
 130      the <literal>emp</> table:
 131
 132 <screen>
 133 CREATE FUNCTION clean_emp() RETURNS void AS '
 134     DELETE FROM emp
 135         WHERE salary &lt; 0;
 136 ' LANGUAGE SQL;
 137
 138 SELECT clean_emp();
 139
 140  clean_emp
 141 -----------
 142
 143 (1 row)
 144 </screen>
 145     </para>
 146
 147    <para>
 148     The syntax of the <command>CREATE FUNCTION</command> command requires
 149     the function body to be written as a string constant.  It is usually
 150     most convenient to use dollar quoting (see <xref
 151     linkend="sql-syntax-dollar-quoting">) for the string constant.
 152     If you choose to use regular single-quoted string constant syntax,
 153     you must escape single quote marks (<literal>'</>) and backslashes
 154     (<literal>\</>) used in the body of the function, typically by
 155     doubling them (see <xref linkend="sql-syntax-strings">).
 156    </para>
 157
 158    <para>
 159     Arguments to the SQL function are referenced in the function
 160     body using the syntax <literal>$<replaceable>n</></>: <literal>$1</>
 161     refers to the first argument, <literal>$2</> to the second, and so on.
 162     If an argument is of a composite type, then the dot notation,
 163     e.g., <literal>$1.name</literal>, may be used to access attributes
 164     of the argument.
 165    </para>
 166
 167    <sect2>
 168     <title><acronym>SQL</acronym> Functions on Base Types</title>
 169
 170     <para>
 171      The simplest possible <acronym>SQL</acronym> function has no arguments and
 172      simply returns a base type, such as <type>integer</type>:
 173
 174 <screen>
 175 CREATE FUNCTION one() RETURNS integer AS $$
 176     SELECT 1 AS result;
 177 $$ LANGUAGE SQL;
 178
 179 -- Alternative syntax for string literal:
 180 CREATE FUNCTION one() RETURNS integer AS '
 181     SELECT 1 AS result;
 182 ' LANGUAGE SQL;
 183
 184 SELECT one();
 185
 186  one
 187 -----
 188    1
 189 </screen>
 190     </para>
 191
 192     <para>
 193      Notice that we defined a column alias within the function body for the result of the function
 194      (with  the  name <literal>result</>),  but this column alias is not visible
 195      outside the function.  Hence,  the  result  is labeled <literal>one</>
 196      instead of <literal>result</>.
 197     </para>
 198
 199     <para>
 200      It is almost as easy to define <acronym>SQL</acronym> functions
 201      that take base types as arguments.  In the example below, notice
 202      how we refer to the arguments within the function as <literal>$1</>
 203      and <literal>$2</>.
 204
 205 <screen>
 206 CREATE FUNCTION add_em(integer, integer) RETURNS integer AS $$
 207     SELECT $1 + $2;
 208 $$ LANGUAGE SQL;
 209
 210 SELECT add_em(1, 2) AS answer;
 211
 212  answer
 213 --------
 214       3
 215 </screen>
 216     </para>
 217
 218     <para>
 219      Here is a more useful function, which might be used to debit a
 220      bank account:
 221
 222 <programlisting>
 223 CREATE FUNCTION tf1 (integer, numeric) RETURNS integer AS $$
 224     UPDATE bank
 225         SET balance = balance - $2
 226         WHERE accountno = $1;
 227     SELECT 1;
 228 $$ LANGUAGE SQL;
 229 </programlisting>
 230
 231      A user could execute this function to debit account 17 by $100.00 as
 232      follows:
 233
 234 <programlisting>
 235 SELECT tf1(17, 100.0);
 236 </programlisting>
 237     </para>
 238
 239     <para>
 240      In practice one would probably like a more useful result from the
 241      function than a constant 1, so a more likely definition
 242      is
 243
 244 <programlisting>
 245 CREATE FUNCTION tf1 (integer, numeric) RETURNS numeric AS $$
 246     UPDATE bank
 247         SET balance = balance - $2
 248         WHERE accountno = $1;
 249     SELECT balance FROM bank WHERE accountno = $1;
 250 $$ LANGUAGE SQL;
 251 </programlisting>
 252
 253      which adjusts the balance and returns the new balance.
 254     </para>
 255    </sect2>
 256
 257    <sect2>
 258     <title><acronym>SQL</acronym> Functions on Composite Types</title>
 259
 260     <para>
 261      When writing  functions with arguments of composite
 262      types, we must  not  only  specify  which
 263      argument  we  want (as we did above with <literal>$1</> and <literal>$2</literal>) but
 264      also the desired attribute (field) of  that  argument.   For  example,
 265      suppose that
 266      <type>emp</type> is a table containing employee data, and therefore
 267      also the name of the composite type of each row of the table.  Here
 268      is a function <function>double_salary</function> that computes what someone's
 269      salary would be if it were doubled:
 270
 271 <screen>
 272 CREATE TABLE emp (
 273     name        text,
 274     salary      numeric,
 275     age         integer,
 276     cubicle     point
 277 );
 278
 279 CREATE FUNCTION double_salary(emp) RETURNS numeric AS $$
 280     SELECT $1.salary * 2 AS salary;
 281 $$ LANGUAGE SQL;
 282
 283 SELECT name, double_salary(emp.*) AS dream
 284     FROM emp
 285     WHERE emp.cubicle ~= point '(2,1)';
 286
 287  name | dream
 288 ------+-------
 289  Bill |  8400
 290 </screen>
 291     </para>
 292
 293     <para>
 294      Notice the use of the syntax <literal>$1.salary</literal>
 295      to select one field of the argument row value.  Also notice
 296      how the calling <command>SELECT</> command uses <literal>*</>
 297      to select
 298      the entire current row of a table as a composite value.  The table
 299      row can alternatively be referenced using just the table name,
 300      like this:
 301 <screen>
 302 SELECT name, double_salary(emp) AS dream
 303     FROM emp
 304     WHERE emp.cubicle ~= point '(2,1)';
 305 </screen>
 306      but this usage is deprecated since it's easy to get confused.
 307     </para>
 308
 309     <para>
 310      Sometimes it is handy to construct a composite argument value
 311      on-the-fly.  This can be done with the <literal>ROW</> construct.
 312      For example, we could adjust the data being passed to the function:
 313 <screen>
 314 SELECT name, double_salary(ROW(name, salary*1.1, age, cubicle)) AS dream
 315     FROM emp;
 316 </screen>
 317     </para>
 318
 319     <para>
 320      It is also possible to build a function that returns a composite type.
 321      This is an example of a function
 322      that returns a single <type>emp</type> row:
 323
 324 <programlisting>
 325 CREATE FUNCTION new_emp() RETURNS emp AS $$
 326     SELECT text 'None' AS name,
 327         1000.0 AS salary,
 328         25 AS age,
 329         point '(2,2)' AS cubicle;
 330 $$ LANGUAGE SQL;
 331 </programlisting>
 332
 333      In this example we have specified each of  the  attributes
 334      with  a  constant value, but any computation
 335      could have been substituted for these constants.
 336     </para>
 337
 338     <para>
 339      Note two important things about defining the function:
 340
 341      <itemizedlist>
 342       <listitem>
 343        <para>
 344         The select list order in the query must be exactly the same as
 345         that in which the columns appear in the table associated
 346         with the composite type.  (Naming the columns, as we did above,
 347         is irrelevant to the system.)
 348        </para>
 349       </listitem>
 350       <listitem>
 351        <para>
 352         You must typecast the expressions to match the
 353         definition of the composite type, or you will get errors like this:
 354 <screen>
 355 <computeroutput>
 356 ERROR:  function declared to return emp returns varchar instead of text at column 1
 357 </computeroutput>
 358 </screen>
 359        </para>
 360       </listitem>
 361      </itemizedlist>
 362     </para>
 363
 364     <para>
 365      A different way to define the same function is:
 366
 367 <programlisting>
 368 CREATE FUNCTION new_emp() RETURNS emp AS $$
 369     SELECT ROW('None', 1000.0, 25, '(2,2)')::emp;
 370 $$ LANGUAGE SQL;
 371 </programlisting>
 372
 373      Here we wrote a <command>SELECT</> that returns just a single
 374      column of the correct composite type.  This isn't really better
 375      in this situation, but it is a handy alternative in some cases
 376      &mdash; for example, if we need to compute the result by calling
 377      another function that returns the desired composite value.
 378     </para>
 379
 380     <para>
 381      We could call this function directly in either of two ways:
 382
 383 <screen>
 384 SELECT new_emp();
 385
 386          new_emp
 387 --------------------------
 388  (None,1000.0,25,"(2,2)")
 389
 390 SELECT * FROM new_emp();
 391
 392  name | salary | age | cubicle
 393 ------+--------+-----+---------
 394  None | 1000.0 |  25 | (2,2)
 395 </screen>
 396
 397      The second way is described more fully in <xref
 398      linkend="xfunc-sql-table-functions">.
 399     </para>
 400
 401     <para>
 402      When you use a function that returns a composite type,
 403      you might want only one field (attribute) from its result.
 404      You can do that with syntax like this:
 405
 406 <screen>
 407 SELECT (new_emp()).name;
 408
 409  name
 410 ------
 411  None
 412 </screen>
 413
 414      The extra parentheses are needed to keep the parser from getting
 415      confused.  If you try to do it without them, you get something like this:
 416
 417 <screen>
 418 SELECT new_emp().name;
 419 ERROR:  syntax error at or near "." at character 17
 420 LINE 1: SELECT new_emp().name;
 421                         ^
 422 </screen>
 423     </para>
 424
 425     <para>
 426      Another option is to use
 427      functional notation for extracting an attribute.  The  simple  way
 428      to explain this is that we can use the
 429      notations <literal>attribute(table)</>  and  <literal>table.attribute</>
 430      interchangeably.
 431
 432 <screen>
 433 SELECT name(new_emp());
 434
 435  name
 436 ------
 437  None
 438 </screen>
 439
 440 <screen>
 441 -- This is the same as:
 442 -- SELECT emp.name AS youngster FROM emp WHERE emp.age &lt; 30;
 443
 444 SELECT name(emp) AS youngster FROM emp WHERE age(emp) &lt; 30;
 445
 446  youngster
 447 -----------
 448  Sam
 449  Andy
 450 </screen>
 451     </para>
 452
 453     <para>
 454      Another way to use a function returning a row result is to pass the
 455      result to another function that accepts the correct row type as input:
 456
 457 <screen>
 458 CREATE FUNCTION getname(emp) RETURNS text AS $$
 459     SELECT $1.name;
 460 $$ LANGUAGE SQL;
 461
 462 SELECT getname(new_emp());
 463  getname
 464 ---------
 465  None
 466 (1 row)
 467 </screen>
 468     </para>
 469
 470     <para>
 471      Another way to use a function that returns a composite type is to
 472      call it as a table function, as described below.
 473     </para>
 474    </sect2>
 475
 476    <sect2 id="xfunc-sql-table-functions">
 477     <title><acronym>SQL</acronym> Functions as Table Sources</title>
 478
 479     <para>
 480      All SQL functions may be used in the <literal>FROM</> clause of a query,
 481      but doing so is particularly useful for functions returning composite types.
 482      If the function is defined to return a base type, the table function
 483      produces a one-column table.  If the function is defined to return
 484      a composite type, the table function produces a column for each attribute
 485      of the composite type.
 486     </para>
 487
 488     <para>
 489      Here is an example:
 490
 491 <screen>
 492 CREATE TABLE foo (fooid int, foosubid int, fooname text);
 493 INSERT INTO foo VALUES (1, 1, 'Joe');
 494 INSERT INTO foo VALUES (1, 2, 'Ed');
 495 INSERT INTO foo VALUES (2, 1, 'Mary');
 496
 497 CREATE FUNCTION getfoo(int) RETURNS foo AS $$
 498     SELECT * FROM foo WHERE fooid = $1;
 499 $$ LANGUAGE SQL;
 500
 501 SELECT *, upper(fooname) FROM getfoo(1) AS t1;
 502
 503  fooid | foosubid | fooname | upper
 504 -------+----------+---------+-------
 505      1 |        1 | Joe     | JOE
 506 (1 row)
 507 </screen>
 508
 509      As the example shows, we can work with the columns of the function's
 510      result just the same as if they were columns of a regular table.
 511     </para>
 512
 513     <para>
 514      Note that we only got one row out of the function.  This is because
 515      we did not use <literal>SETOF</>.  That is described in the next section.
 516     </para>
 517    </sect2>
 518
 519    <sect2>
 520     <title><acronym>SQL</acronym> Functions Returning Sets</title>
 521
 522     <para>
 523      When an SQL function is declared as returning <literal>SETOF
 524      <replaceable>sometype</></literal>, the function's final
 525      <command>SELECT</> query is executed to completion, and each row it
 526      outputs is returned as an element of the result set.
 527     </para>
 528
 529     <para>
 530      This feature is normally used when calling the function in the <literal>FROM</>
 531      clause.  In this case each row returned by the function becomes
 532      a row of the table seen by the query.  For example, assume that
 533      table <literal>foo</> has the same contents as above, and we say:
 534
 535 <programlisting>
 536 CREATE FUNCTION getfoo(int) RETURNS SETOF foo AS $$
 537     SELECT * FROM foo WHERE fooid = $1;
 538 $$ LANGUAGE SQL;
 539
 540 SELECT * FROM getfoo(1) AS t1;
 541 </programlisting>
 542
 543      Then we would get:
 544 <screen>
 545  fooid | foosubid | fooname
 546 -------+----------+---------
 547      1 |        1 | Joe
 548      1 |        2 | Ed
 549 (2 rows)
 550 </screen>
 551     </para>
 552
 553     <para>
 554      Currently, functions returning sets may also be called in the select list
 555      of a query.  For each row that the query
 556      generates by itself, the function returning set is invoked, and an output
 557      row is generated for each element of the function's result set. Note,
 558      however, that this capability is deprecated and may be removed in future
 559      releases. The following is an example function returning a set from the
 560      select list:
 561
 562 <screen>
 563 CREATE FUNCTION listchildren(text) RETURNS SETOF text AS $$
 564     SELECT name FROM nodes WHERE parent = $1
 565 $$ LANGUAGE SQL;
 566
 567 SELECT * FROM nodes;
 568    name    | parent
 569 -----------+--------
 570  Top       |
 571  Child1    | Top
 572  Child2    | Top
 573  Child3    | Top
 574  SubChild1 | Child1
 575  SubChild2 | Child1
 576 (6 rows)
 577
 578 SELECT listchildren('Top');
 579  listchildren
 580 --------------
 581  Child1
 582  Child2
 583  Child3
 584 (3 rows)
 585
 586 SELECT name, listchildren(name) FROM nodes;
 587   name  | listchildren
 588 --------+--------------
 589  Top    | Child1
 590  Top    | Child2
 591  Top    | Child3
 592  Child1 | SubChild1
 593  Child1 | SubChild2
 594 (5 rows)
 595 </screen>
 596
 597      In the last <command>SELECT</command>,
 598      notice that no output row appears for <literal>Child2</>, <literal>Child3</>, etc.
 599      This happens because <function>listchildren</function> returns an empty set
 600      for those arguments, so no result rows are generated.
 601     </para>
 602    </sect2>
 603
 604    <sect2>
 605     <title>Polymorphic <acronym>SQL</acronym> Functions</title>
 606
 607     <para>
 608      <acronym>SQL</acronym> functions may be declared to accept and
 609      return the polymorphic types <type>anyelement</type> and
 610      <type>anyarray</type>.  See <xref
 611      linkend="extend-types-polymorphic"> for a more detailed
 612      explanation of polymorphic functions. Here is a polymorphic
 613      function <function>make_array</function> that builds up an array
 614      from two arbitrary data type elements:
 615 <screen>
 616 CREATE FUNCTION make_array(anyelement, anyelement) RETURNS anyarray AS $$
 617     SELECT ARRAY[$1, $2];
 618 $$ LANGUAGE SQL;
 619
 620 SELECT make_array(1, 2) AS intarray, make_array('a'::text, 'b') AS textarray;
 621  intarray | textarray
 622 ----------+-----------
 623  {1,2}    | {a,b}
 624 (1 row)
 625 </screen>
 626     </para>
 627
 628     <para>
 629      Notice the use of the typecast <literal>'a'::text</literal>
 630      to specify that the argument is of type <type>text</type>. This is
 631      required if the argument is just a string literal, since otherwise
 632      it would be treated as type
 633      <type>unknown</type>, and array of <type>unknown</type> is not a valid
 634      type.
 635      Without the typecast, you will get errors like this:
 636 <screen>
 637 <computeroutput>
 638 ERROR:  could not determine "anyarray"/"anyelement" type because input has type "unknown"
 639 </computeroutput>
 640 </screen>
 641     </para>
 642
 643     <para>
 644      It is permitted to have polymorphic arguments with a fixed
 645      return type, but the converse is not. For example:
 646 <screen>
 647 CREATE FUNCTION is_greater(anyelement, anyelement) RETURNS boolean AS $$
 648     SELECT $1 > $2;
 649 $$ LANGUAGE SQL;
 650
 651 SELECT is_greater(1, 2);
 652  is_greater
 653 ------------
 654  f
 655 (1 row)
 656
 657 CREATE FUNCTION invalid_func() RETURNS anyelement AS $$
 658     SELECT 1;
 659 $$ LANGUAGE SQL;
 660 ERROR:  cannot determine result data type
 661 DETAIL:  A function returning "anyarray" or "anyelement" must have at least one argument of either type.
 662 </screen>
 663     </para>
 664    </sect2>
 665   </sect1>
 666
 667   <sect1 id="xfunc-overload">
 668    <title>Function Overloading</title>
 669
 670    <indexterm zone="xfunc-overload">
 671     <primary>overloading</primary>
 672     <secondary>functions</secondary>
 673    </indexterm>
 674
 675    <para>
 676     More than one function may be defined with the same SQL name, so long
 677     as the arguments they take are different.  In other words,
 678     function names can be <firstterm>overloaded</firstterm>.  When a
 679     query is executed, the server will determine which function to
 680     call from the data types and the number of the provided arguments.
 681     Overloading can also be used to simulate functions with a variable
 682     number of arguments, up to a finite maximum number.
 683    </para>
 684
 685    <para>
 686     When creating a family of overloaded functions, one should be
 687     careful not to create ambiguities.  For instance, given the
 688     functions
 689 <programlisting>
 690 CREATE FUNCTION test(int, real) RETURNS ...
 691 CREATE FUNCTION test(smallint, double precision) RETURNS ...
 692 </programlisting>
 693     it is not immediately clear which function would be called with
 694     some trivial input like <literal>test(1, 1.5)</literal>.  The
 695     currently implemented resolution rules are described in
 696     <xref linkend="typeconv">, but it is unwise to design a system that subtly
 697     relies on this behavior.
 698    </para>
 699
 700    <para>
 701     A function that takes a single argument of a composite type should
 702     generally not have the same name as any attribute (field) of that type.
 703     Recall that <literal>attribute(table)</literal> is considered equivalent
 704     to <literal>table.attribute</literal>.  In the case that there is an
 705     ambiguity between a function on a composite type and an attribute of
 706     the composite type, the attribute will always be used.  It is possible
 707     to override that choice by schema-qualifying the function name
 708     (that is, <literal>schema.func(table)</literal>) but it's better to
 709     avoid the problem by not choosing conflicting names.
 710    </para>
 711
 712    <para>
 713     When overloading C-language functions, there is an additional
 714     constraint: The C name of each function in the family of
 715     overloaded functions must be different from the C names of all
 716     other functions, either internal or dynamically loaded.  If this
 717     rule is violated, the behavior is not portable.  You might get a
 718     run-time linker error, or one of the functions will get called
 719     (usually the internal one).  The alternative form of the
 720     <literal>AS</> clause for the SQL <command>CREATE
 721     FUNCTION</command> command decouples the SQL function name from
 722     the function name in the C source code.  For instance,
 723 <programlisting>
 724 CREATE FUNCTION test(int) RETURNS int
 725     AS '<replaceable>filename</>', 'test_1arg'
 726     LANGUAGE C;
 727 CREATE FUNCTION test(int, int) RETURNS int
 728     AS '<replaceable>filename</>', 'test_2arg'
 729     LANGUAGE C;
 730 </programlisting>
 731     The names of the C functions here reflect one of many possible conventions.
 732    </para>
 733   </sect1>
 734
 735   <sect1 id="xfunc-volatility">
 736    <title>Function Volatility Categories</title>
 737
 738    <indexterm zone="xfunc-volatility">
 739     <primary>volatility</primary>
 740     <secondary>functions</secondary>
 741    </indexterm>
 742
 743    <para>
 744     Every function has a <firstterm>volatility</> classification, with
 745     the possibilities being <literal>VOLATILE</>, <literal>STABLE</>, or
 746     <literal>IMMUTABLE</>.  <literal>VOLATILE</> is the default if the
 747     <command>CREATE FUNCTION</command> command does not specify a category.
 748     The volatility category is a promise to the optimizer about the behavior
 749     of the function:
 750
 751    <itemizedlist>
 752     <listitem>
 753      <para>
 754       A <literal>VOLATILE</> function can do anything, including modifying
 755       the database.  It can return different results on successive calls with
 756       the same arguments.  The optimizer makes no assumptions about the
 757       behavior of such functions.  A query using a volatile function will
 758       re-evaluate the function at every row where its value is needed.
 759      </para>
 760     </listitem>
 761     <listitem>
 762      <para>
 763       A <literal>STABLE</> function cannot modify the database and is
 764       guaranteed to return the same results given the same arguments
 765       for all calls within a single surrounding query.  This category
 766       allows the optimizer to optimize away multiple calls of the function
 767       within a single query.  In particular, it is safe to use an expression
 768       containing such a function in an index scan condition.  (Since an
 769       index scan will evaluate the comparison value only once, not once at
 770       each row, it is not valid to use a <literal>VOLATILE</> function in
 771       an index scan condition.)
 772      </para>
 773     </listitem>
 774     <listitem>
 775      <para>
 776       An <literal>IMMUTABLE</> function cannot modify the database and is
 777       guaranteed to return the same results given the same arguments forever.
 778       This category allows the optimizer to pre-evaluate the function when
 779       a query calls it with constant arguments.  For example, a query like
 780       <literal>SELECT ... WHERE x = 2 + 2</> can be simplified on sight to
 781       <literal>SELECT ... WHERE x = 4</>, because the function underlying
 782       the integer addition operator is marked <literal>IMMUTABLE</>.
 783      </para>
 784     </listitem>
 785    </itemizedlist>
 786    </para>
 787
 788    <para>
 789     For best optimization results, you should label your functions with the
 790     strictest volatility category that is valid for them.
 791    </para>
 792
 793    <para>
 794     Any function with side-effects <emphasis>must</> be labeled
 795     <literal>VOLATILE</>, so that calls to it cannot be optimized away.
 796     Even a function with no side-effects needs to be labeled
 797     <literal>VOLATILE</> if its value can change within a single query;
 798     some examples are <literal>random()</>, <literal>currval()</>,
 799     <literal>timeofday()</>.
 800    </para>
 801
 802    <para>
 803     There is relatively little difference between <literal>STABLE</> and
 804     <literal>IMMUTABLE</> categories when considering simple interactive
 805     queries that are planned and immediately executed: it doesn't matter
 806     a lot whether a function is executed once during planning or once during
 807     query execution startup.  But there is a big difference if the plan is
 808     saved and reused later.  Labeling a function <literal>IMMUTABLE</> when
 809     it really isn't may allow it to be prematurely folded to a constant during
 810     planning, resulting in a stale value being re-used during subsequent uses
 811     of the plan.  This is a hazard when using prepared statements or when
 812     using function languages that cache plans (such as
 813     <application>PL/pgSQL</>).
 814    </para>
 815
 816    <para>
 817     Because of the snapshotting behavior of MVCC (see <xref linkend="mvcc">),
 818     a function containing only <command>SELECT</> commands can safely be
 819     marked <literal>STABLE</>, even if it selects from tables that might be
 820     undergoing modifications by concurrent queries.
 821     <productname>PostgreSQL</productname> will execute a <literal>STABLE</>
 822     function using the snapshot established for the calling query, and so it
 823     will see a fixed view of the database throughout that query.
 824     Also note
 825     that the <function>current_timestamp</> family of functions qualify
 826     as stable, since their values do not change within a transaction.
 827    </para>
 828
 829    <para>
 830     The same snapshotting behavior is used for <command>SELECT</> commands
 831     within <literal>IMMUTABLE</> functions.  It is generally unwise to select
 832     from database tables within an <literal>IMMUTABLE</> function at all,
 833     since the immutability will be broken if the table contents ever change.
 834     However, <productname>PostgreSQL</productname> does not prevent you
 835     from doing so.
 836    </para>
 837
 838    <para>
 839     A common error is to label a function <literal>IMMUTABLE</> when its
 840     results depend on a configuration parameter.  For example, a function
 841     that manipulates timestamps might well have results that depend on the
 842     <xref linkend="guc-timezone"> setting.  For safety, such functions should
 843     be labeled <literal>STABLE</> instead.
 844    </para>
 845
 846    <note>
 847     <para>
 848      Before <productname>PostgreSQL</productname> release 8.0, the requirement
 849      that <literal>STABLE</> and <literal>IMMUTABLE</> functions cannot modify
 850      the database was not enforced by the system.  Release 8.0 enforces it
 851      by requiring SQL functions and procedural language functions of these
 852      categories to contain no SQL commands other than <command>SELECT</>.
 853      (This is not a completely bulletproof test, since such functions could
 854      still call <literal>VOLATILE</> functions that modify the database.
 855      If you do that, you will find that the <literal>STABLE</> or
 856      <literal>IMMUTABLE</> function does not notice the database changes
 857      applied by the called function.)
 858     </para>
 859    </note>
 860   </sect1>
 861
 862   <sect1 id="xfunc-pl">
 863    <title>Procedural Language Functions</title>
 864
 865    <para>
 866     <productname>PostgreSQL</productname> allows user-defined functions
 867     to be written in other languages besides SQL and C.  These other
 868     languages are generically called <firstterm>procedural
 869     languages</firstterm> (<acronym>PL</>s).
 870     Procedural languages aren't built into the
 871     <productname>PostgreSQL</productname> server; they are offered
 872     by loadable modules.
 873     See <xref linkend="xplang"> and following chapters for more
 874     information.
 875    </para>
 876   </sect1>
 877
 878   <sect1 id="xfunc-internal">
 879    <title>Internal Functions</title>
 880
 881    <indexterm zone="xfunc-internal"><primary>function</><secondary>internal</></>
 882
 883    <para>
 884     Internal functions are functions written in C that have been statically
 885     linked into the <productname>PostgreSQL</productname> server.
 886     The <quote>body</quote> of the function definition
 887     specifies the C-language name of the function, which need not be the
 888     same as the name being declared for SQL use.
 889     (For reasons of backwards compatibility, an empty body
 890     is accepted as meaning that the C-language function name is the
 891     same as the SQL name.)
 892    </para>
 893
 894    <para>
 895     Normally, all internal functions present in the
 896     server are declared during the initialization of the database cluster (<command>initdb</command>),
 897     but a user could use <command>CREATE FUNCTION</command>
 898     to create additional alias names for an internal function.
 899     Internal functions are declared in <command>CREATE FUNCTION</command>
 900     with language name <literal>internal</literal>.  For instance, to
 901     create an alias for the <function>sqrt</function> function:
 902 <programlisting>
 903 CREATE FUNCTION square_root(double precision) RETURNS double precision
 904     AS 'dsqrt'
 905     LANGUAGE internal
 906     STRICT;
 907 </programlisting>
 908     (Most internal functions expect to be declared <quote>strict</quote>.)
 909    </para>
 910
 911    <note>
 912     <para>
 913      Not all <quote>predefined</quote> functions are
 914      <quote>internal</quote> in the above sense.  Some predefined
 915      functions are written in SQL.
 916     </para>
 917    </note>
 918   </sect1>
 919
 920   <sect1 id="xfunc-c">
 921    <title>C-Language Functions</title>
 922
 923    <indexterm zone="xfunc-c">
 924     <primary>function</primary>
 925     <secondary>user-defined</secondary>
 926     <tertiary>in C</tertiary>
 927    </indexterm>
 928
 929    <para>
 930     User-defined functions can be written in C (or a language that can
 931     be made compatible with C, such as C++).  Such functions are
 932     compiled into dynamically loadable objects (also called shared
 933     libraries) and are loaded by the server on demand.  The dynamic
 934     loading feature is what distinguishes <quote>C language</> functions
 935     from <quote>internal</> functions &mdash; the actual coding conventions
 936     are essentially the same for both.  (Hence, the standard internal
 937     function library is a rich source of coding examples for user-defined
 938     C functions.)
 939    </para>
 940
 941    <para>
 942     Two different calling conventions are currently used for C functions.
 943     The newer <quote>version 1</quote> calling convention is indicated by writing
 944     a <literal>PG_FUNCTION_INFO_V1()</literal> macro call for the function,
 945     as illustrated below.  Lack of such a macro indicates an old-style
 946     (<quote>version 0</quote>) function.  The language name specified in <command>CREATE FUNCTION</command>
 947     is <literal>C</literal> in either case.  Old-style functions are now deprecated
 948     because of portability problems and lack of functionality, but they
 949     are still supported for compatibility reasons.
 950    </para>
 951
 952   <sect2 id="xfunc-c-dynload">
 953    <title>Dynamic Loading</title>
 954
 955    <indexterm zone="xfunc-c-dynload">
 956     <primary>dynamic loading</primary>
 957    </indexterm>
 958
 959    <para>
 960     The first time a user-defined function in a particular
 961     loadable object file is called in a session,
 962     the dynamic loader loads that object file into memory so that the
 963     function can be called.  The <command>CREATE FUNCTION</command>
 964     for a user-defined C function must therefore specify two pieces of
 965     information for the function: the name of the loadable
 966     object file, and the C name (link symbol) of the specific function to call
 967     within that object file.  If the C name is not explicitly specified then
 968     it is assumed to be the same as the SQL function name.
 969    </para>
 970
 971    <para>
 972     The following algorithm is used to locate the shared object file
 973     based on the name given in the <command>CREATE FUNCTION</command>
 974     command:
 975
 976     <orderedlist>
 977      <listitem>
 978       <para>
 979        If the name is an absolute path, the given file is loaded.
 980       </para>
 981      </listitem>
 982
 983      <listitem>
 984       <para>
 985        If the name starts with the string <literal>$libdir</literal>,
 986        that part is replaced by the <productname>PostgreSQL</> package
 987         library directory
 988        name, which is determined at build time.<indexterm><primary>$libdir</></>
 989       </para>
 990      </listitem>
 991
 992      <listitem>
 993       <para>
 994        If the name does not contain a directory part, the file is
 995        searched for in the path specified by the configuration variable
 996        <xref linkend="guc-dynamic-library-path">.<indexterm><primary>dynamic_library_path</></>
 997       </para>
 998      </listitem>
 999
1000      <listitem>
1001       <para>
1002        Otherwise (the file was not found in the path, or it contains a
1003        non-absolute directory part), the dynamic loader will try to
1004        take the name as given, which will most likely fail.  (It is
1005        unreliable to depend on the current working directory.)
1006       </para>
1007      </listitem>
1008     </orderedlist>
1009
1010     If this sequence does not work, the platform-specific shared
1011     library file name extension (often <filename>.so</filename>) is
1012     appended to the given name and this sequence is tried again.  If
1013     that fails as well, the load will fail.
1014    </para>
1015
1016    <para>
1017     The user ID the <productname>PostgreSQL</productname> server runs
1018     as must be able to traverse the path to the file you intend to
1019     load.  Making the file or a higher-level directory not readable
1020     and/or not executable by the <systemitem>postgres</systemitem>
1021     user is a common mistake.
1022    </para>
1023
1024    <para>
1025     In any case, the file name that is given in the
1026     <command>CREATE FUNCTION</command> command is recorded literally
1027     in the system catalogs, so if the file needs to be loaded again
1028     the same procedure is applied.
1029    </para>
1030
1031    <note>
1032     <para>
1033      <productname>PostgreSQL</productname> will not compile a C function
1034      automatically.  The object file must be compiled before it is referenced
1035      in a <command>CREATE
1036      FUNCTION</> command.  See <xref linkend="dfunc"> for additional
1037      information.
1038     </para>
1039    </note>
1040
1041    <para>
1042     After it is used for the first time, a dynamically loaded object
1043     file is retained in memory.  Future calls in the same session to
1044     the function(s) in that file will only incur the small overhead of
1045     a symbol table lookup.  If you need to force a reload of an object
1046     file, for example after recompiling it, use the <command>LOAD</>
1047     command or begin a fresh session.
1048    </para>
1049
1050    <para>
1051     It is recommended to locate shared libraries either relative to
1052     <literal>$libdir</literal> or through the dynamic library path.
1053     This simplifies version upgrades if the new installation is at a
1054     different location.  The actual directory that
1055     <literal>$libdir</literal> stands for can be found out with the
1056     command <literal>pg_config --pkglibdir</literal>.
1057    </para>
1058
1059    <para>
1060     Before <productname>PostgreSQL</productname> release 7.2, only
1061     exact absolute paths to object files could be specified in
1062     <command>CREATE FUNCTION</>.  This approach is now deprecated
1063     since it makes the function definition unnecessarily unportable.
1064     It's best to specify just the shared library name with no path nor
1065     extension, and let the search mechanism provide that information
1066     instead.
1067    </para>
1068   </sect2>
1069
1070    <sect2 id="xfunc-c-basetype">
1071     <title>Base Types in C-Language Functions</title>
1072
1073     <indexterm zone="xfunc-c-basetype">
1074      <primary>data type</primary>
1075      <secondary>internal organisation</secondary>
1076     </indexterm>
1077
1078     <para>
1079      To know how to write C-language functions, you need to know how
1080      <productname>PostgreSQL</productname> internally represents base
1081      data types and how they can be passed to and from functions.
1082      Internally, <productname>PostgreSQL</productname> regards a base
1083      type as a <quote>blob of memory</quote>.  The user-defined
1084      functions that you define over a type in turn define the way that
1085      <productname>PostgreSQL</productname> can operate on it.  That
1086      is, <productname>PostgreSQL</productname> will only store and
1087      retrieve the data from disk and use your user-defined functions
1088      to input, process, and output the data.
1089     </para>
1090
1091     <para>
1092      Base types can have one of three internal formats:
1093
1094      <itemizedlist>
1095       <listitem>
1096        <para>
1097         pass by value, fixed-length
1098        </para>
1099       </listitem>
1100       <listitem>
1101        <para>
1102         pass by reference, fixed-length
1103        </para>
1104       </listitem>
1105       <listitem>
1106        <para>
1107         pass by reference, variable-length
1108        </para>
1109       </listitem>
1110      </itemizedlist>
1111     </para>
1112
1113     <para>
1114      By-value  types  can  only be 1, 2, or 4 bytes in length
1115      (also 8 bytes, if <literal>sizeof(Datum)</literal> is 8 on your machine).
1116      You should be careful
1117      to define your types such that  they  will  be  the  same
1118      size (in bytes) on all architectures.  For example, the
1119      <literal>long</literal> type is dangerous because  it
1120      is 4 bytes on some machines and 8 bytes on others, whereas
1121      the <type>int</type> type is 4 bytes on most
1122      Unix machines.  A reasonable implementation of
1123      the  <type>int4</type>  type  on  Unix
1124      machines might be:
1125
1126 <programlisting>
1127 /* 4-byte integer, passed by value */
1128 typedef int int4;
1129 </programlisting>
1130     </para>
1131
1132     <para>
1133      On  the  other hand, fixed-length types of any size may
1134      be passed by-reference.  For example, here is a  sample
1135      implementation of a <productname>PostgreSQL</productname> type:
1136
1137 <programlisting>
1138 /* 16-byte structure, passed by reference */
1139 typedef struct
1140 {
1141     double  x, y;
1142 } Point;
1143 </programlisting>
1144
1145      Only  pointers  to  such types can be used when passing
1146      them in and out of <productname>PostgreSQL</productname> functions.
1147      To return a value of such a type, allocate the right amount of
1148      memory with <literal>palloc</literal>, fill in the allocated memory,
1149      and return a pointer to it.  (You can also return an input value
1150      that has the same type as the return value directly by returning
1151      the pointer to the input value.  <emphasis>Never</> modify the
1152      contents of a pass-by-reference input value, however.)
1153     </para>
1154
1155     <para>
1156      Finally, all variable-length types must also be  passed
1157      by  reference.   All  variable-length  types must begin
1158      with a length field of exactly 4 bytes, and all data to
1159      be  stored within that type must be located in the memory
1160      immediately  following  that  length  field.   The
1161      length field contains the total length of the structure,
1162      that is,  it  includes  the  size  of  the  length  field
1163      itself.
1164     </para>
1165
1166     <para>
1167      As an example, we can define the type <type>text</type> as
1168      follows:
1169
1170 <programlisting>
1171 typedef struct {
1172     int4 length;
1173     char data[1];
1174 } text;
1175 </programlisting>
1176
1177      Obviously,  the  data  field declared here is not long enough to hold
1178      all possible strings.  Since it's impossible to declare a variable-size
1179      structure in <acronym>C</acronym>, we rely on the knowledge that the
1180      <acronym>C</acronym> compiler won't range-check array subscripts.  We
1181      just allocate the necessary amount of space and then access the array as
1182      if it were declared the right length.  (This is a common trick, which
1183      you can read about in many textbooks about C.)
1184     </para>
1185
1186     <para>
1187      When manipulating
1188      variable-length types, we must  be  careful  to  allocate
1189      the  correct amount  of memory and set the length field correctly.
1190      For example, if we wanted to  store  40  bytes  in  a <structname>text</>
1191      structure, we might use a code fragment like this:
1192
1193 <programlisting>
1194 #include "postgres.h"
1195 ...
1196 char buffer[40]; /* our source data */
1197 ...
1198 text *destination = (text *) palloc(VARHDRSZ + 40);
1199 destination-&gt;length = VARHDRSZ + 40;
1200 memcpy(destination-&gt;data, buffer, 40);
1201 ...
1202 </programlisting>
1203
1204      <literal>VARHDRSZ</> is the same as <literal>sizeof(int4)</>, but
1205      it's considered good style to use the macro <literal>VARHDRSZ</>
1206      to refer to the size of the overhead for a variable-length type.
1207     </para>
1208
1209     <para>
1210      <xref linkend="xfunc-c-type-table"> specifies which C type
1211      corresponds to which SQL type when writing a C-language function
1212      that uses a built-in type of <productname>PostgreSQL</>.
1213      The <quote>Defined In</quote> column gives the header file that
1214      needs to be included to get the type definition.  (The actual
1215      definition may be in a different file that is included by the
1216      listed file.  It is recommended that users stick to the defined
1217      interface.)  Note that you should always include
1218      <filename>postgres.h</filename> first in any source file, because
1219      it declares a number of things that you will need anyway.
1220     </para>
1221
1222      <table tocentry="1" id="xfunc-c-type-table">
1223       <title>Equivalent C Types for Built-In SQL Types</title>
1224       <tgroup cols="3">
1225        <thead>
1226         <row>
1227          <entry>
1228           SQL Type
1229          </entry>
1230          <entry>
1231           C Type
1232          </entry>
1233          <entry>
1234           Defined In
1235          </entry>
1236         </row>
1237        </thead>
1238        <tbody>
1239         <row>
1240          <entry><type>abstime</type></entry>
1241          <entry><type>AbsoluteTime</type></entry>
1242          <entry><filename>utils/nabstime.h</filename></entry>
1243         </row>
1244         <row>
1245          <entry><type>boolean</type></entry>
1246          <entry><type>bool</type></entry>
1247          <entry><filename>postgres.h</filename> (maybe compiler built-in)</entry>
1248         </row>
1249         <row>
1250          <entry><type>box</type></entry>
1251          <entry><type>BOX*</type></entry>
1252          <entry><filename>utils/geo_decls.h</filename></entry>
1253         </row>
1254         <row>
1255          <entry><type>bytea</type></entry>
1256          <entry><type>bytea*</type></entry>
1257          <entry><filename>postgres.h</filename></entry>
1258         </row>
1259         <row>
1260          <entry><type>"char"</type></entry>
1261          <entry><type>char</type></entry>
1262          <entry>(compiler built-in)</entry>
1263         </row>
1264         <row>
1265          <entry><type>character</type></entry>
1266          <entry><type>BpChar*</type></entry>
1267          <entry><filename>postgres.h</filename></entry>
1268         </row>
1269         <row>
1270          <entry><type>cid</type></entry>
1271          <entry><type>CommandId</type></entry>
1272          <entry><filename>postgres.h</filename></entry>
1273         </row>
1274         <row>
1275          <entry><type>date</type></entry>
1276          <entry><type>DateADT</type></entry>
1277          <entry><filename>utils/date.h</filename></entry>
1278         </row>
1279         <row>
1280          <entry><type>smallint</type> (<type>int2</type>)</entry>
1281          <entry><type>int2</type> or <type>int16</type></entry>
1282          <entry><filename>postgres.h</filename></entry>
1283         </row>
1284         <row>
1285          <entry><type>int2vector</type></entry>
1286          <entry><type>int2vector*</type></entry>
1287          <entry><filename>postgres.h</filename></entry>
1288         </row>
1289         <row>
1290          <entry><type>integer</type> (<type>int4</type>)</entry>
1291          <entry><type>int4</type> or <type>int32</type></entry>
1292          <entry><filename>postgres.h</filename></entry>
1293         </row>
1294         <row>
1295          <entry><type>real</type> (<type>float4</type>)</entry>
1296          <entry><type>float4*</type></entry>
1297         <entry><filename>postgres.h</filename></entry>
1298         </row>
1299         <row>
1300          <entry><type>double precision</type> (<type>float8</type>)</entry>
1301          <entry><type>float8*</type></entry>
1302          <entry><filename>postgres.h</filename></entry>
1303         </row>
1304         <row>
1305          <entry><type>interval</type></entry>
1306          <entry><type>Interval*</type></entry>
1307          <entry><filename>utils/timestamp.h</filename></entry>
1308         </row>
1309         <row>
1310          <entry><type>lseg</type></entry>
1311          <entry><type>LSEG*</type></entry>
1312          <entry><filename>utils/geo_decls.h</filename></entry>
1313         </row>
1314         <row>
1315          <entry><type>name</type></entry>
1316          <entry><type>Name</type></entry>
1317          <entry><filename>postgres.h</filename></entry>
1318         </row>
1319         <row>
1320          <entry><type>oid</type></entry>
1321          <entry><type>Oid</type></entry>
1322          <entry><filename>postgres.h</filename></entry>
1323         </row>
1324         <row>
1325          <entry><type>oidvector</type></entry>
1326          <entry><type>oidvector*</type></entry>
1327          <entry><filename>postgres.h</filename></entry>
1328         </row>
1329         <row>
1330          <entry><type>path</type></entry>
1331          <entry><type>PATH*</type></entry>
1332          <entry><filename>utils/geo_decls.h</filename></entry>
1333         </row>
1334         <row>
1335          <entry><type>point</type></entry>
1336          <entry><type>Point*</type></entry>
1337          <entry><filename>utils/geo_decls.h</filename></entry>
1338         </row>
1339         <row>
1340          <entry><type>regproc</type></entry>
1341          <entry><type>regproc</type></entry>
1342          <entry><filename>postgres.h</filename></entry>
1343         </row>
1344         <row>
1345          <entry><type>reltime</type></entry>
1346          <entry><type>RelativeTime</type></entry>
1347          <entry><filename>utils/nabstime.h</filename></entry>
1348         </row>
1349         <row>
1350          <entry><type>text</type></entry>
1351          <entry><type>text*</type></entry>
1352          <entry><filename>postgres.h</filename></entry>
1353         </row>
1354         <row>
1355          <entry><type>tid</type></entry>
1356          <entry><type>ItemPointer</type></entry>
1357          <entry><filename>storage/itemptr.h</filename></entry>
1358         </row>
1359         <row>
1360          <entry><type>time</type></entry>
1361          <entry><type>TimeADT</type></entry>
1362          <entry><filename>utils/date.h</filename></entry>
1363         </row>
1364         <row>
1365          <entry><type>time with time zone</type></entry>
1366          <entry><type>TimeTzADT</type></entry>
1367          <entry><filename>utils/date.h</filename></entry>
1368         </row>
1369         <row>
1370          <entry><type>timestamp</type></entry>
1371          <entry><type>Timestamp*</type></entry>
1372          <entry><filename>utils/timestamp.h</filename></entry>
1373         </row>
1374         <row>
1375          <entry><type>tinterval</type></entry>
1376          <entry><type>TimeInterval</type></entry>
1377          <entry><filename>utils/nabstime.h</filename></entry>
1378         </row>
1379         <row>
1380          <entry><type>varchar</type></entry>
1381          <entry><type>VarChar*</type></entry>
1382          <entry><filename>postgres.h</filename></entry>
1383         </row>
1384         <row>
1385          <entry><type>xid</type></entry>
1386          <entry><type>TransactionId</type></entry>
1387          <entry><filename>postgres.h</filename></entry>
1388         </row>
1389        </tbody>
1390       </tgroup>
1391      </table>
1392
1393     <para>
1394      Now that we've gone over all of the possible structures
1395      for base types, we can show some examples of real functions.
1396     </para>
1397    </sect2>
1398
1399    <sect2>
1400     <title>Calling Conventions Version 0 for C-Language Functions</title>
1401
1402     <para>
1403      We present the <quote>old style</quote> calling convention first &mdash; although
1404      this approach is now deprecated, it's easier to get a handle on
1405      initially.  In the version-0 method, the arguments and result
1406      of the C function are just declared in normal C style, but being
1407      careful to use the C representation of each SQL data type as shown
1408      above.
1409     </para>
1410
1411     <para>
1412      Here are some examples:
1413
1414 <programlisting>
1415 #include "postgres.h"
1416 #include &lt;string.h&gt;
1417
1418 /* by value */
1419
1420 int
1421 add_one(int arg)
1422 {
1423     return arg + 1;
1424 }
1425
1426 /* by reference, fixed length */
1427
1428 float8 *
1429 add_one_float8(float8 *arg)
1430 {
1431     float8    *result = (float8 *) palloc(sizeof(float8));
1432
1433     *result = *arg + 1.0;
1434
1435     return result;
1436 }
1437
1438 Point *
1439 makepoint(Point *pointx, Point *pointy)
1440 {
1441     Point     *new_point = (Point *) palloc(sizeof(Point));
1442
1443     new_point->x = pointx->x;
1444     new_point->y = pointy->y;
1445
1446     return new_point;
1447 }
1448
1449 /* by reference, variable length */
1450
1451 text *
1452 copytext(text *t)
1453 {
1454     /*
1455      * VARSIZE is the total size of the struct in bytes.
1456      */
1457     text *new_t = (text *) palloc(VARSIZE(t));
1458     VARATT_SIZEP(new_t) = VARSIZE(t);
1459     /*
1460      * VARDATA is a pointer to the data region of the struct.
1461      */
1462     memcpy((void *) VARDATA(new_t), /* destination */
1463            (void *) VARDATA(t),     /* source */
1464            VARSIZE(t)-VARHDRSZ);    /* how many bytes */
1465     return new_t;
1466 }
1467
1468 text *
1469 concat_text(text *arg1, text *arg2)
1470 {
1471     int32 new_text_size = VARSIZE(arg1) + VARSIZE(arg2) - VARHDRSZ;
1472     text *new_text = (text *) palloc(new_text_size);
1473
1474     VARATT_SIZEP(new_text) = new_text_size;
1475     memcpy(VARDATA(new_text), VARDATA(arg1), VARSIZE(arg1)-VARHDRSZ);
1476     memcpy(VARDATA(new_text) + (VARSIZE(arg1)-VARHDRSZ),
1477            VARDATA(arg2), VARSIZE(arg2)-VARHDRSZ);
1478     return new_text;
1479 }
1480 </programlisting>
1481     </para>
1482
1483     <para>
1484      Supposing that the above code has been prepared in file
1485      <filename>funcs.c</filename> and compiled into a shared object,
1486      we could define the functions to <productname>PostgreSQL</productname>
1487      with commands like this:
1488
1489 <programlisting>
1490 CREATE FUNCTION add_one(integer) RETURNS integer
1491      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'add_one'
1492      LANGUAGE C STRICT;
1493
1494 -- note overloading of SQL function name "add_one"
1495 CREATE FUNCTION add_one(double precision) RETURNS double precision
1496      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'add_one_float8'
1497      LANGUAGE C STRICT;
1498
1499 CREATE FUNCTION makepoint(point, point) RETURNS point
1500      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'makepoint'
1501      LANGUAGE C STRICT;
1502
1503 CREATE FUNCTION copytext(text) RETURNS text
1504      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'copytext'
1505      LANGUAGE C STRICT;
1506
1507 CREATE FUNCTION concat_text(text, text) RETURNS text
1508      AS '<replaceable>DIRECTORY</replaceable>/funcs', 'concat_text'
1509      LANGUAGE C STRICT;
1510 </programlisting>
1511     </para>
1512
1513     <para>
1514      Here, <replaceable>DIRECTORY</replaceable> stands for the
1515      directory of the shared library file (for instance the
1516      <productname>PostgreSQL</productname> tutorial directory, which
1517      contains the code for the examples used in this section).
1518      (Better style would be to use just <literal>'funcs'</> in the
1519      <literal>AS</> clause, after having added
1520      <replaceable>DIRECTORY</replaceable> to the search path.  In any
1521      case, we may omit the system-specific extension for a shared
1522      library, commonly <literal>.so</literal> or
1523      <literal>.sl</literal>.)
1524     </para>
1525
1526     <para>
1527      Notice that we have specified the functions as <quote>strict</quote>,
1528      meaning that
1529      the system should automatically assume a null result if any input
1530      value is null.  By doing this, we avoid having to check for null inputs
1531      in the function code.  Without this, we'd have to check for null values
1532      explicitly, by checking for a null pointer for each
1533      pass-by-reference argument.  (For pass-by-value arguments, we don't
1534      even have a way to check!)
1535     </para>
1536
1537     <para>
1538      Although this calling convention is simple to use,
1539      it is not very portable; on some architectures there are problems
1540      with passing data types that are smaller than <type>int</type> this way.  Also, there is
1541      no simple way to return a null result, nor to cope with null arguments
1542      in any way other than making the function strict.  The version-1
1543      convention, presented next, overcomes these objections.
1544     </para>
1545    </sect2>
1546
1547    <sect2>
1548     <title>Calling Conventions Version 1 for C-Language Functions</title>
1549
1550     <para>
1551      The version-1 calling convention relies on macros to suppress most
1552      of the complexity of passing arguments and results.  The C declaration
1553      of a version-1 function is always
1554 <programlisting>
1555 Datum funcname(PG_FUNCTION_ARGS)
1556 </programlisting>
1557      In addition, the macro call
1558 <programlisting>
1559 PG_FUNCTION_INFO_V1(funcname);
1560 </programlisting>
1561      must appear in the same source file.  (Conventionally, it's
1562      written just before the function itself.)  This macro call is not
1563      needed for <literal>internal</>-language functions, since
1564      <productname>PostgreSQL</> assumes that all internal functions
1565      use the version-1 convention.  It is, however, required for
1566      dynamically-loaded functions.
1567     </para>
1568
1569     <para>
1570      In a version-1 function, each actual argument is fetched using a
1571      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function>
1572      macro that corresponds to the argument's data type, and the
1573      result is returned using a
1574      <function>PG_RETURN_<replaceable>xxx</replaceable>()</function>
1575      macro for the return type.
1576      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function>
1577      takes as its argument the number of the function argument to
1578      fetch, where the count starts at 0.
1579      <function>PG_RETURN_<replaceable>xxx</replaceable>()</function>
1580      takes as its argument the actual value to return.
1581     </para>
1582
1583     <para>
1584      Here we show the same functions as above, coded in version-1 style:
1585
1586 <programlisting>
1587 #include "postgres.h"
1588 #include &lt;string.h&gt;
1589 #include "fmgr.h"
1590
1591 /* by value */
1592
1593 PG_FUNCTION_INFO_V1(add_one);
1594
1595 Datum
1596 add_one(PG_FUNCTION_ARGS)
1597 {
1598     int32   arg = PG_GETARG_INT32(0);
1599
1600     PG_RETURN_INT32(arg + 1);
1601 }
1602
1603 /* by reference, fixed length */
1604
1605 PG_FUNCTION_INFO_V1(add_one_float8);
1606
1607 Datum
1608 add_one_float8(PG_FUNCTION_ARGS)
1609 {
1610     /* The macros for FLOAT8 hide its pass-by-reference nature. */
1611     float8   arg = PG_GETARG_FLOAT8(0);
1612
1613     PG_RETURN_FLOAT8(arg + 1.0);
1614 }
1615
1616 PG_FUNCTION_INFO_V1(makepoint);
1617
1618 Datum
1619 makepoint(PG_FUNCTION_ARGS)
1620 {
1621     /* Here, the pass-by-reference nature of Point is not hidden. */
1622     Point     *pointx = PG_GETARG_POINT_P(0);
1623     Point     *pointy = PG_GETARG_POINT_P(1);
1624     Point     *new_point = (Point *) palloc(sizeof(Point));
1625
1626     new_point->x = pointx->x;
1627     new_point->y = pointy->y;
1628
1629     PG_RETURN_POINT_P(new_point);
1630 }
1631
1632 /* by reference, variable length */
1633
1634 PG_FUNCTION_INFO_V1(copytext);
1635
1636 Datum
1637 copytext(PG_FUNCTION_ARGS)
1638 {
1639     text     *t = PG_GETARG_TEXT_P(0);
1640     /*
1641      * VARSIZE is the total size of the struct in bytes.
1642      */
1643     text     *new_t = (text *) palloc(VARSIZE(t));
1644     VARATT_SIZEP(new_t) = VARSIZE(t);
1645     /*
1646      * VARDATA is a pointer to the data region of the struct.
1647      */
1648     memcpy((void *) VARDATA(new_t), /* destination */
1649            (void *) VARDATA(t),     /* source */
1650            VARSIZE(t)-VARHDRSZ);    /* how many bytes */
1651     PG_RETURN_TEXT_P(new_t);
1652 }
1653
1654 PG_FUNCTION_INFO_V1(concat_text);
1655
1656 Datum
1657 concat_text(PG_FUNCTION_ARGS)
1658 {
1659     text  *arg1 = PG_GETARG_TEXT_P(0);
1660     text  *arg2 = PG_GETARG_TEXT_P(1);
1661     int32 new_text_size = VARSIZE(arg1) + VARSIZE(arg2) - VARHDRSZ;
1662     text *new_text = (text *) palloc(new_text_size);
1663
1664     VARATT_SIZEP(new_text) = new_text_size;
1665     memcpy(VARDATA(new_text), VARDATA(arg1), VARSIZE(arg1)-VARHDRSZ);
1666     memcpy(VARDATA(new_text) + (VARSIZE(arg1)-VARHDRSZ),
1667            VARDATA(arg2), VARSIZE(arg2)-VARHDRSZ);
1668     PG_RETURN_TEXT_P(new_text);
1669 }
1670 </programlisting>
1671     </para>
1672
1673     <para>
1674      The <command>CREATE FUNCTION</command> commands are the same as
1675      for the version-0 equivalents.
1676     </para>
1677
1678     <para>
1679      At first glance, the version-1 coding conventions may appear to
1680      be just pointless obscurantism.  They do, however, offer a number
1681      of improvements, because the macros can hide unnecessary detail.
1682      An example is that in coding <function>add_one_float8</>, we no longer need to
1683      be aware that <type>float8</type> is a pass-by-reference type.  Another
1684      example is that the <literal>GETARG</> macros for variable-length types allow
1685      for more efficient fetching of <quote>toasted</quote> (compressed or
1686      out-of-line) values.
1687     </para>
1688
1689     <para>
1690      One big improvement in version-1 functions is better handling of null
1691      inputs and results.  The macro <function>PG_ARGISNULL(<replaceable>n</>)</function>
1692      allows a function to test whether each input is null.  (Of course, doing
1693      this is only necessary in functions not declared <quote>strict</>.)
1694      As with the
1695      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function> macros,
1696      the input arguments are counted beginning at zero.  Note that one
1697      should refrain from executing
1698      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function> until
1699      one has verified that the argument isn't null.
1700      To return a null result, execute <function>PG_RETURN_NULL()</function>;
1701      this works in both strict and nonstrict functions.
1702     </para>
1703
1704     <para>
1705      Other options provided in the new-style interface are two
1706      variants of the
1707      <function>PG_GETARG_<replaceable>xxx</replaceable>()</function>
1708      macros. The first of these,
1709      <function>PG_GETARG_<replaceable>xxx</replaceable>_COPY()</function>,
1710      guarantees to return a copy of the specified argument that is
1711      safe for writing into. (The normal macros will sometimes return a
1712      pointer to a value that is physically stored in a table, which
1713      must not be written to. Using the
1714      <function>PG_GETARG_<replaceable>xxx</replaceable>_COPY()</function>
1715      macros guarantees a writable result.)
1716     The second variant consists of the
1717     <function>PG_GETARG_<replaceable>xxx</replaceable>_SLICE()</function>
1718     macros which take three arguments. The first is the number of the
1719     function argument (as above). The second and third are the offset and
1720     length of the segment to be returned. Offsets are counted from
1721     zero, and a negative length requests that the remainder of the
1722     value be returned. These macros provide more efficient access to
1723     parts of large values in the case where they have storage type
1724     <quote>external</quote>. (The storage type of a column can be specified using
1725     <literal>ALTER TABLE <replaceable>tablename</replaceable> ALTER
1726     COLUMN <replaceable>colname</replaceable> SET STORAGE
1727     <replaceable>storagetype</replaceable></literal>. <replaceable>storagetype</replaceable> is one of
1728     <literal>plain</>, <literal>external</>, <literal>extended</literal>,
1729      or <literal>main</>.)
1730     </para>
1731
1732     <para>
1733      Finally, the version-1 function call conventions make it possible
1734      to return set results (<xref linkend="xfunc-c-return-set">) and
1735      implement trigger functions (<xref linkend="triggers">) and
1736      procedural-language call handlers (<xref
1737      linkend="plhandler">).  Version-1 code is also more
1738      portable than version-0, because it does not break restrictions
1739      on function call protocol in the C standard.  For more details
1740      see <filename>src/backend/utils/fmgr/README</filename> in the
1741      source distribution.
1742     </para>
1743    </sect2>
1744
1745    <sect2>
1746     <title>Writing Code</title>
1747
1748     <para>
1749      Before we turn to the more advanced topics, we should discuss
1750      some coding rules for <productname>PostgreSQL</productname>
1751      C-language functions.  While it may be possible to load functions
1752      written in languages other than C into
1753      <productname>PostgreSQL</productname>, this is usually difficult
1754      (when it is possible at all) because other languages, such as
1755      C++, FORTRAN, or Pascal often do not follow the same calling
1756      convention as C.  That is, other languages do not pass argument
1757      and return values between functions in the same way.  For this
1758      reason, we will assume that your C-language functions are
1759      actually written in C.
1760     </para>
1761
1762     <para>
1763      The basic rules for writing and building C functions are as follows:
1764
1765      <itemizedlist>
1766       <listitem>
1767        <para>
1768         Use <literal>pg_config
1769         --includedir-server</literal><indexterm><primary>pg_config</><secondary>with user-defined C functions</></>
1770         to find out where the <productname>PostgreSQL</> server header
1771         files are installed on your system (or the system that your
1772         users will be running on).  This option is new with
1773         <productname>PostgreSQL</> 7.2.  For
1774         <productname>PostgreSQL</> 7.1 you should use the option
1775         <option>--includedir</option>.  (<command>pg_config</command>
1776         will exit with a non-zero status if it encounters an unknown
1777         option.)  For releases prior to 7.1 you will have to guess,
1778         but since that was before the current calling conventions were
1779         introduced, it is unlikely that you want to support those
1780         releases.
1781        </para>
1782       </listitem>
1783
1784       <listitem>
1785        <para>
1786         When allocating memory, use the
1787         <productname>PostgreSQL</productname> functions
1788         <function>palloc</function><indexterm><primary>palloc</></> and <function>pfree</function><indexterm><primary>pfree</></>
1789         instead of the corresponding C library functions
1790         <function>malloc</function> and <function>free</function>.
1791         The memory allocated by <function>palloc</function> will be
1792         freed automatically at the end of each transaction, preventing
1793         memory leaks.
1794        </para>
1795       </listitem>
1796
1797       <listitem>
1798        <para>
1799         Always zero the bytes of your structures using
1800         <function>memset</function>.  Without this, it's difficult to
1801         support hash indexes or hash joins, as you must pick out only
1802         the significant bits of your data structure to compute a hash.
1803         Even if you initialize all fields of your structure, there may be
1804         alignment padding (holes in the structure) that may contain
1805         garbage values.
1806        </para>
1807       </listitem>
1808
1809       <listitem>
1810        <para>
1811         Most of the internal <productname>PostgreSQL</productname>
1812         types are declared in <filename>postgres.h</filename>, while
1813         the function manager interfaces
1814         (<symbol>PG_FUNCTION_ARGS</symbol>, etc.)  are in
1815         <filename>fmgr.h</filename>, so you will need to include at
1816         least these two files.  For portability reasons it's best to
1817         include <filename>postgres.h</filename> <emphasis>first</>,
1818         before any other system or user header files.  Including
1819         <filename>postgres.h</filename> will also include
1820         <filename>elog.h</filename> and <filename>palloc.h</filename>
1821         for you.
1822        </para>
1823       </listitem>
1824
1825       <listitem>
1826        <para>
1827         Symbol names defined within object files must not conflict
1828         with each other or with symbols defined in the
1829         <productname>PostgreSQL</productname> server executable.  You
1830         will have to rename your functions or variables if you get
1831         error messages to this effect.
1832        </para>
1833       </listitem>
1834
1835       <listitem>
1836        <para>
1837         Compiling and linking your code so that it can be dynamically
1838         loaded into <productname>PostgreSQL</productname> always
1839         requires special flags.  See <xref linkend="dfunc"> for a
1840         detailed explanation of how to do it for your particular
1841         operating system.
1842        </para>
1843       </listitem>
1844      </itemizedlist>
1845     </para>
1846    </sect2>
1847
1848 &dfunc;
1849
1850    <sect2 id="xfunc-c-pgxs">
1851     <title>Extension Building Infrastructure</title>
1852
1853    <indexterm zone="xfunc-c-pgxs">
1854     <primary>pgxs</primary>
1855    </indexterm>
1856
1857    <para>
1858     If you are thinking about distributing your
1859     <productname>PostgreSQL</> extension modules, setting up a
1860     portable build system for them can be fairly difficult.  Therefore
1861     the <productname>PostgreSQL</> installation provides a build
1862     infrastructure for extensions, called <acronym>PGXS</acronym>, so
1863     that simple extension modules can be built simply against an
1864     already installed server.  Note that this infrastructure is not
1865     intended to be a universal build system framework that can be used
1866     to build all software interfacing to <productname>PostgreSQL</>;
1867     it simply automates common build rules for simple server extension
1868     modules.  For more complicated packages, you need to write your
1869     own build system.
1870    </para>
1871
1872    <para>
1873     To use the infrastructure for your extension, you must write a
1874     simple makefile.  In that makefile, you need to set some variables
1875     and finally include the global <acronym>PGXS</acronym> makefile.
1876     Here is an example that builds an extension module named
1877     <literal>isbn_issn</literal> consisting of a shared library, an
1878     SQL script, and a documentation text file:
1879 <programlisting>
1880 MODULES = isbn_issn
1881 DATA_built = isbn_issn.sql
1882 DOCS = README.isbn_issn
1883
1884 PGXS := $(shell pg_config --pgxs)
1885 include $(PGXS)
1886 </programlisting>
1887     The last two lines should always be the same.  Earlier in the
1888     file, you assign variables or add custom
1889     <application>make</application> rules.
1890    </para>
1891
1892    <para>
1893     The following variables can be set:
1894
1895     <variablelist>
1896      <varlistentry>
1897       <term><varname>MODULES</varname></term>
1898       <listitem>
1899        <para>
1900         list of shared objects to be built from source files with the same
1901         stem (do not include the suffix in this list)
1902        </para>
1903       </listitem>
1904      </varlistentry>
1905
1906      <varlistentry>
1907       <term><varname>DATA</varname></term>
1908       <listitem>
1909        <para>
1910         random files to install into <literal><replaceable>prefix</replaceable>/share/contrib</literal>
1911        </para>
1912       </listitem>
1913      </varlistentry>
1914
1915      <varlistentry>
1916       <term><varname>DATA_built</varname></term>
1917       <listitem>
1918        <para>
1919         random files to install into
1920         <literal><replaceable>prefix</replaceable>/share/contrib</literal>,
1921         which need to be built first
1922        </para>
1923       </listitem>
1924      </varlistentry>
1925
1926      <varlistentry>
1927       <term><varname>DOCS</varname></term>
1928       <listitem>
1929        <para>
1930         random files to install under
1931         <literal><replaceable>prefix</replaceable>/doc/contrib</literal>
1932        </para>
1933       </listitem>
1934      </varlistentry>
1935
1936      <varlistentry>
1937       <term><varname>SCRIPTS</varname></term>
1938       <listitem>
1939        <para>
1940         script files (not binaries) to install into
1941         <literal><replaceable>prefix</replaceable>/bin</literal>
1942        </para>
1943       </listitem>
1944      </varlistentry>
1945
1946      <varlistentry>
1947       <term><varname>SCRIPTS_built</varname></term>
1948       <listitem>
1949        <para>
1950         script files (not binaries) to install into
1951         <literal><replaceable>prefix</replaceable>/bin</literal>,
1952         which need to be built first
1953        </para>
1954       </listitem>
1955      </varlistentry>
1956
1957      <varlistentry>
1958       <term><varname>REGRESS</varname></term>
1959       <listitem>
1960        <para>
1961         list of regression test cases (without suffix)
1962        </para>
1963       </listitem>
1964      </varlistentry>
1965     </variablelist>
1966
1967     or at most one of these two:
1968
1969     <variablelist>
1970      <varlistentry>
1971       <term><varname>PROGRAM</varname></term>
1972       <listitem>
1973        <para>
1974         a binary program to build (list object files in <varname>OBJS</varname>)
1975        </para>
1976       </listitem>
1977      </varlistentry>
1978
1979      <varlistentry>
1980       <term><varname>MODULE_big</varname></term>
1981       <listitem>
1982        <para>
1983         a shared object to build (list object files in <varname>OBJS</varname>)
1984        </para>
1985       </listitem>
1986      </varlistentry>
1987     </variablelist>
1988
1989     The following can also be set:
1990
1991     <variablelist>
1992
1993      <varlistentry>
1994       <term><varname>EXTRA_CLEAN</varname></term>
1995       <listitem>
1996        <para>
1997         extra files to remove in <literal>make clean</literal>
1998        </para>
1999       </listitem>
2000      </varlistentry>
2001
2002      <varlistentry>
2003       <term><varname>PG_CPPFLAGS</varname></term>
2004       <listitem>
2005        <para>
2006         will be added to <varname>CPPFLAGS</varname>
2007        </para>
2008       </listitem>
2009      </varlistentry>
2010
2011      <varlistentry>
2012       <term><varname>PG_LIBS</varname></term>
2013       <listitem>
2014        <para>
2015         will be added to <varname>PROGRAM</varname> link line
2016        </para>
2017       </listitem>
2018      </varlistentry>
2019
2020      <varlistentry>
2021       <term><varname>SHLIB_LINK</varname></term>
2022       <listitem>
2023        <para>
2024         will be added to <varname>MODULE_big</varname> link line
2025        </para>
2026       </listitem>
2027      </varlistentry>
2028     </variablelist>
2029    </para>
2030
2031    <para>
2032     Put this makefile as <literal>Makefile</literal> in the directory
2033     which holds your extension. Then you can do
2034     <literal>make</literal> to compile, and later <literal>make
2035     install</literal> to install your module.  The extension is
2036     compiled and installed for the
2037     <productname>PostgreSQL</productname> installation that
2038     corresponds to the first <command>pg_config</command> command
2039     found in your path.
2040    </para>
2041   </sect2>
2042
2043
2044    <sect2>
2045     <title>Composite-Type Arguments in C-Language Functions</title>
2046
2047     <para>
2048      Composite types do not have a fixed layout like C structures.
2049      Instances of a composite type may contain null fields.  In
2050      addition, composite types that are part of an inheritance
2051      hierarchy may have different fields than other members of the
2052      same inheritance hierarchy.  Therefore,
2053      <productname>PostgreSQL</productname> provides a function
2054      interface for accessing fields of composite types from C.
2055     </para>
2056
2057     <para>
2058      Suppose we want to write a function to answer the query
2059
2060 <programlisting>
2061 SELECT name, c_overpaid(emp, 1500) AS overpaid
2062     FROM emp
2063     WHERE name = 'Bill' OR name = 'Sam';
2064 </programlisting>
2065
2066      Using the version-0 calling conventions, we can define
2067      <function>c_overpaid</> as:
2068
2069 <programlisting>
2070 #include "postgres.h"
2071 #include "executor/executor.h"  /* for GetAttributeByName() */
2072
2073 bool
2074 c_overpaid(HeapTupleHeader t, /* the current row of emp */
2075            int32 limit)
2076 {
2077     bool isnull;
2078     int32 salary;
2079
2080     salary = DatumGetInt32(GetAttributeByName(t, "salary", &amp;isnull));
2081     if (isnull)
2082         return false;
2083     return salary &gt; limit;
2084 }
2085 </programlisting>
2086
2087      In version-1 coding, the above would look like this:
2088
2089 <programlisting>
2090 #include "postgres.h"
2091 #include "executor/executor.h"  /* for GetAttributeByName() */
2092
2093 PG_FUNCTION_INFO_V1(c_overpaid);
2094
2095 Datum
2096 c_overpaid(PG_FUNCTION_ARGS)
2097 {
2098     HeapTupleHeader  t = PG_GETARG_HEAPTUPLEHEADER(0);
2099     int32            limit = PG_GETARG_INT32(1);
2100     bool isnull;
2101     Datum salary;
2102
2103     salary = GetAttributeByName(t, "salary", &amp;isnull);
2104     if (isnull)
2105         PG_RETURN_BOOL(false);
2106     /* Alternatively, we might prefer to do PG_RETURN_NULL() for null salary. */
2107
2108     PG_RETURN_BOOL(DatumGetInt32(salary) &gt; limit);
2109 }
2110 </programlisting>
2111     </para>
2112
2113     <para>
2114      <function>GetAttributeByName</function> is the
2115      <productname>PostgreSQL</productname> system function that
2116      returns attributes out of the specified row.  It has
2117      three arguments: the argument of type <type>HeapTupleHeader</type> passed
2118      into
2119      the function, the name of the desired attribute, and a
2120      return parameter that tells whether the attribute
2121      is null.  <function>GetAttributeByName</function> returns a <type>Datum</type>
2122      value that you can convert to the proper data type by using the
2123      appropriate <function>DatumGet<replaceable>XXX</replaceable>()</function>
2124      macro.  Note that the return value is meaningless if the null flag is
2125      set; always check the null flag before trying to do anything with the
2126      result.
2127     </para>
2128
2129     <para>
2130      There is also <function>GetAttributeByNum</function>, which selects
2131      the target attribute by column number instead of name.
2132     </para>
2133
2134     <para>
2135      The following command declares the function
2136      <function>c_overpaid</function> in SQL:
2137
2138 <programlisting>
2139 CREATE FUNCTION c_overpaid(emp, integer) RETURNS boolean
2140     AS '<replaceable>DIRECTORY</replaceable>/funcs', 'c_overpaid'
2141     LANGUAGE C STRICT;
2142 </programlisting>
2143
2144      Notice we have used <literal>STRICT</> so that we did not have to
2145      check whether the input arguments were NULL.
2146     </para>
2147    </sect2>
2148
2149    <sect2>
2150     <title>Returning Rows (Composite Types) from C-Language Functions</title>
2151
2152     <para>
2153      To return a row or composite-type value from a C-language
2154      function, you can use a special API that provides macros and
2155      functions to hide most of the complexity of building composite
2156      data types.  To use this API, the source file must include:
2157 <programlisting>
2158 #include "funcapi.h"
2159 </programlisting>
2160     </para>
2161
2162     <para>
2163      There are two ways you can build a composite data value (henceforth
2164      a <quote>tuple</>): you can build it from an array of Datum values,
2165      or from an array of C strings that can be passed to the input
2166      conversion functions of the tuple's column data types.  In either
2167      case, you first need to obtain or construct a <structname>TupleDesc</>
2168      descriptor for the tuple structure.  When working with Datums, you
2169      pass the <structname>TupleDesc</> to <function>BlessTupleDesc</>,
2170      and then call <function>heap_formtuple</> for each row.  When working
2171      with C strings, you pass the <structname>TupleDesc</> to
2172      <function>TupleDescGetAttInMetadata</>, and then call
2173      <function>BuildTupleFromCStrings</> for each row.  In the case of a
2174      function returning a set of tuples, the setup steps can all be done
2175      once during the first call of the function.
2176     </para>
2177
2178     <para>
2179      Several helper functions are available for setting up the initial
2180      <structname>TupleDesc</>.  If you want to use a named composite type,
2181      you can fetch the information from the system catalogs.  Use
2182 <programlisting>
2183 TupleDesc RelationNameGetTupleDesc(const char *relname)
2184 </programlisting>
2185      to get a <structname>TupleDesc</> for a named relation, or
2186 <programlisting>
2187 TupleDesc TypeGetTupleDesc(Oid typeoid, List *colaliases)
2188 </programlisting>
2189      to get a <structname>TupleDesc</> based on a type OID. This can
2190      be used to get a <structname>TupleDesc</> for a base or
2191      composite type.  When writing a function that returns
2192      <structname>record</>, the expected <structname>TupleDesc</>
2193      must be passed in by the caller.
2194     </para>
2195
2196     <para>
2197      Once you have a <structname>TupleDesc</>, call
2198 <programlisting>
2199 TupleDesc BlessTupleDesc(TupleDesc tupdesc)
2200 </programlisting>
2201      if you plan to work with Datums, or
2202 <programlisting>
2203 AttInMetadata *TupleDescGetAttInMetadata(TupleDesc tupdesc)
2204 </programlisting>
2205      if you plan to work with C strings.  If you are writing a function
2206      returning set, you can save the results of these functions in the
2207      <structname>FuncCallContext</> structure &mdash; use the
2208      <structfield>tuple_desc</> or <structfield>attinmeta</> field
2209      respectively.
2210     </para>
2211
2212     <para>
2213      When working with Datums, use
2214 <programlisting>
2215 HeapTuple heap_formtuple(TupleDesc tupdesc, Datum *values, char *nulls)
2216 </programlisting>
2217      to build a <structname>HeapTuple</> given user data in Datum form.
2218     </para>
2219
2220     <para>
2221      When working with C strings, use
2222 <programlisting>
2223 HeapTuple BuildTupleFromCStrings(AttInMetadata *attinmeta, char **values)
2224 </programlisting>
2225      to build a <structname>HeapTuple</> given user data
2226      in C string form.  <literal>values</literal> is an array of C strings,
2227      one for each attribute of the return row. Each C string should be in
2228      the form expected by the input function of the attribute data
2229      type. In order to return a null value for one of the attributes,
2230      the corresponding pointer in the <parameter>values</> array
2231      should be set to <symbol>NULL</>.  This function will need to
2232      be called again for each row you return.
2233     </para>
2234
2235     <para>
2236      Once you have built a tuple to return from your function, it
2237      must be converted into a <type>Datum</>. Use
2238 <programlisting>
2239 HeapTupleGetDatum(HeapTuple tuple)
2240 </programlisting>
2241      to convert a <structname>HeapTuple</> into a valid Datum.  This
2242      <type>Datum</> can be returned directly if you intend to return
2243      just a single row, or it can be used as the current return value
2244      in a set-returning function.
2245     </para>
2246
2247     <para>
2248      An example appears in the next section.
2249     </para>
2250
2251    </sect2>
2252
2253    <sect2 id="xfunc-c-return-set">
2254     <title>Returning Sets from C-Language Functions</title>
2255
2256     <para>
2257      There is also a special API that provides support for returning
2258      sets (multiple rows) from a C-language function.  A set-returning
2259      function must follow the version-1 calling conventions.  Also,
2260      source files must include <filename>funcapi.h</filename>, as
2261      above.
2262     </para>
2263
2264     <para>
2265      A set-returning function (<acronym>SRF</>) is called
2266      once for each item it returns.  The <acronym>SRF</> must
2267      therefore save enough state to remember what it was doing and
2268      return the next item on each call.
2269      The structure <structname>FuncCallContext</> is provided to help
2270      control this process.  Within a function, <literal>fcinfo-&gt;flinfo-&gt;fn_extra</>
2271      is used to hold a pointer to <structname>FuncCallContext</>
2272      across calls.
2273 <programlisting>
2274 typedef struct
2275 {
2276     /*
2277      * Number of times we've been called before
2278      *
2279      * call_cntr is initialized to 0 for you by SRF_FIRSTCALL_INIT(), and
2280      * incremented for you every time SRF_RETURN_NEXT() is called.
2281      */
2282     uint32 call_cntr;
2283
2284     /*
2285      * OPTIONAL maximum number of calls
2286      *
2287      * max_calls is here for convenience only and setting it is optional.
2288      * If not set, you must provide alternative means to know when the
2289      * function is done.
2290      */
2291     uint32 max_calls;
2292
2293     /*
2294      * OPTIONAL pointer to result slot
2295      *
2296      * This is obsolete and only present for backwards compatibility, viz,
2297      * user-defined SRFs that use the deprecated TupleDescGetSlot().
2298      */
2299     TupleTableSlot *slot;
2300
2301     /*
2302      * OPTIONAL pointer to miscellaneous user-provided context information
2303      *
2304      * user_fctx is for use as a pointer to your own data to retain
2305      * arbitrary context information between calls of your function.
2306      */
2307     void *user_fctx;
2308
2309     /*
2310      * OPTIONAL pointer to struct containing attribute type input metadata
2311      *
2312      * attinmeta is for use when returning tuples (i.e., composite data types)
2313      * and is not used when returning base data types. It is only needed
2314      * if you intend to use BuildTupleFromCStrings() to create the return
2315      * tuple.
2316      */
2317     AttInMetadata *attinmeta;
2318
2319     /*
2320      * memory context used for structures that must live for multiple calls
2321      *
2322      * multi_call_memory_ctx is set by SRF_FIRSTCALL_INIT() for you, and used
2323      * by SRF_RETURN_DONE() for cleanup. It is the most appropriate memory
2324      * context for any memory that is to be reused across multiple calls
2325      * of the SRF.
2326      */
2327     MemoryContext multi_call_memory_ctx;
2328
2329     /*
2330      * OPTIONAL pointer to struct containing tuple description
2331      *
2332      * tuple_desc is for use when returning tuples (i.e. composite data types)
2333      * and is only needed if you are going to build the tuples with
2334      * heap_formtuple() rather than with BuildTupleFromCStrings().  Note that
2335      * the TupleDesc pointer stored here should usually have been run through
2336      * BlessTupleDesc() first.
2337      */
2338     TupleDesc tuple_desc;
2339
2340 } FuncCallContext;
2341 </programlisting>
2342     </para>
2343
2344     <para>
2345      An <acronym>SRF</> uses several functions and macros that
2346      automatically manipulate the <structname>FuncCallContext</>
2347      structure (and expect to find it via <literal>fn_extra</>).  Use
2348 <programlisting>
2349 SRF_IS_FIRSTCALL()
2350 </programlisting>
2351      to determine if your function is being called for the first or a
2352      subsequent time. On the first call (only) use
2353 <programlisting>
2354 SRF_FIRSTCALL_INIT()
2355 </programlisting>
2356      to initialize the <structname>FuncCallContext</>. On every function call,
2357      including the first, use
2358 <programlisting>
2359 SRF_PERCALL_SETUP()
2360 </programlisting>
2361      to properly set up for using the <structname>FuncCallContext</>
2362      and clearing any previously returned data left over from the
2363      previous pass.
2364     </para>
2365
2366     <para>
2367      If your function has data to return, use
2368 <programlisting>
2369 SRF_RETURN_NEXT(funcctx, result)
2370 </programlisting>
2371      to return it to the caller.  (<literal>result</> must be of type
2372      <type>Datum</>, either a single value or a tuple prepared as
2373      described above.)  Finally, when your function is finished
2374      returning data, use
2375 <programlisting>
2376 SRF_RETURN_DONE(funcctx)
2377 </programlisting>
2378      to clean up and end the <acronym>SRF</>.
2379     </para>
2380
2381     <para>
2382      The memory context that is current when the <acronym>SRF</> is called is
2383      a transient context that will be cleared between calls.  This means
2384      that you do not need to call <function>pfree</> on everything
2385      you allocated using <function>palloc</>; it will go away anyway.  However, if you want to allocate
2386      any data structures to live across calls, you need to put them somewhere
2387      else.  The memory context referenced by
2388      <structfield>multi_call_memory_ctx</> is a suitable location for any
2389      data that needs to survive until the <acronym>SRF</> is finished running.  In most
2390      cases, this means that you should switch into
2391      <structfield>multi_call_memory_ctx</> while doing the first-call setup.
2392     </para>
2393
2394     <para>
2395      A complete pseudo-code example looks like the following:
2396 <programlisting>
2397 Datum
2398 my_set_returning_function(PG_FUNCTION_ARGS)
2399 {
2400     FuncCallContext  *funcctx;
2401     Datum             result;
2402     MemoryContext     oldcontext;
2403     <replaceable>further declarations as needed</replaceable>
2404
2405     if (SRF_IS_FIRSTCALL())
2406     {
2407         funcctx = SRF_FIRSTCALL_INIT();
2408         oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
2409         /* One-time setup code appears here: */
2410         <replaceable>user code</replaceable>
2411         <replaceable>if returning composite</replaceable>
2412             <replaceable>build TupleDesc, and perhaps AttInMetadata</replaceable>
2413         <replaceable>endif returning composite</replaceable>
2414         <replaceable>user code</replaceable>
2415         MemoryContextSwitchTo(oldcontext);
2416     }
2417
2418     /* Each-time setup code appears here: */
2419     <replaceable>user code</replaceable>
2420     funcctx = SRF_PERCALL_SETUP();
2421     <replaceable>user code</replaceable>
2422
2423     /* this is just one way we might test whether we are done: */
2424     if (funcctx-&gt;call_cntr &lt; funcctx-&gt;max_calls)
2425     {
2426         /* Here we want to return another item: */
2427         <replaceable>user code</replaceable>
2428         <replaceable>obtain result Datum</replaceable>
2429         SRF_RETURN_NEXT(funcctx, result);
2430     }
2431     else
2432     {
2433         /* Here we are done returning items and just need to clean up: */
2434         <replaceable>user code</replaceable>
2435         SRF_RETURN_DONE(funcctx);
2436     }
2437 }
2438 </programlisting>
2439     </para>
2440
2441     <para>
2442      A complete example of a simple <acronym>SRF</> returning a composite type looks like:
2443 <programlisting>
2444 PG_FUNCTION_INFO_V1(testpassbyval);
2445
2446 Datum
2447 testpassbyval(PG_FUNCTION_ARGS)
2448 {
2449     FuncCallContext     *funcctx;
2450     int                  call_cntr;
2451     int                  max_calls;
2452     TupleDesc            tupdesc;
2453     AttInMetadata       *attinmeta;
2454
2455     /* stuff done only on the first call of the function */
2456     if (SRF_IS_FIRSTCALL())
2457     {
2458         MemoryContext   oldcontext;
2459
2460         /* create a function context for cross-call persistence */
2461         funcctx = SRF_FIRSTCALL_INIT();
2462
2463         /* switch to memory context appropriate for multiple function calls */
2464         oldcontext = MemoryContextSwitchTo(funcctx-&gt;multi_call_memory_ctx);
2465
2466         /* total number of tuples to be returned */
2467         funcctx-&gt;max_calls = PG_GETARG_UINT32(0);
2468
2469         /* Build a tuple description for a __testpassbyval tuple */
2470         tupdesc = RelationNameGetTupleDesc("__testpassbyval");
2471
2472         /*
2473          * generate attribute metadata needed later to produce tuples from raw
2474          * C strings
2475          */
2476         attinmeta = TupleDescGetAttInMetadata(tupdesc);
2477         funcctx-&gt;attinmeta = attinmeta;
2478
2479         MemoryContextSwitchTo(oldcontext);
2480     }
2481
2482     /* stuff done on every call of the function */
2483     funcctx = SRF_PERCALL_SETUP();
2484
2485     call_cntr = funcctx-&gt;call_cntr;
2486     max_calls = funcctx-&gt;max_calls;
2487     attinmeta = funcctx-&gt;attinmeta;
2488
2489     if (call_cntr &lt; max_calls)    /* do when there is more left to send */
2490     {
2491         char       **values;
2492         HeapTuple    tuple;
2493         Datum        result;
2494
2495         /*
2496          * Prepare a values array for building the returned tuple.
2497          * This should be an array of C strings which will
2498          * be processed later by the type input functions.
2499          */
2500         values = (char **) palloc(3 * sizeof(char *));
2501         values[0] = (char *) palloc(16 * sizeof(char));
2502         values[1] = (char *) palloc(16 * sizeof(char));
2503         values[2] = (char *) palloc(16 * sizeof(char));
2504
2505         snprintf(values[0], 16, "%d", 1 * PG_GETARG_INT32(1));
2506         snprintf(values[1], 16, "%d", 2 * PG_GETARG_INT32(1));
2507         snprintf(values[2], 16, "%d", 3 * PG_GETARG_INT32(1));
2508
2509         /* build a tuple */
2510         tuple = BuildTupleFromCStrings(attinmeta, values);
2511
2512         /* make the tuple into a datum */
2513         result = HeapTupleGetDatum(tuple);
2514
2515         /* clean up (this is not really necessary) */
2516         pfree(values[0]);
2517         pfree(values[1]);
2518         pfree(values[2]);
2519         pfree(values);
2520
2521         SRF_RETURN_NEXT(funcctx, result);
2522     }
2523     else    /* do when there is no more left */
2524     {
2525         SRF_RETURN_DONE(funcctx);
2526     }
2527 }
2528 </programlisting>
2529
2530      The SQL code to declare this function is:
2531 <programlisting>
2532 CREATE TYPE __testpassbyval AS (f1 integer, f2 integer, f3 integer);
2533
2534 CREATE OR REPLACE FUNCTION testpassbyval(integer, integer) RETURNS SETOF __testpassbyval
2535     AS '<replaceable>filename</>', 'testpassbyval'
2536     LANGUAGE C IMMUTABLE STRICT;
2537 </programlisting>
2538     </para>
2539
2540     <para>
2541      The directory <filename>contrib/tablefunc</> in the source
2542      distribution contains more examples of set-returning functions.
2543     </para>
2544    </sect2>
2545
2546    <sect2>
2547     <title>Polymorphic Arguments and Return Types</title>
2548
2549     <para>
2550      C-language functions may be declared to accept and
2551      return the polymorphic types
2552      <type>anyelement</type> and <type>anyarray</type>.
2553      See <xref linkend="extend-types-polymorphic"> for a more detailed explanation
2554      of polymorphic functions. When function arguments or return types
2555      are defined as polymorphic types, the function author cannot know
2556      in advance what data type the function will be called with, or what
2557      type it will need to return. There are two routines provided in <filename>fmgr.h</>
2558      to allow a version-1 C function to discover the actual data types
2559      of its arguments and the type it is expected to return. The routines are
2560      called <literal>get_fn_expr_rettype(FmgrInfo *flinfo)</> and
2561      <literal>get_fn_expr_argtype(FmgrInfo *flinfo, int argnum)</>.
2562      They return the result or argument type OID, or <symbol>InvalidOid</symbol> if the
2563      information is not available.
2564      The structure <literal>flinfo</> is normally accessed as
2565      <literal>fcinfo-&gt;flinfo</>. The parameter <literal>argnum</>
2566      is zero-based.
2567     </para>
2568
2569     <para>
2570      For example, suppose we want to write a function to accept a single
2571      element of any type, and return a one-dimensional array of that type:
2572
2573 <programlisting>
2574 PG_FUNCTION_INFO_V1(make_array);
2575 Datum
2576 make_array(PG_FUNCTION_ARGS)
2577 {
2578     ArrayType  *result;
2579     Oid         element_type = get_fn_expr_argtype(fcinfo-&gt;flinfo, 0);
2580     Datum       element;
2581     int16       typlen;
2582     bool        typbyval;
2583     char        typalign;
2584     int         ndims;
2585     int         dims[MAXDIM];
2586     int         lbs[MAXDIM];
2587
2588     if (!OidIsValid(element_type))
2589         elog(ERROR, "could not determine data type of input");
2590
2591     /* get the provided element */
2592     element = PG_GETARG_DATUM(0);
2593
2594     /* we have one dimension */
2595     ndims = 1;
2596     /* and one element */
2597     dims[0] = 1;
2598     /* and lower bound is 1 */
2599     lbs[0] = 1;
2600
2601     /* get required info about the element type */
2602     get_typlenbyvalalign(element_type, &amp;typlen, &amp;typbyval, &amp;typalign);
2603
2604     /* now build the array */
2605     result = construct_md_array(&amp;element, ndims, dims, lbs,
2606                                 element_type, typlen, typbyval, typalign);
2607
2608     PG_RETURN_ARRAYTYPE_P(result);
2609 }
2610 </programlisting>
2611     </para>
2612
2613     <para>
2614      The following command declares the function
2615      <function>make_array</function> in SQL:
2616
2617 <programlisting>
2618 CREATE FUNCTION make_array(anyelement) RETURNS anyarray
2619     AS '<replaceable>DIRECTORY</replaceable>/funcs', 'make_array'
2620     LANGUAGE C STRICT;
2621 </programlisting>
2622
2623      Note the use of <literal>STRICT</literal>; this is essential
2624      because the code does not test for a null input.
2625     </para>
2626    </sect2>
2627   </sect1>
2628
2629 <!-- Keep this comment at the end of the file
2630 Local variables:
2631 mode:sgml
2632 sgml-omittag:nil
2633 sgml-shorttag:t
2634 sgml-minimize-attributes:nil
2635 sgml-always-quote-attributes:t
2636 sgml-indent-step:1
2637 sgml-indent-data:t
2638 sgml-parent-document:nil
2639 sgml-default-dtd-file:"./reference.ced"
2640 sgml-exposed-tags:nil
2641 sgml-local-catalogs:("/usr/lib/sgml/catalog")
2642 sgml-local-ecat-files:nil
2643 End:
2644 -->