freeswan/pluto/kernel.c

   1 /* routines that interface with the kernel's IPsec mechanism
   2  * Copyright (C) 1997 Angelos D. Keromytis.
   3  * Copyright (C) 1998-2002  D. Hugh Redelmeier.
   4  *
   5  * This program is free software; you can redistribute it and/or modify it
   6  * under the terms of the GNU General Public License as published by the
   7  * Free Software Foundation; either version 2 of the License, or (at your
   8  * option) any later version.  See <http://www.fsf.org/copyleft/gpl.txt>.
   9  *
  10  * This program is distributed in the hope that it will be useful, but
  11  * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
  12  * or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  13  * for more details.
  14  *
  15  * RCSID $Id: kernel.c,v 1.149 2002/04/01 08:46:54 dhr Exp $
  16  */
  17
  18 #include <stddef.h>
  19 #include <string.h>
  20 #include <stdio.h>
  21 #include <stdlib.h>
  22 #include <errno.h>
  23 #include <wait.h>
  24 #include <unistd.h>
  25 #include <fcntl.h>
  26
  27 #include <sys/socket.h>
  28 #include <netinet/in.h>
  29 #include <arpa/inet.h>
  30
  31 #include <freeswan.h>
  32
  33 #ifdef KLIPS
  34 #include <sys/time.h>   /* for select(2) */
  35 #include <sys/types.h>  /* for select(2) */
  36 # include <signal.h>
  37 # include <pfkeyv2.h>
  38 # include <pfkey.h>
  39 #endif /* KLIPS */
  40
  41 #include <linux/config.h>
  42 #ifdef CONFIG_LEDMAN
  43 #include <linux/ledman.h>
  44 #endif
  45
  46 #include "constants.h"
  47 #include "defs.h"
  48 #include "rnd.h"
  49 #include "id.h"
  50 #include "x509.h"
  51 #include "connections.h"        /* needs id.h */
  52 #include "state.h"
  53 #include "timer.h"
  54 #include "kernel.h"
  55 #include "log.h"
  56 #include "server.h"
  57 #include "whack.h"      /* for RC_LOG_SERIOUS */
  58
  59 #include "alg_info.h"
  60 #include "kernel_alg.h"
  61
  62 #ifdef NAT_TRAVERSAL
  63 #include "packet.h"  /* for pb_stream in nat_traversal.h */
  64 #include "nat_traversal.h"
  65 #endif
  66
  67 bool can_do_IPcomp = TRUE;  /* can system actually perform IPCOMP? */
  68
  69 /* How far can IPsec messages arrive out of order before the anti-replay
  70  * logic loses track and swats them?  64 is the best KLIPS can do.
  71  */
  72 #define REPLAY_WINDOW   64
  73
  74 static bool route_and_eroute(struct connection *c
  75     , struct state *st);        /* forward declaration */
  76
  77 /* test if the routes required for two different connections agree
  78  * It is assumed that the destination subnets agree; we are only
  79  * testing that the interfaces and nexthops match.
  80  */
  81 #define routes_agree(c, d) ((c)->interface == (d)->interface \
  82         && sameaddr(&(c)->this.host_nexthop, &(d)->this.host_nexthop))
  83
  84 #ifndef KLIPS
  85
  86 bool no_klips = TRUE;   /* don't actually use KLIPS */
  87
  88 #else /* !KLIPS */
  89
  90 /* Declare eroute things early enough for uses.
  91  *
  92  * Flags are encoded above the low-order byte of verbs.
  93  * "real" eroutes are only outbound.  Inbound eroutes don't exist,
  94  * but an addflow with an INBOUND flag allows IPIP tunnels to be
  95  * limited to appropriate source and destination addresses.
  96  */
  97
  98 #define ERO_MASK        0xFF
  99 #define ERO_FLAG_SHIFT  8
 100
 101 #define ERO_DELETE      SADB_X_DELFLOW
 102 #define ERO_ADD SADB_X_ADDFLOW
 103 #define ERO_REPLACE     (SADB_X_ADDFLOW | (SADB_X_SAFLAGS_REPLACEFLOW << ERO_FLAG_SHIFT))
 104 #define ERO_ADD_INBOUND (SADB_X_ADDFLOW | (SADB_X_SAFLAGS_INFLOW << ERO_FLAG_SHIFT))
 105
 106 /* bare (connectionless) shunt (eroute) table
 107  *
 108  * Bare shunts are those that don't "belong" to a connection.
 109  * This happens because some %trapped traffic hasn't yet or cannot be
 110  * assigned to a connection.  The usual reason is that we cannot discover
 111  * the peer SG.  Another is that even when the peer has been discovered,
 112  * it may be that no connection matches all the particulars.
 113  * Bare shunts are either %hold or %pass.
 114  * We record them so that, with scanning, we can discover
 115  * which %holds are new and which %passes should expire.
 116  */
 117
 118 #define SHUNT_SCAN_INTERVAL     (60 * 2)   /* time between scans of eroutes */
 119
 120 /* SHUNT_PATIENCE only has resolution down to a multiple of the sample rate,
 121  * SHUNT_SCAN_INTERVAL.
 122  * By making SHUNT_PATIENCE an odd multiple of half of SHUNT_SCAN_INTERVAL,
 123  * we minimize the effects of jitter.
 124  */
 125 #define SHUNT_PATIENCE  (SHUNT_SCAN_INTERVAL * 15 / 2)  /* inactivity timeout */
 126
 127 struct bare_shunt {        /* one node of the singly linked bare_shunts list */
 128     ip_subnet ours;        /* our end of the shunted flow */
 129     ip_subnet his;         /* the peer's end of the shunted flow */
 130     ip_said said;          /* SA id of the shunt; a recorded %hold uses proto SA_INT, spi htonl(SPI_HOLD), dst "any" */
 131     unsigned long count;   /* set to 0 when the shunt is recorded; NOTE(review): presumably an eroute packet count -- confirm against the scanner */
 132     time_t last_activity;  /* set to now() when the shunt is recorded */
 133     struct bare_shunt *next;       /* next node; new nodes are pushed at the head */
 134 };
 135
 136 static struct bare_shunt *bare_shunts = NULL;
 137
 138 #ifdef DEBUG
 139 static void
 140 DBG_bare_shunt(const char *op, const struct bare_shunt *bs)
 141 {
 142     DBG(DBG_KLIPS,
 143         {
 144             char ourst[SUBNETTOT_BUF];
 145             char hist[SUBNETTOT_BUF];
 146             char sat[SATOT_BUF];
 147
 148             subnettot(&(bs)->ours, 0, ourst, sizeof(ourst));
 149             subnettot(&(bs)->his, 0, hist, sizeof(hist));
 150             satot(&(bs)->said, 0, sat, sizeof(sat));
 151             DBG_log("%s bare shunt %p %s -> %s => %s"
 152                 , op, (const void *)(bs), ourst, hist, sat);
 153         });
 154 }
 155 #else /* !DEBUG */
 156 #define DBG_bare_shunt(op, bs) {}
 157 #endif /* !DEBUG */
 158
 159 /* information from /proc/net/ipsec_eroute */
 160
 161 struct eroute_info {       /* one eroute record; also the node type of the orphaned_holds list */
 162     unsigned long count;   /* NOTE(review): presumably the eroute's packet count -- confirm against the /proc parser */
 163     ip_subnet ours;        /* our end of the flow; matched with samesubnet() when an orphaned hold is removed */
 164     ip_subnet his;         /* peer's end of the flow; matched the same way */
 165     ip_address dst;        /* NOTE(review): not used in the visible code -- presumably the SA destination; confirm */
 166     ip_said     said;      /* NOTE(review): not used in the visible code -- presumably the SA id the eroute points at; confirm */
 167     struct eroute_info *next;      /* next record in the list */
 168 };
 169
 170 /* The orphaned_holds table records %holds for which we
 171  * scan_proc_shunts found no representation of in any connection.
 172  * The corresponding ACQUIRE message might have been lost.
 173  */
 174 struct eroute_info *orphaned_holds = NULL;
 175
 176 static bool shunt_eroute(struct connection *c
 177     , unsigned int op, const char *opname);     /* forward declaration */
 178
 179 bool no_klips = FALSE;  /* don't actually use KLIPS */
 180
 181 int pfkeyfd = NULL_FD;
 182
 183 typedef u_int32_t pfkey_seq_t;
 184 static pfkey_seq_t pfkey_seq = 0;       /* sequence number for our PF_KEY messages */
 185
 186 static pid_t pid;
 187 static void pfkey_register(void);
 188
 189 #define NE(x) { x, #x } /* Name Entry -- shorthand for sparse_names */
 190
 191 static sparse_names pfkey_type_names = {   /* PF_KEY message type values paired with their names (built by NE); passed to sparse_val_show() when logging */
 192         NE(SADB_RESERVED),
 193         NE(SADB_GETSPI),
 194         NE(SADB_UPDATE),
 195         NE(SADB_ADD),
 196         NE(SADB_DELETE),
 197         NE(SADB_GET),
 198         NE(SADB_ACQUIRE),
 199         NE(SADB_REGISTER),
 200         NE(SADB_EXPIRE),
 201         NE(SADB_FLUSH),
 202         NE(SADB_DUMP),
 203         NE(SADB_X_PROMISC),
 204         NE(SADB_X_PCHANGE),
 205         NE(SADB_X_GRPSA),
 206         NE(SADB_X_ADDFLOW),
 207         NE(SADB_X_DELFLOW),
 208         NE(SADB_X_DEBUG),
 209 #ifdef NAT_TRAVERSAL
 210         NE(SADB_X_NAT_T_NEW_MAPPING),
 211 #endif
 212         NE(SADB_MAX),
 213         { 0, sparse_end }      /* terminating entry of the table */
 214 };
 215
 216 #ifdef NEVER /* not needed yet */
 217 static sparse_names pfkey_ext_names = {
 218         NE(SADB_EXT_RESERVED),
 219         NE(SADB_EXT_SA),
 220         NE(SADB_EXT_LIFETIME_CURRENT),
 221         NE(SADB_EXT_LIFETIME_HARD),
 222         NE(SADB_EXT_LIFETIME_SOFT),
 223         NE(SADB_EXT_ADDRESS_SRC),
 224         NE(SADB_EXT_ADDRESS_DST),
 225         NE(SADB_EXT_ADDRESS_PROXY),
 226         NE(SADB_EXT_KEY_AUTH),
 227         NE(SADB_EXT_KEY_ENCRYPT),
 228         NE(SADB_EXT_IDENTITY_SRC),
 229         NE(SADB_EXT_IDENTITY_DST),
 230         NE(SADB_EXT_SENSITIVITY),
 231         NE(SADB_EXT_PROPOSAL),
 232         NE(SADB_EXT_SUPPORTED_AUTH),
 233         NE(SADB_EXT_SUPPORTED_ENCRYPT),
 234         NE(SADB_EXT_SPIRANGE),
 235         NE(SADB_X_EXT_KMPRIVATE),
 236         NE(SADB_X_EXT_SATYPE2),
 237         NE(SADB_X_EXT_SA2),
 238         NE(SADB_X_EXT_ADDRESS_DST2),
 239         NE(SADB_X_EXT_ADDRESS_SRC_FLOW),
 240         NE(SADB_X_EXT_ADDRESS_DST_FLOW),
 241         NE(SADB_X_EXT_ADDRESS_SRC_MASK),
 242         NE(SADB_X_EXT_ADDRESS_DST_MASK),
 243         NE(SADB_X_EXT_DEBUG),
 244         { 0, sparse_end }
 245 };
 246 #endif /* NEVER */
 247
 248 #undef NE
 249
 250 static void
 251 init_pfkey(void)
 252 {
 253     pid = getpid();
 254
 255     /* open PF_KEY socket */
 256
 257     pfkeyfd = socket(PF_KEY, SOCK_RAW, PF_KEY_V2);
 258
 259     if (pfkeyfd == -1)
 260         exit_log_errno((e, "socket() in init_pfkeyfd()"));
 261
 262 #ifdef NEVER    /* apparently unsupported! */
 263     if (fcntl(pfkeyfd, F_SETFL, O_NONBLOCK) != 0)
 264         exit_log_errno((e, "fcntl() in init_pfkeyfd()"));
 265 #endif
 266
 267     DBG(DBG_KLIPS,
 268         DBG_log("process %u listening for PF_KEY_V2 on file descriptor %d", (unsigned)pid, pfkeyfd));
 269
 270     pfkey_register();   /* register SA types that we can negotiate */
 271 }
 272
 273 /* Kinds of PF_KEY message from the kernel:
 274  * - response to a request from us
 275  *   + ACK/NAK
 276  *   + Register: indicates transforms supported by kernel
 277  *   + SPI requested by getspi
 278  * - Acquire, requesting us to deal with trapped clear packet
 279  * - expiration of one of our SAs
 280  * - messages to other processes
 281  *
 282  * To minimize the effect on the event-driven structure of Pluto,
 283  * responses are dealt with synchronously.  We hope that the Kernel
 284  * produces them synchronously.  We must "read ahead" in the PF_KEY
 285  * stream, saving Acquire and Expiry messages that are encountered.
 286  * We ignore messages to other processes.
 287  */
 288
 289 typedef union {    /* buffer for one PF_KEY message, viewable as raw bytes or through its header */
 290         unsigned char bytes[PFKEYv2_MAX_MSGSIZE];      /* raw storage; the target of read() in pfkey_get */
 291         struct sadb_msg msg;   /* the message header overlaid on the start of bytes */
 292     } pfkey_buf;
 293
 294 /* queue of unprocessed PF_KEY messages input from kernel
 295  * Note that the pfkey_buf may be partly allocated, reflecting
 296  * the variable length nature of the messages.  So the link field
 297  * must come first.
 298  */
 299 typedef struct pfkey_item {        /* node of the queue of unprocessed PF_KEY messages */
 300         struct pfkey_item *next;       /* next (younger) item; NULL at the tail */
 301         pfkey_buf buf; /* must stay last: pfkey_get_response allocates only offsetof(pfkey_item, buf) + message length */
 302     } pfkey_item;
 303
 304 static pfkey_item *pfkey_iq_head = NULL;        /* oldest */
 305 static pfkey_item *pfkey_iq_tail;       /* youngest */
 306
 307 static bool
 308 pfkey_input_ready(void)
 309 {
 310     fd_set readfds;
 311     int ndes;
 312     struct timeval tm;
 313
 314     tm.tv_sec = 0;      /* don't wait at all */
 315     tm.tv_usec = 0;
 316
 317     FD_ZERO(&readfds);  /* we only care about pfkeyfd */
 318     FD_SET(pfkeyfd, &readfds);
 319
 320     do {
 321         ndes = select(pfkeyfd + 1, &readfds, NULL, NULL, &tm);
 322     } while (ndes == -1 && errno == EINTR);
 323
 324     if (ndes < 0)
 325     {
 326         log_errno((e, "select() failed in pfkey_get()"));
 327         return FALSE;
 328     }
 329
 330     if (ndes == 0)
 331         return FALSE;   /* nothing to read */
 332
 333     passert(ndes == 1 && FD_ISSET(pfkeyfd, &readfds));
 334     return TRUE;
 335 }
 336
 337 /* get a PF_KEY message from kernel.
 338  * Returns TRUE is message found, FALSE if no message pending,
 339  * and aborts or keeps trying when an error is encountered.
 340  * The only validation of the message is that the message length
 341  * received matches that in the message header, and that the message
 342  * is for this process.
 343  */
 344 static bool
 345 pfkey_get(pfkey_buf *buf)
 346 {
 347     for (;;)
 348     {
 349         ssize_t len;
 350
 351         if (!pfkey_input_ready())
 352             return FALSE;
 353
 354         len = read(pfkeyfd, buf->bytes, sizeof(buf->bytes));
 355
 356         if (len < 0)
 357         {
 358             if (errno == EAGAIN)
 359                 return FALSE;
 360
 361             log_errno((e, "read() failed in pfkey_get()"));
 362             return FALSE;
 363         }
 364         else if ((size_t) len < sizeof(buf->msg))
 365         {
 366             log("pfkey_get read truncated PF_KEY message: %d bytes; ignoring message", len);
 367         }
 368         else if ((size_t) len != buf->msg.sadb_msg_len * IPSEC_PFKEYv2_ALIGN)
 369         {
 370             log("pfkey_get read PF_KEY message with length %d that doesn't equal sadb_msg_len %u * %d; ignoring message"
 371                 , len, (unsigned) buf->msg.sadb_msg_len, IPSEC_PFKEYv2_ALIGN);
 372         }
 373         /*      for now, unsolicited messages can be:
 374          *      SADB_ACQUIRE, SADB_REGISTER
 375          */
 376         else if (!(buf->msg.sadb_msg_pid == (unsigned)pid
 377         || (buf->msg.sadb_msg_pid == 0 && buf->msg.sadb_msg_type == SADB_ACQUIRE)
 378 #ifdef NAT_TRAVERSAL
 379         || (buf->msg.sadb_msg_pid == 0 && buf->msg.sadb_msg_type == SADB_X_NAT_T_NEW_MAPPING)
 380 #endif
 381         || (buf->msg.sadb_msg_type == SADB_REGISTER)))
 382         {
 383             /* not for us: ignore */
 384             DBG(DBG_KLIPS,
 385                 DBG_log("pfkey_get: ignoring PF_KEY %s message %u for process %u"
 386                     , sparse_val_show(pfkey_type_names, buf->msg.sadb_msg_type)
 387                     , buf->msg.sadb_msg_seq
 388                     , buf->msg.sadb_msg_pid));
 389         }
 390         else
 391         {
 392             DBG(DBG_KLIPS,
 393                 DBG_log("pfkey_get: %s message %u"
 394                     , sparse_val_show(pfkey_type_names, buf->msg.sadb_msg_type)
 395                     , buf->msg.sadb_msg_seq));
 396             return TRUE;
 397         }
 398     }
 399 }
 400
 401 /* get a response to a specific message */
 402 static bool
 403 pfkey_get_response(pfkey_buf *buf, pfkey_seq_t seq)
 404 {
 405     while (pfkey_get(buf))
 406     {
 407         if (buf->msg.sadb_msg_pid == (unsigned)pid
 408         && buf->msg.sadb_msg_seq == seq)
 409         {
 410             return TRUE;
 411         }
 412         else
 413         {
 414             /* Not for us: queue it. */
 415             size_t bl = buf->msg.sadb_msg_len * IPSEC_PFKEYv2_ALIGN;
 416             pfkey_item *it = alloc_bytes(offsetof(pfkey_item, buf) + bl, "pfkey_item");
 417
 418             memcpy(&it->buf, buf, bl);
 419
 420             it->next = NULL;
 421             if (pfkey_iq_head == NULL)
 422             {
 423                 pfkey_iq_head = it;
 424             }
 425             else
 426             {
 427                 pfkey_iq_tail->next = it;
 428             }
 429             pfkey_iq_tail = it;
 430         }
 431     }
 432     return FALSE;
 433 }
 434
 435 /* Process a SADB_REGISTER message from KLIPS.
 436  * This will be a response to one of ours, but it may be asynchronous
 437  * (if KLIPS modules are loaded and unloaded).
 438  * Some sanity checking has already been performed.
 439  */
 440 static void
 441 process_pfkey_register_response(pfkey_buf *buf)
 442 {
 443     /* Find out what the kernel can support.
 444      * In fact, the only question at the moment
 445      * is whether it can support IPcomp.
 446      * So we ignore the rest.
 447      * ??? we really should pay attention to what transforms are supported.
 448      */
 449     switch (buf->msg.sadb_msg_satype)
 450     {
 451     case SADB_SATYPE_AH:
 452         break;
 453     case SADB_SATYPE_ESP:
 454 #ifndef NO_KERNEL_ALG
 455         kernel_alg_register_pfkey(buf, sizeof (pfkey_buf));
 456 #endif
 457         break;
 458     case SADB_X_SATYPE_COMP:
 459         /* ??? There ought to be an extension to list the
 460          * supported algorithms, but RFC 2367 doesn't
 461          * list one for IPcomp.  KLIPS uses SADB_X_CALG_DEFLATE.
 462          * Since we only implement deflate, we'll assume this.
 463          */
 464         can_do_IPcomp = TRUE;
 465         break;
 466     case SADB_X_SATYPE_IPIP:
 467         break;
 468     default:
 469         break;
 470     }
 471 }
 472
 473 /* Create ip_address out of sockaddr.  Ignore port! */
 474 static err_t
 475 sockaddr_to_ip_address(const struct sockaddr *src, ip_address *dest)
 476 {
 477     switch (src->sa_family)
 478     {
 479     case AF_INET:
 480         initaddr((const void *) &((const struct sockaddr_in *)src)->sin_addr
 481             , sizeof(((const struct sockaddr_in *)src)->sin_addr)
 482             , src->sa_family, dest);
 483         return NULL;
 484     case AF_INET6:
 485         initaddr((const void *) &((const struct sockaddr_in6 *)src)->sin6_addr
 486             , sizeof(((const struct sockaddr_in6 *)src)->sin6_addr)
 487             , src->sa_family, dest);
 488         return NULL;
 489     default:
 490         return "unknown address family";
 491     }
 492 }
 493
 494 static void
 495 record_and_initiate_opportunistic(ip_subnet *ours, ip_subnet *his)
 496 {
 497     passert(samesubnettype(ours, his));
 498
 499     /* Add to bare shunt list.
 500      * We need to do this because the shunt was installed by KLIPS
 501      * which can't do this itself.
 502      */
 503     {
 504         struct bare_shunt *bs = alloc_thing(struct bare_shunt, "bare shunt");
 505
 506         bs->ours = *ours;
 507         bs->his = *his;
 508
 509         bs->said.proto = SA_INT;
 510         bs->said.spi = htonl(SPI_HOLD);
 511         bs->said.dst = *aftoinfo(subnettypeof(ours))->any;
 512
 513         bs->count = 0;
 514         bs->last_activity = now();
 515
 516         bs->next = bare_shunts;
 517         bare_shunts = bs;
 518         DBG_bare_shunt("add", bs);
 519     }
 520
 521     /* actually initiate opportunism */
 522     {
 523         ip_address src, dst;
 524
 525         networkof(ours, &src);
 526         networkof(his, &dst);
 527         initiate_opportunistic(&src, &dst, TRUE, NULL_FD);
 528     }
 529
 530     /* if present, remove from orphaned_holds list.
 531      * NOTE: we do this last in case ours or his is a pointer into a member.
 532      */
 533     {
 534         struct eroute_info **pp, *p;
 535
 536         for (pp = &orphaned_holds; (p = *pp) != NULL; pp = &p->next)
 537         {
 538             if (samesubnet(ours, &p->ours) && samesubnet(his, &p->his))
 539             {
 540                 *pp = p->next;
 541                 pfree(p);
 542                 break;
 543             }
 544         }
 545     }
 546 }
 547
 548 /* Processs a SADB_ACQUIRE message from KLIPS.
 549  * Try to build an opportunistic connection!
 550  * See RFC 2367 "PF_KEY Key Management API, Version 2" 3.1.6
 551  * <base, address(SD), (address(P)), (identity(SD),) (sensitivity,) proposal>
 552  * - extensions for source and data IP addresses
 553  * - optional extensions for identity [not useful for us?]
 554  * - optional extension for sensitivity [not useful for us?]
 555  * - expension for proposal [not useful for us?]
 556  *
 557  * ??? We must use the sequence number in creating an SA.
 558  * We actually need to create up to 4 SAs each way.  Which one?
 559  * I guess it depends on the protocol present in the sadb_msg_satype.
 560  * For now, we'll ignore this requirement.
 561  *
 562  * ??? We need some mechanism to make sure that multiple ACQUIRE messages
 563  * don't cause a whole bunch of redundant negotiations.
 564  */
 565 static void
 566 process_pfkey_acquire(pfkey_buf *buf, struct sadb_ext *extensions[SADB_EXT_MAX + 1])
 567 {
 568     struct sadb_address *srcx = (void *) extensions[SADB_EXT_ADDRESS_SRC];
 569     struct sadb_address *dstx = (void *) extensions[SADB_EXT_ADDRESS_DST];
 570     ip_address src, dst;
 571     ip_subnet ours, his;
 572     err_t ugh = NULL;
 573
 574     /* assumption: we're only catching our own outgoing packets
 575      * so source is our end and destination is the other end.
 576      * Verifying this is not actually convenient.
 577      *
 578      * This stylized control structure yields a complaint or
 579      * desired results.  For compactness, a pointer value is
 580      * treated as a boolean.  Logically, the structure is:
 581      * keep going as long as things are OK.
 582      */
 583     if (buf->msg.sadb_msg_pid == 0      /* we only wish to hear from kernel */
 584     && !(ugh = sockaddr_to_ip_address((struct sockaddr *)(void *)&srcx[1], &src))
 585     && !(ugh = sockaddr_to_ip_address((struct sockaddr *)(void *)&dstx[1], &dst))
 586     && !(ugh = addrtypeof(&src) == addrtypeof(&dst)? NULL : "conflicting address types")
 587     && !(ugh = addrtosubnet(&src, &ours))
 588     && !(ugh = addrtosubnet(&dst, &his)))
 589         record_and_initiate_opportunistic(&ours, &his);
 590
 591     if (ugh != NULL)
 592         log("SADB_ACQUIRE message from KLIPS malformed: %s", ugh);
 593
 594 }
 595
 596 /* Handle PF_KEY messages from the kernel that are not dealt with
 597  * synchronously.  In other words, all but responses to PF_KEY messages
 598  * that we sent.
 599  */
 600 static void
 601 pfkey_async(pfkey_buf *buf)
 602 {
 603     struct sadb_ext *extensions[SADB_EXT_MAX + 1];
 604
 605     if (pfkey_msg_parse(&buf->msg, NULL, extensions, EXT_BITS_OUT))
 606     {
 607         log("pfkey_async:"
 608             " unparseable PF_KEY message:"
 609             " %s len=%d, errno=%d, seq=%d, pid=%d; message ignored"
 610             , sparse_val_show(pfkey_type_names, buf->msg.sadb_msg_type)
 611             , buf->msg.sadb_msg_len
 612             , buf->msg.sadb_msg_errno
 613             , buf->msg.sadb_msg_seq
 614             , buf->msg.sadb_msg_pid);
 615     }
 616     else
 617     {
 618         DBG(DBG_CONTROL | DBG_KLIPS, DBG_log("pfkey_async:"
 619             " %s len=%u, errno=%u, satype=%u, seq=%u, pid=%u"
 620             , sparse_val_show(pfkey_type_names, buf->msg.sadb_msg_type)
 621             , buf->msg.sadb_msg_len
 622             , buf->msg.sadb_msg_errno
 623             , buf->msg.sadb_msg_satype
 624             , buf->msg.sadb_msg_seq
 625             , buf->msg.sadb_msg_pid));
 626
 627         switch (buf->msg.sadb_msg_type)
 628         {
 629         case SADB_REGISTER:
 630             process_pfkey_register_response(buf);
 631             break;
 632         case SADB_ACQUIRE:
 633             /* to simulate loss of ACQUIRE, delete this call */
 634             process_pfkey_acquire(buf, extensions);
 635             break;
 636 #ifdef NAT_TRAVERSAL
 637         case SADB_X_NAT_T_NEW_MAPPING:
 638             process_pfkey_nat_t_new_mapping(&(buf->msg), extensions);
 639             break;
 640 #endif
 641         default:
 642             /* ignored */
 643             break;
 644         }
 645     }
 646 }
 647
 648 /* asynchronous messages from our queue */
 649 void
 650 pfkey_dequeue(void)
 651 {
 652     while (pfkey_iq_head != NULL)
 653     {
 654         pfkey_item *it = pfkey_iq_head;
 655
 656         pfkey_async(&it->buf);
 657         pfkey_iq_head = it->next;
 658         pfree(it);
 659     }
 660
 661     /* Handle any orphaned holds, but only if no pfkey input is pending.
 662      * For each, we initiate Opportunistic.
 663      * note: we don't need to advance the pointer because
 664      * record_and_initiate_opportunistic will remove the current
 665      * record each time we call it.
 666      */
 667     while (orphaned_holds != NULL && !pfkey_input_ready())
 668         record_and_initiate_opportunistic(&orphaned_holds->ours
 669             , &orphaned_holds->his);
 670
 671 }
 672
 673 /* asynchronous messages directly from PF_KEY socket */
 674 void
 675 pfkey_event(void)
 676 {
 677     pfkey_buf buf;
 678
 679     if (pfkey_get(&buf))
 680         pfkey_async(&buf);
 681 }
 682
 683 #endif /* KLIPS */
 684
 685 /* Generate Unique SPI numbers.
 686  *
 687  * The specs say that the number must not be less than IPSEC_DOI_SPI_MIN.
 688  * Pluto generates numbers not less than IPSEC_DOI_SPI_OUR_MIN,
 689  * reserving numbers in between for manual keying (but we cannot so
 690  * restrict numbers generated by our peer).
 691  * XXX This should be replaced by a call to the kernel when
 692  * XXX we get an API.
 693  * The returned SPI is in network byte order.
 694  * We use a random number as the initial SPI so that there is
 695  * a good chance that different Pluto instances will choose
 696  * different SPIs.  This is good for two reasons.
 697  * - the keying material for the initiator and responder only
 698  *   differs if the SPIs differ.
 699  * - if Pluto is restarted, it would otherwise recycle the SPI
 700  *   numbers and confuse everything.  When the kernel generates
 701  *   SPIs, this will no longer matter.
 702  * We then allocate numbers sequentially.  Thus we don't have to
 703  * check if the number was previously used (assuming that no
 704  * SPI lives longer than 4G of its successors).
 705  */
 706 ipsec_spi_t
 707 get_ipsec_spi(ipsec_spi_t avoid)
 708 {
 709     static ipsec_spi_t spi = 0; /* host order, so not returned directly! */
 710
 711     spi++;
 712     while (spi < IPSEC_DOI_SPI_OUR_MIN || spi == ntohl(avoid))
 713         get_rnd_bytes((u_char *)&spi, sizeof(spi));
 714
 715     DBG(DBG_CONTROL,
 716         {
 717             ipsec_spi_t spi_net = htonl(spi);
 718
 719             DBG_dump("generate SPI:", (u_char *)&spi_net, sizeof(spi_net));
 720         });
 721
 722     return htonl(spi);
 723 }
 724
 725 /* Generate Unique CPI numbers.
 726  * The result is returned as an SPI (4 bytes) in network order!
 727  * The real bits are in the nework-low-order 2 bytes.
 728  * Modelled on get_ipsec_spi, but range is more limited:
 729  * 256-61439.
 730  * If we can't find one easily, return 0 (a bad SPI,
 731  * no matter what order) indicating failure.
 732  */
 733 ipsec_spi_t
 734 get_my_cpi(void)
 735 {
 736     static cpi_t
 737         first_busy_cpi = 0,
 738         latest_cpi;
 739
 740     while (!(IPCOMP_FIRST_NEGOTIATED <= first_busy_cpi && first_busy_cpi < IPCOMP_LAST_NEGOTIATED))
 741     {
 742         get_rnd_bytes((u_char *)&first_busy_cpi, sizeof(first_busy_cpi));
 743         latest_cpi = first_busy_cpi;
 744     }
 745
 746     latest_cpi++;
 747
 748     if (latest_cpi == first_busy_cpi)
 749         find_my_cpi_gap(&latest_cpi, &first_busy_cpi);
 750
 751     if (latest_cpi > IPCOMP_LAST_NEGOTIATED)
 752         latest_cpi = IPCOMP_FIRST_NEGOTIATED;
 753
 754     return htonl((ipsec_spi_t)latest_cpi);
 755 }
 756
 757 /* invoke the updown script to do the routing and firewall commands required
 758  *
 759  * The user-specified updown script is run.  Parameters are fed to it in
 760  * the form of environment variables.  All such environment variables
 761  * have names starting with "PLUTO_".
 762  *
 763  * The operation to be performed is specified by PLUTO_VERB.  This
 764  * verb has a suffix "-host" if the client on this end is just the
 765  * host; otherwise the suffix is "-client".  If the address family
 766  * of the host is IPv6, an extra suffix of "-v6" is added.
 767  *
 768  * "prepare-host" and "prepare-client" are used to delete a route
 769  * that may exist (due to forces outside of Pluto).  It is used to
 770  * prepare for pluto creating a route.
 771  *
 772  * "route-host" and "route-client" are used to install a route.
 773  * Since routing is based only on destination, the PLUTO_MY_CLIENT_*
 774  * values are probably of no use (using them may signify a bug).
 775  *
 776  * "unroute-host" and "unroute-client" are used to delete a route.
 777  * Since routing is based only on destination, the PLUTO_MY_CLIENT_*
 778  * values are probably of no use (using them may signify a bug).
 779  *
 780  * "up-host" and "up-client" are run when an eroute is added (not replaced).
 781  * They are useful for adjusting a firewall: usually for adding a rule
 782  * to let processed packets flow between clients.  Note that only
 783  * one eroute may exist for a pair of client subnets but inbound
 784  * IPsec SAs may persist without an eroute.
 785  *
 786  * "down-host" and "down-client" are run when an eroute is deleted.
 787  * They are useful for adjusting a firewall.
 788  */
 789
 790 #ifndef DEFAULT_UPDOWN
 791 # define DEFAULT_UPDOWN "ipsec _updown"
 792 #endif
 793
 794 static bool
 795 do_command(struct connection *c, const char *verb)
 796 {
 797     char cmd[1536];     /* arbitrary limit on shell command length */
 798     const char *verb_suffix;
 799
 800     /* figure out which verb suffix applies */
 801     {
 802         const char *hs, *cs;
 803
 804         switch (addrtypeof(&c->this.host_addr))
 805         {
 806             case AF_INET:
 807                 hs = "-host";
 808                 cs = "-client";
 809                 break;
 810             case AF_INET6:
 811                 hs = "-host-v6";
 812                 cs = "-client-v6";
 813                 break;
 814             default:
 815                 loglog(RC_LOG_SERIOUS, "unknown address family");
 816                 return FALSE;
 817         }
 818         verb_suffix = subnetishost(&c->this.client) && addrinsubnet(&c->this.host_addr, &c->this.client)
 819             ? hs : cs;
 820     }
 821
 822     /* form the command string */
 823     {
 824         const ip_subnet *epc = EffectivePeerClient(c);
 825         char
 826             nexthop_str[ADDRTOT_BUF],
 827             me_str[ADDRTOT_BUF],
 828             myid_str[IDTOA_BUF],
 829             myclient_str[SUBNETTOT_BUF],
 830             myclientnet_str[ADDRTOT_BUF],
 831             myclientmask_str[ADDRTOT_BUF],
 832             peer_str[ADDRTOT_BUF],
 833             peerid_str[IDTOA_BUF],
 834             peerclient_str[SUBNETTOT_BUF],
 835             peerclientnet_str[ADDRTOT_BUF],
 836             peerclientmask_str[ADDRTOT_BUF];
 837         ip_address ta;
 838
 839         addrtot(&c->this.host_nexthop, 0, nexthop_str, sizeof(nexthop_str));
 840
 841         addrtot(&c->this.host_addr, 0, me_str, sizeof(me_str));
 842         idtoa(&c->this.id, myid_str, sizeof(myid_str));
 843         subnettot(&c->this.client, 0, myclient_str, sizeof(myclientnet_str));
 844         networkof(&c->this.client, &ta);
 845         addrtot(&ta, 0, myclientnet_str, sizeof(myclientnet_str));
 846         maskof(&c->this.client, &ta);
 847         addrtot(&ta, 0, myclientmask_str, sizeof(myclientmask_str));
 848
 849         addrtot(&c->that.host_addr, 0, peer_str, sizeof(peer_str));
 850         idtoa(&c->that.id, peerid_str, sizeof(peerid_str));
 851         subnettot(epc, 0, peerclient_str, sizeof(peerclientnet_str));
 852         networkof(epc, &ta);
 853         addrtot(&ta, 0, peerclientnet_str, sizeof(peerclientnet_str));
 854         maskof(epc, &ta);
 855         addrtot(&ta, 0, peerclientmask_str, sizeof(peerclientmask_str));
 856
 857 #ifdef SIMPLE_SCRIPTS
 858 # define SE "setenv "
 859 # define EQ " "
 860 # define NL "\n"
 861 #else
 862 # define SE ""
 863 # define EQ "="
 864 # define NL " "
 865 #endif
 866         if (-1 == snprintf(cmd, sizeof(cmd),
 867 #ifdef SIMPLE_SCRIPTS
 868             "#!/bin/sh" NL
 869 #endif
 870             /* change VERSION when interface spec changes */
 871             SE "PLUTO_VERSION"          EQ "'1.1'" NL
 872             SE "PLUTO_VERB"             EQ "'%s%s'" NL
 873             SE "PLUTO_CONNECTION"       EQ "'%s'" NL
 874             SE "PLUTO_NEXT_HOP"         EQ "'%s'" NL
 875             SE "PLUTO_INTERFACE"        EQ "'%s'" NL
 876             SE "PLUTO_ME"               EQ "'%s'" NL
 877             SE "PLUTO_MY_ID"            EQ "'%s'" NL
 878             SE "PLUTO_MY_CLIENT"        EQ "'%s'" NL
 879             SE "PLUTO_MY_CLIENT_NET"    EQ "'%s'" NL
 880             SE "PLUTO_MY_CLIENT_MASK"   EQ "'%s'" NL
 881             SE "PLUTO_MY_PORT"          EQ "'%u'" NL
 882             SE "PLUTO_MY_PROTOCOL"      EQ "'%u'" NL
 883             SE "PLUTO_PEER"             EQ "'%s'" NL
 884             SE "PLUTO_PEER_ID"          EQ "'%s'" NL
 885             SE "PLUTO_PEER_CLIENT"      EQ "'%s'" NL
 886             SE "PLUTO_PEER_CLIENT_NET"  EQ "'%s'" NL
 887             SE "PLUTO_PEER_CLIENT_MASK" EQ "'%s'" NL
 888             SE "PLUTO_PEER_PORT"        EQ "'%u'" NL
 889             SE "PLUTO_PEER_PROTOCOL"    EQ "'%u'" NL
 890             "%s"                                  NL    /* actual script */
 891 #ifndef SIMPLE_SCRIPTS
 892             "2>&1 "     /* capture stderr along with stdout */
 893 #endif
 894             , verb, verb_suffix
 895             , c->name
 896             , nexthop_str
 897             , c->interface->vname
 898             , me_str
 899             , myid_str
 900             , myclient_str
 901             , myclientnet_str
 902             , myclientmask_str
 903             , c->this.port
 904             , c->this.protocol
 905             , peer_str
 906             , peerid_str
 907             , peerclient_str
 908             , peerclientnet_str
 909             , peerclientmask_str
 910             , c->that.port
 911             , c->that.protocol
 912             , c->this.updown == NULL? DEFAULT_UPDOWN : c->this.updown))
 913         {
 914             loglog(RC_LOG_SERIOUS, "%s%s command too long!", verb, verb_suffix);
 915             return FALSE;
 916         }
 917     }
 918
 919     DBG(DBG_CONTROL, DBG_log("executing %s%s: %s"
 920         , verb, verb_suffix, cmd));
 921
 922 #ifdef KLIPS
 923
 924 #ifdef SIMPLE_SCRIPTS
 925     if (!no_klips)
 926     {
 927         const char *temp_name = "/var/log/pluto_script";
 928         int fd;
 929
 930         unlink(temp_name);
 931         fd = open(temp_name, O_WRONLY|O_CREAT|O_TRUNC, 0777);
 932         if (fd == -1) {
 933             loglog(RC_LOG_SERIOUS, "unable to open %s", temp_name);
 934             return FALSE;
 935         }
 936         if (write(fd, cmd, strlen(cmd)) != strlen(cmd)) {
 937             loglog(RC_LOG_SERIOUS, "unable to write to %s", temp_name);
 938             return FALSE;
 939         }
 940         if (close(fd) == -1) {
 941             loglog(RC_LOG_SERIOUS, "unable to close %s", temp_name);
 942             return FALSE;
 943         }
 944         strncpy(cmd, temp_name, sizeof(cmd));
 945     }
 946 #endif
 947
 948     if (!no_klips)
 949     {
 950         /* invoke the script, catching stderr and stdout
 951          * It may be of concern that some file descriptors will
 952          * be inherited.  For the ones under our control, we
 953          * have done fcntl(fd, F_SETFD, FD_CLOEXEC) to prevent this.
 954          * Any used by library routines (perhaps the resolver or syslog)
 955          * will remain.
 956          */
 957         FILE *f = popen(cmd, "r");
 958
 959         if (f == NULL)
 960         {
 961             loglog(RC_LOG_SERIOUS, "unable to popen %s%s command", verb, verb_suffix);
 962 #ifdef SIMPLE_SCRIPTS
 963             unlink(cmd); /* don't need it now */
 964 #endif
 965             return FALSE;
 966         }
 967
 968         /* log any output */
 969         for (;;)
 970         {
 971             /* if response doesn't fit in this buffer, it will be folded */
 972             char resp[256];
 973
 974             if (fgets(resp, sizeof(resp), f) == NULL)
 975             {
 976                 if (ferror(f))
 977                 {
 978                     log_errno((e, "fgets failed on output of %s%s command"
 979                         , verb, verb_suffix));
 980 #ifdef SIMPLE_SCRIPTS
 981                     unlink(cmd); /* don't need it now */
 982 #endif
 983                     return FALSE;
 984                 }
 985                 else
 986                 {
 987                     passert(feof(f));
 988                     break;
 989                 }
 990             }
 991             else
 992             {
 993                 char *e = resp + strlen(resp);
 994
 995                 if (e > resp && e[-1] == '\n')
 996                     e[-1] = '\0';       /* trim trailing '\n' */
 997                 //log("%s%s output: %s", verb, verb_suffix, resp);
 998             }
 999         }
1000
1001 #ifdef SIMPLE_SCRIPTS
1002         unlink(cmd); /* don't need it now */
1003 #endif
1004         /* report on and react to return code */
1005         {
1006             int r = pclose(f);
1007
1008             if (r == -1)
1009             {
1010                 if (errno ==  ECHILD)
1011                         return TRUE;
1012                 log_errno((e, "pclose failed for %s%s command"
1013                     , verb, verb_suffix));
1014                 return FALSE;
1015             }
1016             else if (WIFEXITED(r))
1017             {
1018                 if (WEXITSTATUS(r) != 0)
1019                 {
1020                     loglog(RC_LOG_SERIOUS, "%s%s command exited with status %d"
1021                         , verb, verb_suffix, WEXITSTATUS(r));
1022                     return FALSE;
1023                 }
1024             }
1025             else if (WIFSIGNALED(r))
1026             {
1027                 loglog(RC_LOG_SERIOUS, "%s%s command exited with signal %d"
1028                     , verb, verb_suffix, WTERMSIG(r));
1029                 return FALSE;
1030             }
1031             else
1032             {
1033                 loglog(RC_LOG_SERIOUS, "%s%s command exited with unknown status %d"
1034                     , verb, verb_suffix, r);
1035                 return FALSE;
1036             }
1037         }
1038     }
1039 #endif /* KLIPS */
1040     return TRUE;
1041 }
1042
1043 /* Check that we can route (and eroute).  Diagnose if we cannot. */
1044
1045 static bool
1046 could_route(struct connection *c)
1047 {
1048     struct connection *ero      /* who, if anyone, owns our eroute? */
1049         , *ro = route_owner(c, &ero);   /* who owns our route? */
1050
1051 #if 0
1052     /* if this is a Road Warrior template, we cannot route.
1053      * Opportunistic template is OK.
1054      */
1055     if (c->kind == CK_TEMPLATE && !(c->policy & POLICY_OPPO))
1056     {
1057         loglog(RC_ROUTE, "cannot route Road Warrior template");
1058         return FALSE;
1059     }
1060 #endif
1061
1062     /* if we don't know nexthop, we cannot route */
1063     if (isanyaddr(&c->this.host_nexthop))
1064     {
1065         loglog(RC_ROUTE, "cannot route connection without knowing our nexthop");
1066         return FALSE;
1067     }
1068
1069     /* if routing would affect IKE messages, reject */
1070     if (!no_klips
1071 #ifdef NAT_TRAVERSAL
1072     && c->this.host_port != NAT_T_IKE_FLOAT_PORT
1073 #endif
1074     && c->this.host_port != IKE_UDP_PORT
1075     && addrinsubnet(&c->that.host_addr, EffectivePeerClient(c)))
1076     {
1077         loglog(RC_LOG_SERIOUS, "cannot install route: peer is within its client");
1078         return FALSE;
1079     }
1080
1081     /* If there is already a route for peer's client subnet
1082      * and it disagrees about interface or nexthop, we cannot steal it.
1083      * Note: if this connection is already routed (perhaps for another
1084      * state object), the route will agree.
1085      * This is as it should be -- it will arise during rekeying.
1086      */
1087     if (ro != NULL && !routes_agree(ro, c))
1088     {
1089         loglog(RC_LOG_SERIOUS, "cannot route -- route already in use for \"%s\""
1090             , ro->name);
1091         return FALSE;   /* another connection already using the eroute */
1092     }
1093
1094 #ifdef KLIPS
1095     /* if there is an eroute for another connection, there is a problem */
1096     if (ero != NULL && ero != c)
1097     {
1098         char inst[CONN_INST_BUF];
1099
1100         fmt_conn_instance(ero, inst);
1101
1102         loglog(RC_LOG_SERIOUS
1103             , "cannot install eroute -- it is in use for \"%s\"%s #%lu"
1104             , ero->name, inst, ero->eroute_owner);
1105         return FALSE;   /* another connection already using the eroute */
1106     }
1107 #endif /* KLIPS */
1108     return TRUE;
1109 }
1110
1111 bool
1112 trap_connection(struct connection *c)
1113 {
1114     /* RT_ROUTED_TUNNEL is treated specially: we don't override
1115      * because we don't want to lose track of the IPSEC_SAs etc.
1116      */
1117     return could_route(c)
1118         && (c->routing == RT_ROUTED_TUNNEL || route_and_eroute(c, NULL));
1119 }
1120
1121 /* delete any eroute for a connection and unroute it if route isn't shared */
1122 void
1123 unroute_connection(struct connection *c)
1124 {
1125     enum routing_t cr = c->routing;
1126
1127     if (erouted(cr))
1128     {
1129         passert(cr != RT_ROUTED_TUNNEL);        /* cannot handle a live one */
1130 #ifdef KLIPS
1131         shunt_eroute(c, ERO_DELETE, "delete");
1132 #endif
1133     }
1134
1135     c->routing = RT_UNROUTED;   /* do now so route_owner won't find us */
1136
1137     /* only unroute if no other connection shares it */
1138     if (routed(cr)
1139     && route_owner(c, NULL) == NULL)
1140         (void) do_command(c, "unroute");
1141 }
1142
1143
1144 #ifdef KLIPS
1145
1146 static void
1147 set_text_said(char *text_said, const ip_address *dst, ipsec_spi_t spi, int proto)
1148 {
1149     ip_said said;
1150
1151     initsaid(dst, spi, proto, &said);
1152     satot(&said, 0, text_said, SATOT_BUF);
1153 }
1154
1155 static bool
1156 pfkey_build(int error
1157 , const char *description
1158 , const char *text_said
1159 , struct sadb_ext *extensions[SADB_EXT_MAX + 1])
1160 {
1161     if (error == 0)
1162     {
1163         return TRUE;
1164     }
1165     else
1166     {
1167         loglog(RC_LOG_SERIOUS, "building of %s %s failed, code %d"
1168             , description, text_said, error);
1169         pfkey_extensions_free(extensions);
1170         return FALSE;
1171     }
1172 }
1173
1174 /* pfkey_extensions_init + pfkey_build + pfkey_msg_hdr_build */
1175 static bool
1176 pfkey_msg_start(u_int8_t msg_type
1177 , u_int8_t satype
1178 , const char *description
1179 , const char *text_said
1180 , struct sadb_ext *extensions[SADB_EXT_MAX + 1])
1181 {
1182     pfkey_extensions_init(extensions);
1183     return pfkey_build(pfkey_msg_hdr_build(&extensions[0], msg_type
1184             , satype, 0, ++pfkey_seq, pid)
1185         , description, text_said, extensions);
1186 }
1187
1188 /* pfkey_build + pfkey_address_build */
1189 static bool
1190 pfkeyext_address(u_int16_t exttype
1191 , const ip_address *address
1192 , const char *description
1193 , const char *text_said
1194 , struct sadb_ext *extensions[SADB_EXT_MAX + 1])
1195 {
1196     /* the following variable is only needed to silence
1197      * a warning caused by the fact that the argument
1198      * to sockaddrof is NOT pointer to const!
1199      */
1200     ip_address t = *address;
1201
1202     return pfkey_build(pfkey_address_build(extensions + exttype
1203             , exttype, 0, 0, sockaddrof(&t))
1204         , description, text_said, extensions);
1205 }
1206
1207 /* Finish (building, sending, accepting response for) PF_KEY message.
1208  * If response isn't NULL, the response from the kernel will be
1209  * placed there (and its errno field will not be examined).
1210  * Returns TRUE iff all appears well.
1211  */
1212 static bool
1213 finish_pfkey_msg(struct sadb_ext *extensions[SADB_EXT_MAX + 1]
1214 , const char *description
1215 , const char *text_said
1216 , pfkey_buf *response)
1217 {
1218     struct sadb_msg *pfkey_msg;
1219     bool success = TRUE;
1220     int error = pfkey_msg_build(&pfkey_msg, extensions, EXT_BITS_IN);
1221
1222     if (error != 0)
1223     {
1224         loglog(RC_LOG_SERIOUS, "pfkey_msg_build of %s %s failed, code %d"
1225             , description, text_said, error);
1226         success = FALSE;
1227     }
1228     else
1229     {
1230         size_t len = pfkey_msg->sadb_msg_len * IPSEC_PFKEYv2_ALIGN;
1231
1232         DBG(DBG_KLIPS,
1233             DBG_log("finish_pfkey_msg: %s message %u for %s %s"
1234                 , sparse_val_show(pfkey_type_names, pfkey_msg->sadb_msg_type)
1235                 , pfkey_msg->sadb_msg_seq
1236                 , description, text_said);
1237             DBG_dump(NULL, (void *) pfkey_msg, len));
1238
1239         if (!no_klips)
1240         {
1241             ssize_t r = write(pfkeyfd, pfkey_msg, len);
1242
1243             if (r != (ssize_t)len)
1244             {
1245                 if (r < 0)
1246                 {
1247                     if (!strcmp("SADB_DELETE", sparse_val_show(pfkey_type_names
1248                             , pfkey_msg->sadb_msg_type))) {
1249                             log("This connection is probably expecting a road warrior or the IP address"
1250                                 " of the remote host's DNS hostname has changed");
1251                     }
1252                     log_errno((e
1253                         , "pfkey write() of %s message %u"
1254                           " for %s %s failed"
1255                         , sparse_val_show(pfkey_type_names
1256                             , pfkey_msg->sadb_msg_type)
1257                         , pfkey_msg->sadb_msg_seq
1258                         , description, text_said));
1259                 }
1260                 else
1261                 {
1262                     loglog(RC_LOG_SERIOUS
1263                         , "ERROR: pfkey write() of %s message %u"
1264                           " for %s %s truncated: %ld instead of %ld"
1265                         , sparse_val_show(pfkey_type_names
1266                             , pfkey_msg->sadb_msg_type)
1267                         , pfkey_msg->sadb_msg_seq
1268                         , description, text_said
1269                         , (long)r, (long)len);
1270                 }
1271                 success = FALSE;
1272
1273                 /* if we were compiled with debugging, but we haven't already
1274                  * dumped the KLIPS command, do so.
1275                  */
1276 #ifdef DEBUG
1277                 if ((cur_debugging & DBG_KLIPS) == 0)
1278                     DBG_dump(NULL, (void *) pfkey_msg, len);
1279 #endif
1280             }
1281             else
1282             {
1283                 /* Check response from KLIPS.
1284                  * It ought to be an echo, perhaps with additional info.
1285                  * If the caller wants it, response will point to space.
1286                  */
1287                 pfkey_buf b;
1288                 pfkey_buf *bp = response != NULL? response : &b;
1289
1290                 if (!pfkey_get_response(bp, ((struct sadb_msg *) extensions[0])->sadb_msg_seq))
1291                 {
1292                     loglog(RC_LOG_SERIOUS
1293                         , "ERROR: no response to our PF_KEY %s message for %s %s"
1294                         , sparse_val_show(pfkey_type_names, pfkey_msg->sadb_msg_type)
1295                         , description, text_said);
1296                     success = FALSE;
1297                 }
1298                 else if (pfkey_msg->sadb_msg_type != bp->msg.sadb_msg_type)
1299                 {
1300                     loglog(RC_LOG_SERIOUS
1301                         , "FreeS/WAN ERROR: response to our PF_KEY %s message for %s %s was of wrong type (%s)"
1302                         , sparse_name(pfkey_type_names, pfkey_msg->sadb_msg_type)
1303                         , description, text_said
1304                         , sparse_val_show(pfkey_type_names, bp->msg.sadb_msg_type));
1305                     success = FALSE;
1306                 }
1307                 else if (response == NULL && bp->msg.sadb_msg_errno != 0)
1308                 {
1309                     /* KLIPS is signalling a problem */
1310                     loglog(RC_LOG_SERIOUS
1311                         , "ERROR: PF_KEY %s response for %s %s included errno %u: %s"
1312                         , sparse_val_show(pfkey_type_names, pfkey_msg->sadb_msg_type)
1313                         , description, text_said
1314                         , (unsigned) bp->msg.sadb_msg_errno
1315                         , strerror(bp->msg.sadb_msg_errno));
1316                     success = FALSE;
1317                 }
1318             }
1319         }
1320     }
1321
1322     /* all paths must exit this way to free resources */
1323     pfkey_extensions_free(extensions);
1324     pfkey_msg_free(&pfkey_msg);
1325     return success;
1326 }
1327
1328 /*  register SA types that can be negotiated */
1329 static void
1330 pfkey_register_proto(uint8_t satype, const char *satypename)
1331 {
1332     struct sadb_ext *extensions[SADB_EXT_MAX + 1];
1333     pfkey_buf pfb;
1334
1335     if (!(pfkey_msg_start(SADB_REGISTER
1336       , satype
1337       , satypename, NULL, extensions)
1338     && finish_pfkey_msg(extensions, satypename, "", &pfb)))
1339     {
1340         /* ??? should this be loglog */
1341         log("no KLIPS support for %s", satypename);
1342     }
1343     else
1344     {
1345         process_pfkey_register_response(&pfb);
1346         DBG(DBG_KLIPS,
1347             DBG_log("%s registered with kernel.", satypename));
1348     }
1349 }
1350
1351 void
1352 pfkey_register(void)
1353 {
1354     pfkey_register_proto(SADB_SATYPE_AH, "AH");
1355     pfkey_register_proto(SADB_SATYPE_ESP, "ESP");
1356     can_do_IPcomp = FALSE;  /* until we get a response from KLIPS */
1357     pfkey_register_proto(SADB_X_SATYPE_COMP, "IPCOMP");
1358     pfkey_register_proto(SADB_X_SATYPE_IPIP, "IPIP");
1359 }
1360
1361 /* find an entry in the bare_shunt table.
1362  * Trick: return a pointer to the pointer to the entry;
1363  * this allows the entry to be deleted.
1364  */
1365 static struct bare_shunt **
1366 bare_shunt_ptr(const ip_subnet *ours, const ip_subnet *his)
1367 {
1368     struct bare_shunt *p, **pp;
1369
1370     for (pp = &bare_shunts; (p = *pp) != NULL; pp = &p->next)
1371         if (samesubnet(ours, &p->ours)
1372         && samesubnet(his, &p->his))
1373             return pp;
1374     return NULL;
1375 }
1376
1377 /* free a bare_shunt entry, given a pointer to the pointer */
1378 static void
1379 free_bare_shunt(struct bare_shunt **pp)
1380 {
1381     struct bare_shunt *p = *pp;
1382
1383     *pp = p->next;
1384     DBG_bare_shunt("delete", p);
1385     pfree(p);
1386 }
1387
1388 /* Setup an IPsec route entry. Code taken from addrt.c.
1389  * We are only dealing with outbound SAs.
1390  * op is one of the ERO_* operators.
1391  */
1392
1393 static bool
1394 raw_eroute(const ip_address *this_host, const ip_subnet *this_client
1395 , const ip_address *that_host, const ip_subnet *that_client
1396 , ipsec_spi_t spi, unsigned int proto, unsigned int satype
1397 , unsigned int op, const char *opname USED_BY_DEBUG)
1398 {
1399     struct sadb_ext *extensions[SADB_EXT_MAX + 1];
1400     ip_address
1401         sflow_ska,
1402         dflow_ska,
1403         smask_ska,
1404         dmask_ska;
1405     char text_said[SATOT_BUF];
1406
1407     set_text_said(text_said, that_host, spi, proto);
1408
1409     networkof(this_client, &sflow_ska);
1410     maskof(this_client, &smask_ska);
1411
1412     networkof(that_client, &dflow_ska);
1413     maskof(that_client, &dmask_ska);
1414
1415     DBG(DBG_CONTROL | DBG_KLIPS,
1416         {
1417             char mybuf[SUBNETTOT_BUF];
1418             char peerbuf[SUBNETTOT_BUF];
1419
1420             subnettot(this_client, 0, mybuf, sizeof(mybuf));
1421             subnettot(that_client, 0, peerbuf, sizeof(peerbuf));
1422             DBG_log("%s eroute %s -> %s => %s"
1423                 , opname, mybuf, peerbuf, text_said);
1424         });
1425
1426     return pfkey_msg_start(op & ERO_MASK, satype
1427         , "pfkey_msg_hdr flow", text_said, extensions)
1428
1429     && (op == ERO_DELETE
1430         || (pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA]
1431                 , SADB_EXT_SA
1432                 , spi   /* in network order */
1433                 , 0, 0, 0, 0, op >> ERO_FLAG_SHIFT)
1434             , "pfkey_sa add flow", text_said, extensions)
1435
1436             && pfkeyext_address(SADB_EXT_ADDRESS_SRC, this_host
1437                 , "pfkey_addr_s add flow", text_said, extensions)
1438
1439             && pfkeyext_address(SADB_EXT_ADDRESS_DST, that_host
1440                 , "pfkey_addr_d add flow", text_said, extensions)))
1441
1442     && pfkeyext_address(SADB_X_EXT_ADDRESS_SRC_FLOW, &sflow_ska
1443         , "pfkey_addr_sflow", text_said, extensions)
1444
1445     && pfkeyext_address(SADB_X_EXT_ADDRESS_DST_FLOW, &dflow_ska
1446         , "pfkey_addr_dflow", text_said, extensions)
1447
1448     && pfkeyext_address(SADB_X_EXT_ADDRESS_SRC_MASK, &smask_ska
1449         , "pfkey_addr_smask", text_said, extensions)
1450
1451     && pfkeyext_address(SADB_X_EXT_ADDRESS_DST_MASK, &dmask_ska
1452         , "pfkey_addr_dmask", text_said, extensions)
1453
1454     && finish_pfkey_msg(extensions, "flow", text_said, NULL);
1455 }
1456
1457 /* Replace (or delete) a shunt that is in the bare_shunts table.
1458  * Issues the PF_KEY commands and updates the bare_shunts table.
1459  */
1460 bool
1461 replace_bare_shunt(const ip_address *src, const ip_address *dst
1462 , ipsec_spi_t shunt_spi /* in host order! */
1463 , bool repl, const char *opname)
1464 {
1465     ip_subnet this_client, that_client;
1466     const ip_address *null_host = aftoinfo(addrtypeof(src))->any;
1467     unsigned int op = repl? ERO_REPLACE : ERO_DELETE;
1468
1469     passert(addrtypeof(src) == addrtypeof(dst));
1470     happy(addrtosubnet(src, &this_client));
1471     happy(addrtosubnet(dst, &that_client));
1472
1473     if (raw_eroute(null_host, &this_client, null_host, &that_client
1474     , htonl(shunt_spi), SA_INT, SADB_X_SATYPE_INT, op, opname))
1475     {
1476         struct bare_shunt **bs_pp = bare_shunt_ptr(&this_client, &that_client);
1477
1478         if (repl)
1479         {
1480             /* change over to new bare eroute */
1481             struct bare_shunt *bs = *bs_pp;
1482
1483             bs->said.spi = htonl(shunt_spi);
1484             bs->said.proto = SA_INT;
1485             bs->said.dst = *null_host;
1486             bs->count = 0;
1487             bs->last_activity = now();
1488             DBG_bare_shunt("change", bs);
1489         }
1490         else
1491         {
1492             /* delete bare eroute */
1493             free_bare_shunt(bs_pp);
1494         }
1495         return TRUE;
1496     }
1497     else
1498     {
1499         return FALSE;
1500     }
1501 }
1502
1503 static bool
1504 eroute_connection(struct connection *c
1505 , ipsec_spi_t spi, unsigned int proto, unsigned int satype
1506 , unsigned int op, const char *opname)
1507 {
1508     const ip_address *peer = &c->that.host_addr;
1509
1510     return raw_eroute(&c->this.host_addr, &c->this.client
1511         , proto == SA_INT? aftoinfo(addrtypeof(peer))->any : peer
1512         , EffectivePeerClient(c)
1513         , spi, proto, satype, op, opname);
1514 }
1515
1516 /* assign a bare hold to a connection */
1517
1518 bool
1519 assign_hold(struct connection *c
1520 , const ip_address *src, const ip_address *dst)
1521 {
1522     /* either the automatically installed %hold eroute is broad enough
1523      * or we try to add a broader one and delete the automatic one.
1524      * Beware: this %hold might be already handled, but still squeak
1525      * through because of a race.
1526      */
1527     enum routing_t ro = c->routing      /* routing, old */
1528         , rn = ro;      /* routing, new */
1529
1530     /* figure out what routing should become */
1531     switch (ro)
1532     {
1533     case RT_UNROUTED:
1534         rn = RT_UNROUTED_HOLD;
1535         break;
1536     case RT_ROUTED_PROSPECTIVE:
1537         rn = RT_ROUTED_HOLD;
1538         break;
1539     default:
1540         /* no change: this %hold is old news and should just be deleted */
1541         break;
1542     }
1543
1544     if (subnetishost(&c->this.client) && subnetishost(&c->that.client))
1545     {
1546         /* although %hold is appropriately broad, it will no longer be bare
1547          * so we must ditch it from the bare table.
1548          */
1549         free_bare_shunt(bare_shunt_ptr(&c->this.client, &c->that.client));
1550     }
1551     else
1552     {
1553         /* we need a broad %hold, not the narrow one.
1554          * First we ensure that there is a broad %hold.
1555          * There may already be one (race condition): no need to create one.
1556          * There may already be a %trap: replace it.
1557          * There may not be any broad eroute: add %hold.
1558          * Once the broad %hold is in place, delete the narrow one.
1559          */
1560         if (rn != ro)
1561         {
1562             if (erouted(ro)
1563             ? !eroute_connection(c, htonl(SPI_HOLD), SA_INT, SADB_X_SATYPE_INT
1564                 , ERO_REPLACE, "replace %trap with broad %hold")
1565             : !eroute_connection(c, htonl(SPI_HOLD), SA_INT, SADB_X_SATYPE_INT
1566                 , ERO_ADD, "add broad %hold"))
1567             {
1568                 return FALSE;
1569             }
1570         }
1571         if (!replace_bare_shunt(src, dst, SPI_HOLD, FALSE, "delete narrow %hold"))
1572             return FALSE;
1573     }
1574     c->routing = rn;
1575     return TRUE;
1576 }
1577
1578 /* install or remove eroute for SA Group */
1579 static bool
1580 sag_eroute(struct state *st, unsigned op, const char *opname)
1581 {
1582     struct connection *c = st->st_connection;
1583     unsigned int
1584         inner_proto,
1585         inner_satype;
1586     ipsec_spi_t inner_spi;
1587
1588     /* figure out the SPI and protocol (in two forms)
1589      * for the innermost transformation.
1590      */
1591
1592     if (st->st_ah.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
1593     || st->st_esp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
1594     || st->st_ipcomp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL)
1595     {
1596         inner_spi = st->st_tunnel_out_spi;
1597         inner_proto = SA_IPIP;
1598         inner_satype = SADB_X_SATYPE_IPIP;
1599     }
1600     else if (st->st_ipcomp.present)
1601     {
1602         inner_spi = st->st_ipcomp.attrs.spi;
1603         inner_proto = SA_COMP;
1604         inner_satype = SADB_X_SATYPE_COMP;
1605     }
1606     else if (st->st_esp.present)
1607     {
1608         inner_spi = st->st_esp.attrs.spi;
1609         inner_proto = SA_ESP;
1610         inner_satype = SADB_SATYPE_ESP;
1611     }
1612     else if (st->st_ah.present)
1613     {
1614         inner_spi = st->st_ah.attrs.spi;
1615         inner_proto = SA_AH;
1616         inner_satype = SADB_SATYPE_AH;
1617     }
1618     else
1619     {
1620         impossible();   /* no transform at all! */
1621     }
1622
1623     return eroute_connection(c
1624         , inner_spi, inner_proto, inner_satype
1625         , op, opname);
1626 }
1627
1628 /* Add/replace/delete a shunt eroute.
1629  * Such an eroute determines the fate of packets without the use
1630  * of any SAs.  These are defaults, in effect.
1631  * If a negotiation has not been attempted, use %trap.
1632  * If negotiation has failed, the choice between %trap/%pass/%drop/%reject
1633  * is specified in the policy of connection c.
1634  */
1635 static bool
1636 shunt_eroute(struct connection *c, unsigned int op, const char *opname)
1637 {
1638     /* We are constructing a special SAID for the eroute.
1639      * The destination doesn't seem to matter, but the family does.
1640      * The protocol is SA_INT -- mark this as shunt.
1641      * The satype has no meaning, but is required for PF_KEY header!
1642      * The SPI signifies the kind of shunt.
1643      */
1644
1645     /* note: these are in host order :-( */
1646     static ipsec_spi_t shunt_spi[] = { SPI_TRAP /* */
1647         , SPI_PASS      /* --pass */
1648         , SPI_DROP      /* --drop */
1649         , SPI_REJECT }; /* --pass --drop */
1650
1651     ipsec_spi_t spi = c->routing == RT_ROUTED_PROSPECTIVE? SPI_TRAP
1652         : shunt_spi[(c->policy & POLICY_OPPO_MASK) >> POLICY_OPPO_SHIFT];
1653
1654     return eroute_connection(c, htonl(spi), SA_INT, SADB_X_SATYPE_INT, op, opname);
1655 }
1656
1657 /* scan /proc/net/ipsec_eroute every once in a while, looking for:
1658  *
1659  * - %hold shunts of which Pluto isn't aware.  This situation could
1660  *   be caused by lost ACQUIRE messages.  When found, they will
1661  *   added to orphan_holds.  This in turn will lead to Opportunistic
1662  *   initiation.
1663  *
1664  * - %pass shunts that haven't been used recently.  These will be
1665  *   deleted.
1666  *
1667  * - (eventually) other shunt eroutes that haven't been used in a while.
1668  *
1669  * Here are some sample lines:
1670  * 10         10.3.2.1.0/24    -> 0.0.0.0/0          => %trap
1671  * 259        10.3.2.1.115/32  -> 10.19.75.161/32    => tun0x1002@10.19.75.145
1672  * 71         10.44.73.97/32   -> 0.0.0.0/0          => %trap
1673  * 4119       10.44.73.97/32   -> 10.114.121.41/32   => %pass
1674  * Newer versions of KLIPS start each line with a 32-bit packet count.
1675  * If available, the count is used to detect whether a %pass shunt is in use.
1676  */
1677 void
1678 scan_proc_shunts(void)
1679 {
1680     static const char procname[] = "/proc/net/ipsec_eroute";
1681     FILE *f;
1682     time_t nw = now();
1683     int lino;
1684     struct eroute_info *expired = NULL;
1685
1686     event_schedule(EVENT_SHUNT_SCAN, SHUNT_SCAN_INTERVAL, NULL);
1687
1688     /* free any leftover entries: they will be refreshed if still current */
1689     while (orphaned_holds != NULL)
1690     {
1691         struct eroute_info *p = orphaned_holds;
1692
1693         orphaned_holds = p->next;
1694         pfree(p);
1695     }
1696
1697     /* decode the /proc file.  Don't do anything strenuous to it
1698      * (certainly no PF_KEY stuff) to minimize the chance that it
1699      * might change underfoot.
1700      */
1701
1702     f = fopen(procname, "r");
1703     if (f == NULL)
1704         return;
1705
1706     /* for each line... */
1707     for (lino = 1; ; lino++)
1708     {
1709         unsigned char buf[1024];        /* should be big enough */
1710         chunk_t field[10];      /* 10 is loose upper bound */
1711         chunk_t *ff;    /* fixed fields (excluding optional count) */
1712         int fi;
1713         struct eroute_info eri;
1714         char *cp;
1715         err_t context = ""
1716             , ugh = NULL;
1717
1718         cp = fgets(buf, sizeof(buf), f);
1719         if (cp == NULL)
1720             break;
1721
1722         /* break out each field
1723          * Note: if there are too many fields, just stop;
1724          * it will be diagnosed a little later.
1725          */
1726         for (fi = 0; fi < (int)elemsof(field); fi++)
1727         {
1728             static const char sep[] = " \t\n";  /* field-separating whitespace */
1729             size_t w;
1730
1731             cp += strspn(cp, sep);      /* find start of field */
1732             w = strcspn(cp, sep);       /* find width of field */
1733             setchunk(field[fi], cp, w);
1734             cp += w;
1735             if (w == 0)
1736                 break;
1737         }
1738
1739         /* This odd do-hickey is to share error reporting code.
1740          * A break will get to that common code.  The setting
1741          * of "ugh" and "context" parameterize it.
1742          */
1743         do {
1744             /* Old entries have no packet count; new ones do.
1745              * check if things are as they should be.
1746              */
1747             if (fi == 5)
1748                 ff = &field[0]; /* old form, with no count */
1749             else if (fi == 6)
1750                 ff = &field[1]; /* new form, with count */
1751             else
1752             {
1753                 ugh = "has wrong number of fields";
1754                 break;
1755             }
1756
1757             if (ff[1].len != 2
1758             || strncmp(ff[1].ptr, "->", 2) != 0
1759             || ff[3].len != 2
1760             || strncmp(ff[3].ptr, "=>", 2) != 0)
1761             {
1762                 ugh = "is missing -> or =>";
1763                 break;
1764             }
1765
1766             /* actually digest fields of interest */
1767
1768             /* packet count */
1769
1770             eri.count = 0;
1771             if (ff != field)
1772             {
1773                 context = "count field is malformed: ";
1774                 ugh = ttoul(field[0].ptr, field[0].len, 10, &eri.count);
1775                 if (ugh != NULL)
1776                     break;
1777             }
1778
1779             /* our client */
1780
1781             context = "source subnet field malformed: ";
1782             ugh = ttosubnet(ff[0].ptr, ff[0].len, AF_INET, &eri.ours);
1783             if (ugh != NULL)
1784                 break;
1785
1786             /* his client */
1787
1788             context = "source subnet field malformed: ";
1789             ugh = ttosubnet(ff[2].ptr, ff[2].len, AF_INET, &eri.his);
1790             if (ugh != NULL)
1791                 break;
1792
1793             /* SAID */
1794
1795             context = "SA ID field malformed: ";
1796             ugh = ttosa(ff[4].ptr, ff[4].len, &eri.said);
1797         } while (FALSE);
1798
1799         if (ugh != NULL)
1800         {
1801             log("INTERNAL ERROR: %s line %d %s%s"
1802                 , procname, lino, context, ugh);
1803             continue;   /* ignore rest of line */
1804         }
1805
1806         /* Now we have decoded eroute, let's consider it.
1807          * We only care about shunt eroutes.
1808          *
1809          * %hold: if not known, add to orphaned_holds list for initiation
1810          *    because ACQUIRE might have been lost.
1811          *
1812          * %pass: determine if idle; if so, blast it away.
1813          *    Can occur bare (if DNS provided insufficient information)
1814          *    or with a connection (failure context).
1815          *    Could even be installed by ipsec manual.
1816          *
1817          * %trap: always welcome.
1818          *
1819          * others: handling as yet undesigned.  Generally associated
1820          *    with a failure context.
1821          */
1822         if (eri.said.proto == SA_INT)
1823         {
1824             switch (ntohl(eri.said.spi))
1825             {
1826             case SPI_HOLD:
1827                 if (bare_shunt_ptr(&eri.ours, &eri.his) == NULL
1828                 && shunt_owner(&eri.ours, &eri.his) == NULL)
1829                 {
1830                     eri.next = orphaned_holds;
1831                     orphaned_holds = clone_thing(eri, "orphaned %hold");
1832                 }
1833                 break;
1834
1835             case SPI_PASS:
1836                 /* nothing sensible to do if we don't have counts */
1837                 if (ff != field)
1838                 {
1839                     struct bare_shunt **bs_pp
1840                         = bare_shunt_ptr(&eri.ours, &eri.his);
1841
1842                     if (bs_pp != NULL)
1843                     {
1844                         struct bare_shunt *bs = *bs_pp;
1845
1846                         if (eri.count != bs->count)
1847                         {
1848                             bs->count = eri.count;
1849                             bs->last_activity = nw;
1850                         }
1851                         else if (nw - bs->last_activity > SHUNT_PATIENCE)
1852                         {
1853                             eri.next = expired;
1854                             expired = clone_thing(eri, "expired %pass");
1855                         }
1856                     }
1857                 }
1858                 break;
1859
1860             case SPI_DROP:
1861             case SPI_REJECT:
1862             case SPI_TRAP:
1863                 break;
1864
1865             default:
1866                 impossible();
1867             }
1868         }
1869     }   /* for each line */
1870     fclose(f);
1871
1872     /* Now that we've finished processing the /proc file,
1873      * it is safe to delete the expired %pass shunts.
1874      */
1875     while (expired != NULL)
1876     {
1877         struct eroute_info *p = expired;
1878         ip_address src, dst;
1879
1880         networkof(&p->ours, &src);
1881         networkof(&p->his, &dst);
1882         (void) replace_bare_shunt(&src, &dst
1883             , SPI_PASS, FALSE, "delete expired %pass");
1884         expired = p->next;
1885         pfree(p);
1886     }
1887 }
1888
1889 static bool
1890 del_spi(ipsec_spi_t spi, int proto
1891 , const ip_address *src, const ip_address *dest)
1892 {
1893     /* Ask the kernel, with one PF_KEY SADB_DELETE message, to delete the SA
1894      * identified by (dest, spi, proto); src fills the source-address extension.
1895      * The message is built step by step; the first failing step ends the work.
1896      */
1897     struct sadb_ext *exts[SADB_EXT_MAX + 1];
1898     char said_txt[SATOT_BUF];   /* printable SA id, used in diagnostics */
1899     bool ok;
1900
1901     set_text_said(said_txt, dest, spi, proto);
1902     DBG(DBG_KLIPS, DBG_log("delete %s", said_txt));
1903
1904     ok = pfkey_msg_start(SADB_DELETE, proto2satype(proto)
1905         , "pfkey_msg_hdr delete SA", said_txt, exts);
1906     ok = ok && pfkey_build(pfkey_sa_build(&exts[SADB_EXT_SA], SADB_EXT_SA
1907             , spi, 0, SADB_SASTATE_MATURE, 0, 0, 0)
1908         , "pfkey_sa delete SA", said_txt, exts);
1909     ok = ok && pfkeyext_address(SADB_EXT_ADDRESS_SRC, src
1910         , "pfkey_addr_s delete SA", said_txt, exts);
1911     ok = ok && pfkeyext_address(SADB_EXT_ADDRESS_DST, dest
1912         , "pfkey_addr_d delete SA", said_txt, exts);
1913     ok = ok && finish_pfkey_msg(exts, "Delete SA", said_txt, NULL);
1914
1915     return ok;
1916 }
1917
1918 /* Setup a pair of SAs. Code taken from setsa.c and spigrp.c, in
1919  * ipsec-0.5.
1920  */
1921
1922 static bool
1923 setup_half_ipsec_sa(struct state *st, bool inbound)
1924 {
1925     /* Build an inbound or outbound SA */
1926
1927     struct connection *c = st->st_connection;
1928     ip_address
1929         src = inbound? c->that.host_addr : c->this.host_addr,
1930         dst = inbound? c->this.host_addr : c->that.host_addr;
1931
1932     /* SPIs, saved for spigrouping or undoing, if necessary */
1933     ip_said
1934         said[EM_MAXRELSPIS],
1935         *said_next = said;
1936
1937     struct sadb_ext *extensions[SADB_EXT_MAX + 1];
1938     char text_said[SATOT_BUF];
1939
1940     /* set up AH SA, if any */
1941
1942     if (st->st_ah.present)
1943     {
1944         ipsec_spi_t ah_spi = inbound? st->st_ah.our_spi : st->st_ah.attrs.spi;
1945         u_char *ah_dst_keymat = inbound? st->st_ah.our_keymat : st->st_ah.peer_keymat;
1946
1947         unsigned char authalg;
1948
1949         switch (st->st_ah.attrs.auth)
1950         {
1951         case AUTH_ALGORITHM_HMAC_MD5:
1952             authalg = SADB_AALG_MD5HMAC;
1953             break;
1954
1955         case AUTH_ALGORITHM_HMAC_SHA1:
1956             authalg = SADB_AALG_SHA1HMAC;
1957             break;
1958
1959         case AUTH_ALGORITHM_KPDK:
1960         case AUTH_ALGORITHM_DES_MAC:
1961         default:
1962             loglog(RC_LOG_SERIOUS, "%s not implemented yet"
1963                 , enum_show(&auth_alg_names, st->st_ah.attrs.auth));
1964             goto fail;
1965         }
1966
1967         set_text_said(text_said, &dst, ah_spi, SA_AH);
1968
1969         if (!(pfkey_msg_start(SADB_ADD, SADB_SATYPE_AH
1970             , "pfkey_msg_hdr Add AH SA", text_said, extensions)
1971
1972         && pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA]
1973                 , SADB_EXT_SA
1974                 , ah_spi        /* in network order */
1975                 , REPLAY_WINDOW, SADB_SASTATE_MATURE, authalg, 0, 0)
1976             , "pfkey_sa Add AH SA", text_said, extensions)
1977
1978         && pfkeyext_address(SADB_EXT_ADDRESS_SRC, &src
1979             , "pfkey_addr_s Add AH SA", text_said, extensions)
1980
1981         && pfkeyext_address(SADB_EXT_ADDRESS_DST, &dst
1982             , "pfkey_addr_d Add AH SA", text_said, extensions)
1983
1984         && pfkey_build(pfkey_key_build(&extensions[SADB_EXT_KEY_AUTH]
1985                 , SADB_EXT_KEY_AUTH, st->st_ah.keymat_len * IPSEC_PFKEYv2_ALIGN
1986                 , ah_dst_keymat)
1987             , "pfkey_key_a Add AH SA", text_said, extensions)
1988
1989         && finish_pfkey_msg(extensions, "Add AH SA", text_said, NULL)))
1990
1991             goto fail;
1992
1993         initsaid(&dst, ah_spi, SA_AH, said_next);
1994         said_next++;
1995     }
1996
1997     /* set up ESP SA, if any */
1998
1999     if (st->st_esp.present)
2000     {
2001         ipsec_spi_t esp_spi = inbound? st->st_esp.our_spi : st->st_esp.attrs.spi;
2002         u_char *esp_dst_keymat = inbound? st->st_esp.our_keymat : st->st_esp.peer_keymat;
2003
2004 #if 0+MOVED_TO_alg_info_h
2005         struct esp_info {
2006             u_int8_t transid;   /* negotiated ESP transform */
2007             u_int16_t auth;     /* negotiated AUTH */
2008
2009             size_t enckeylen;   /* keylength for ESP transform */
2010             size_t authkeylen;  /* keylength for AUTH */
2011             u_int8_t encryptalg;
2012             u_int8_t authalg;
2013         };
2014 #endif
2015
2016         const struct esp_info *ei;
2017         u_int16_t key_len;
2018
2019         static const struct esp_info esp_info[] = {
2020             { ESP_NULL, AUTH_ALGORITHM_HMAC_MD5,
2021                 0, HMAC_MD5_KEY_LEN,
2022                 SADB_EALG_NULL, SADB_AALG_MD5HMAC },
2023             { ESP_NULL, AUTH_ALGORITHM_HMAC_SHA1,
2024                 0, HMAC_SHA1_KEY_LEN,
2025                 SADB_EALG_NULL, SADB_AALG_SHA1HMAC },
2026
2027             { ESP_DES, AUTH_ALGORITHM_NONE,
2028                 DES_CBC_BLOCK_SIZE, 0,
2029                 SADB_EALG_DESCBC, SADB_AALG_NONE },
2030             { ESP_DES, AUTH_ALGORITHM_HMAC_MD5,
2031                 DES_CBC_BLOCK_SIZE, HMAC_MD5_KEY_LEN,
2032                 SADB_EALG_DESCBC, SADB_AALG_MD5HMAC },
2033             { ESP_DES, AUTH_ALGORITHM_HMAC_SHA1,
2034                 DES_CBC_BLOCK_SIZE,
2035                 HMAC_SHA1_KEY_LEN, SADB_EALG_DESCBC, SADB_AALG_SHA1HMAC },
2036
2037             { ESP_3DES, AUTH_ALGORITHM_NONE,
2038                 DES_CBC_BLOCK_SIZE * 3, 0,
2039                 SADB_EALG_3DESCBC, SADB_AALG_NONE },
2040             { ESP_3DES, AUTH_ALGORITHM_HMAC_MD5,
2041                 DES_CBC_BLOCK_SIZE * 3, HMAC_MD5_KEY_LEN,
2042                 SADB_EALG_3DESCBC, SADB_AALG_MD5HMAC },
2043             { ESP_3DES, AUTH_ALGORITHM_HMAC_SHA1,
2044                 DES_CBC_BLOCK_SIZE * 3, HMAC_SHA1_KEY_LEN,
2045                 SADB_EALG_3DESCBC, SADB_AALG_SHA1HMAC },
2046         };
2047
2048 #ifdef NAT_TRAVERSAL
2049         u_int8_t natt_type = 0;
2050         u_int16_t natt_sport = 0, natt_dport = 0;
2051         ip_address natt_oa;
2052
2053         if (st->nat_traversal & NAT_T_DETECTED) {
2054             natt_type = (st->nat_traversal & NAT_T_WITH_PORT_FLOATING) ?
2055                 ESPINUDP_WITH_NON_ESP : ESPINUDP_WITH_NON_IKE;
2056             natt_sport = inbound? c->that.host_port : c->this.host_port;
2057             natt_dport = inbound? c->this.host_port : c->that.host_port;
2058             natt_oa = st->nat_oa;
2059         }
2060 #endif
2061
2062         for (ei = esp_info; ; ei++)
2063         {
2064             if (ei == &esp_info[elemsof(esp_info)])
2065             {
2066                 /* Check for additional kernel alg */
2067 #ifndef NO_KERNEL_ALG
2068                 if ((ei=kernel_alg_esp_info(st->st_esp.attrs.transid,
2069                                         st->st_esp.attrs.auth))!=NULL) {
2070                         log("ESP transform %s / auth %s implemented ",
2071                     enum_name(&esp_transformid_names, st->st_esp.attrs.transid),
2072                     enum_name(&auth_alg_names, st->st_esp.attrs.auth));
2073                         break;
2074                 }
2075 #endif
2076
2077                 /* note: enum_show may use a static buffer, so two
2078                  * calls in one printf would be a mistake.
2079                  * enum_name does the same job, without a static buffer,
2080                  * assuming the name will be found.
2081                  */
2082                 loglog(RC_LOG_SERIOUS, "ESP transform %s / auth %s not implemented yet",
2083                     enum_name(&esp_transformid_names, st->st_esp.attrs.transid),
2084                     enum_name(&auth_alg_names, st->st_esp.attrs.auth));
2085                 goto fail;
2086             }
2087
2088             if (st->st_esp.attrs.transid == ei->transid
2089             && st->st_esp.attrs.auth == ei->auth)
2090                 break;
2091         }
2092
2093         key_len = st->st_esp.attrs.key_len/8;
2094
2095         if (key_len) {
2096                 /* XXX: must change to check valid _range_ key_len */
2097                 if (key_len > ei->enckeylen) {
2098                         loglog(RC_LOG_SERIOUS, "ESP transform %s passed key_len=%d > %d",
2099                         enum_name(&esp_transformid_names, st->st_esp.attrs.transid),
2100                         key_len, ei->enckeylen);
2101                         goto fail;
2102                 }
2103         } else {
2104                 key_len = ei->enckeylen;
2105                 /* Grrrrr.... f*cking 7 bits jurassic algos
2106                 * 168 bits in kernel, need 192 bits for keymat_len */
2107                 if (ei->transid == ESP_3DES && key_len == 21)
2108                         key_len = 24;
2109                 if (ei->transid == ESP_DES && key_len == 21)
2110                         key_len = 24;
2111
2112         }
2113
2114         /* divide up keying material */
2115         DBG(DBG_KLIPS|DBG_CONTROL|DBG_PARSING,
2116                 if(st->st_esp.keymat_len != key_len + ei->authkeylen)
2117                         DBG_log("keymat_len=%d key_len=%d authkeylen=%d",
2118                                 st->st_esp.keymat_len, key_len, ei->authkeylen);
2119         );
2120
2121         passert(st->st_esp.keymat_len == key_len + ei->authkeylen);
2122
2123         set_text_said(text_said, &dst, esp_spi, SA_ESP);
2124
2125         if (!(pfkey_msg_start(SADB_ADD, SADB_SATYPE_ESP
2126             , "pfkey_msg_hdr Add ESP SA", text_said, extensions)
2127
2128         && pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA]
2129                 , SADB_EXT_SA
2130                 , esp_spi       /* in network order */
2131                 , REPLAY_WINDOW, SADB_SASTATE_MATURE, ei->authalg, ei->encryptalg, 0)
2132             , "pfkey_sa Add ESP SA", text_said, extensions)
2133
2134         && pfkeyext_address(SADB_EXT_ADDRESS_SRC, &src
2135             , "pfkey_addr_s Add ESP SA", text_said, extensions)
2136
2137         && pfkeyext_address(SADB_EXT_ADDRESS_DST, &dst
2138             , "pfkey_addr_d Add ESP SA", text_said, extensions)
2139
2140         && (ei->authkeylen == 0
2141             || pfkey_build(pfkey_key_build(&extensions[SADB_EXT_KEY_AUTH]
2142                     , SADB_EXT_KEY_AUTH, ei->authkeylen * IPSEC_PFKEYv2_ALIGN
2143                     , esp_dst_keymat + key_len)
2144                 , "pfkey_key_a Add ESP SA", text_said, extensions))
2145
2146         && (key_len == 0
2147             || pfkey_build(pfkey_key_build(&extensions[SADB_EXT_KEY_ENCRYPT]
2148                     , SADB_EXT_KEY_ENCRYPT, key_len * IPSEC_PFKEYv2_ALIGN
2149                     , esp_dst_keymat)
2150                 , "pfkey_key_a Add ESP SA", text_said, extensions))
2151
2152 #ifdef NAT_TRAVERSAL
2153         && (natt_type == 0
2154                 || pfkey_build(pfkey_x_nat_t_type_build(
2155                         &extensions[SADB_X_EXT_NAT_T_TYPE], natt_type),
2156                         "pfkey_nat_t_type Add ESP SA", text_said, extensions))
2157         && (natt_sport == 0
2158                 || pfkey_build(pfkey_x_nat_t_port_build(
2159                         &extensions[SADB_X_EXT_NAT_T_SPORT], SADB_X_EXT_NAT_T_SPORT,
2160                         natt_sport), "pfkey_nat_t_sport Add ESP SA", text_said,
2161                         extensions))
2162         && (natt_dport == 0
2163                 || pfkey_build(pfkey_x_nat_t_port_build(
2164                         &extensions[SADB_X_EXT_NAT_T_DPORT], SADB_X_EXT_NAT_T_DPORT,
2165                         natt_dport), "pfkey_nat_t_dport Add ESP SA", text_said,
2166                         extensions))
2167         && (natt_type ==0 || isanyaddr(&natt_oa)
2168                 || pfkeyext_address(SADB_X_EXT_NAT_T_OA, &natt_oa
2169             , "pfkey_nat_t_oa Add ESP SA", text_said, extensions))
2170 #endif
2171
2172         && finish_pfkey_msg(extensions, "Add ESP SA", text_said, NULL)))
2173
2174             goto fail;
2175
2176         initsaid(&dst, esp_spi, SA_ESP, said_next);
2177         said_next++;
2178     }
2179
2180     /* set up IPCOMP SA, if any */
2181
2182     if (st->st_ipcomp.present)
2183     {
2184         ipsec_spi_t ipcomp_spi = inbound? st->st_ipcomp.our_spi : st->st_ipcomp.attrs.spi;
2185         u_int8_t compalg;
2186
2187         switch (st->st_ipcomp.attrs.transid)
2188         {
2189             case IPCOMP_DEFLATE:
2190                 compalg = SADB_X_CALG_DEFLATE;
2191                 break;
2192
2193             case IPCOMP_LZS:
2194                 compalg = SADB_X_CALG_LZS;
2195                 break;
2196
2197             default:
2198                 loglog(RC_LOG_SERIOUS, "IPCOMP transform %s not implemented",
2199                     enum_name(&ipcomp_transformid_names, st->st_ipcomp.attrs.transid));
2200                 goto fail;
2201         }
2202
2203         set_text_said(text_said, &dst, ipcomp_spi, SA_COMP);
2204
2205         if (!(pfkey_msg_start(SADB_ADD, SADB_X_SATYPE_COMP
2206             , "pfkey_msg_hdr Add IPCOMP SA", text_said, extensions)
2207
2208         && pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA]
2209                 , SADB_EXT_SA
2210                 , ipcomp_spi    /* in network order */
2211                 , 0, SADB_SASTATE_MATURE, 0, compalg, 0)
2212             , "pfkey_sa Add IPCOMP SA", text_said, extensions)
2213
2214         && pfkeyext_address(SADB_EXT_ADDRESS_SRC, &src
2215             , "pfkey_addr_s Add IPCOMP SA", text_said, extensions)
2216
2217         && pfkeyext_address(SADB_EXT_ADDRESS_DST, &dst
2218             , "pfkey_addr_d Add IPCOMP SA", text_said, extensions)
2219
2220         && finish_pfkey_msg(extensions, "Add IPCOMP SA", text_said, NULL)))
2221
2222             goto fail;
2223
2224         initsaid(&dst, ipcomp_spi, SA_COMP, said_next);
2225         said_next++;
2226     }
2227
2228     /* If we are tunnelling, set up IP in IP pseudo SA */
2229
2230     if (st->st_ah.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
2231     || st->st_esp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL
2232     || st->st_ipcomp.attrs.encapsulation == ENCAPSULATION_MODE_TUNNEL)
2233     {
2234         /* XXX hack alert -- we SHOULD NOT HAVE TO HAVE A DIFFERENT SPI
2235          * XXX FOR IP-in-IP ENCAPSULATION!
2236          */
2237
2238         ipsec_spi_t ipip_spi;
2239
2240         /* Allocate an SPI for the tunnel.
2241          * Since our peer will never see this,
2242          * and it comes from its own number space,
2243          * it is purely a local implementation wart.
2244          */
2245         {
2246             static ipsec_spi_t last_tunnel_spi = IPSEC_DOI_SPI_OUR_MIN;
2247
2248             ipip_spi = htonl(++last_tunnel_spi);
2249             if (inbound)
2250                 st->st_tunnel_in_spi = ipip_spi;
2251             else
2252                 st->st_tunnel_out_spi = ipip_spi;
2253         }
2254
2255         set_text_said(text_said
2256             , &c->that.host_addr, ipip_spi, SA_IPIP);
2257
2258         if (!(pfkey_msg_start(SADB_ADD, SADB_X_SATYPE_IPIP
2259             , "pfkey_msg_hdr Add IPIP SA", text_said, extensions)
2260
2261         && pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA]
2262                 , SADB_EXT_SA
2263                 , ipip_spi              /* in network order */
2264                 , 0, SADB_SASTATE_MATURE
2265                 , 0
2266                 , 0
2267                 , 0)
2268             , "pfkey_sa Add IPIP SA", text_said, extensions)
2269
2270         && pfkeyext_address(SADB_EXT_ADDRESS_SRC, &src
2271             , "pfkey_addr_s Add IPIP SA", text_said, extensions)
2272
2273         && pfkeyext_address(SADB_EXT_ADDRESS_DST, &dst
2274             , "pfkey_addr_d Add IPIP SA", text_said, extensions)
2275
2276         && finish_pfkey_msg(extensions, "Add IPIP SA", text_said, NULL)))
2277
2278             goto fail;
2279
2280         initsaid(&dst, ipip_spi, SA_IPIP, said_next);
2281         said_next++;
2282
2283         /* If inbound, and policy does not specifie DISABLEARRIVALCHECK,
2284          * tell KLIPS to enforce the IP addresses appropriate for this tunnel.
2285          * Note reversed ends.
2286          * Not much to be done on failure.
2287          */
2288         if (inbound && (c->policy & POLICY_DISABLEARRIVALCHECK) == 0)
2289             (void) raw_eroute(&c->that.host_addr, &c->that.client
2290                 , &c->this.host_addr, &c->this.client
2291                 , ipip_spi, SA_IPIP, SADB_X_SATYPE_IPIP
2292                 , ERO_ADD_INBOUND, "add inbound");
2293     }
2294
2295     /* If there are multiple SPIs, group them. */
2296
2297     if (said_next > &said[1])
2298     {
2299         ip_said *s;
2300
2301         /* group SAs, two at a time, inner to outer (backwards in said[])
2302          * The grouping is by pairs.  So if said[] contains ah esp ipip,
2303          * the grouping would be ipip:esp, esp:ah.
2304          */
2305         for (s = said_next-1; s != said; )
2306         {
2307             struct sadb_ext *extensions[SADB_EXT_MAX + 1];
2308             char
2309                 text_said0[SATOT_BUF],
2310                 text_said1[SATOT_BUF];
2311
2312             s--;
2313
2314             /* group s[1] and s[0], in that order */
2315
2316             set_text_said(text_said0, &s[0].dst, s[0].spi, s[0].proto);
2317             set_text_said(text_said1, &s[1].dst, s[1].spi, s[1].proto);
2318
2319             DBG(DBG_KLIPS, DBG_log("grouping %s and %s", text_said1, text_said0));
2320
2321             if (!(pfkey_msg_start(SADB_X_GRPSA, proto2satype(s[1].proto)
2322                 , "pfkey_msg_hdr group", text_said1, extensions)
2323
2324             && pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA]
2325                     , SADB_EXT_SA
2326                     , s[1].spi  /* in network order */
2327                     , 0, 0, 0, 0, 0)
2328                 , "pfkey_sa group", text_said1, extensions)
2329
2330             && pfkeyext_address(SADB_EXT_ADDRESS_DST, &s[1].dst
2331                 , "pfkey_addr_d group", text_said1, extensions)
2332
2333             && pfkey_build(pfkey_x_satype_build(&extensions[SADB_X_EXT_SATYPE2]
2334                     , proto2satype(s[0].proto))
2335                 , "pfkey_satype group", text_said0, extensions)
2336
2337             && pfkey_build(pfkey_sa_build(&extensions[SADB_X_EXT_SA2]
2338                     , SADB_X_EXT_SA2
2339                     , s[0].spi  /* in network order */
2340                     , 0, 0, 0, 0, 0)
2341                 , "pfkey_sa2 group", text_said0, extensions)
2342
2343             && pfkeyext_address(SADB_X_EXT_ADDRESS_DST2, &s[0].dst
2344                 , "pfkey_addr_d2 group", text_said0, extensions)
2345
2346             && finish_pfkey_msg(extensions, "group", text_said1, NULL)))
2347                 goto fail;
2348         }
2349         /* could update said, but it will not be used */
2350     }
2351
2352     return TRUE;
2353
2354 fail:
2355     {
2356         /* undo the done SPIs */
2357         while (said_next-- != said)
2358             (void) del_spi(said_next->spi, said_next->proto
2359                 , &src, &said_next->dst);
2360         return FALSE;
2361     }
2362 }
2363
2364 /* teardown_half_ipsec_sa is a cannibalized version of setup_half_ipsec_sa:
2365  * it deletes one direction of st's SA bundle and returns del_spi's verdict.
2366  */
2367 static bool
2368 teardown_half_ipsec_sa(struct state *st, bool inbound)
2369 {
2370     /* The SAs of a bundle have been grouped, so deleting any one
2371      * will do.  We delete the first one found: AH, else ESP. */
2372     struct connection *conn = st->st_connection;
2373     struct ipsec_proto_info *first;     /* first SA found */
2374     unsigned sa_proto;
2375
2376     if (st->st_ah.present)
2377     {
2378         first = &st->st_ah;
2379         sa_proto = SA_AH;
2380     }
2381     else if (!st->st_esp.present)
2382     {
2383         impossible();   /* neither AH nor ESP in outbound SA bundle! */
2384     }
2385     else
2386     {
2387         first = &st->st_esp;
2388         sa_proto = SA_ESP;
2389     }
2390
2391     /* peer has a DNS name: look it up again and adopt a changed address */
2392     if (conn->dnshostname != NULL)
2393     {
2394         ip_address fresh;
2395         if (ttoaddr(conn->dnshostname, 0, conn->addr_family, &fresh) == NULL
2396         && !sameaddr(&fresh, &conn->that.host_addr))
2397         {
2398             conn->that.host_addr = fresh;
2399             state_rehash(conn);
2400         }
2401     }
2402
2403     if (inbound)
2404         return del_spi(first->our_spi, sa_proto
2405             , &conn->that.host_addr, &conn->this.host_addr);
2406     return del_spi(first->attrs.spi, sa_proto
2407         , &conn->this.host_addr, &conn->that.host_addr);
2408 }
2409 #endif /* KLIPS */
2410
2411
2412 void
2413 init_kernel(void)
2414 {
2415 #ifdef KLIPS
2416     /* unless no_klips is set: call init_pfkey, schedule a shunt scan event */
2417     if (no_klips)
2418         return;
2419     init_pfkey();
2420     event_schedule(EVENT_SHUNT_SCAN, SHUNT_SCAN_INTERVAL, NULL);
2421 #endif
2422 }
2423
2424 /* Note: install_inbound_ipsec_sa is only used by the Responder.
2425  * The Responder will subsequently use install_ipsec_sa for the outbound.
2426  * The Initiator uses install_ipsec_sa to install both at once.
2427  */
2428 bool
2429 install_inbound_ipsec_sa(struct state *st)
2430 {
2431     struct connection *const c = st->st_connection;
2432
2433     /* If our peer has a fixed-address client, check if we already
2434      * have a route for that client that conflicts.  We will take this
2435      * as proof that that route and the connections using it are
2436      * obsolete and should be eliminated.  Interestingly, this is
2437      * the only case in which we can tell that a connection is obsolete.
2438      */
2439     passert(c->kind != CK_TEMPLATE);
2440     if (c->that.has_client)
2441     {
2442         for (;;)
2443         {
2444             struct connection *o = route_owner(c, NULL);
2445
2446             if (o == NULL)
2447                 break;  /* nobody has a route */
2448
2449             /* note: we ignore the client addresses at this end */
2450             if (sameaddr(&o->that.host_addr, &c->that.host_addr)
2451             && o->interface == c->interface)
2452                 break;  /* existing route is compatible */
2453
2454             loglog(RC_LOG_SERIOUS, "route to peer's client conflicts with \"%s\" %s; releasing old connection to free the route"
2455                 , o->name, ip_str(&o->that.host_addr));
2456             release_connection(o);
2457         }
2458     }
2459
2460     /* check that we will be able to route and eroute */
2461     if (!could_route(c))
2462         return FALSE;
2463
2464 #ifdef KLIPS
2465     /* (attempt to) actually set up the SAs */
2466     return setup_half_ipsec_sa(st, TRUE);
2467 #else /* !KLIPS */
2468     DBG(DBG_CONTROL, DBG_log("install_inbound_ipsec_sa()"));
2469     return TRUE;
2470 #endif /* !KLIPS */
2471 }
2472
2473 /* Install the eroute -- a prospective shunt if st == NULL, else st's SA
2474  * group eroute -- and then the route for connection c.
2475  * Assumption: could_route gave a go-ahead; any SA Group already exists.
2476  * On success sets c->routing (and c->eroute_owner for a tunnel); returns TRUE.
2477  * On failure returns FALSE after unwinding (where feasible: old shunt
2478  * will be lost -- this may matter). */
2479 static bool
2480 route_and_eroute(struct connection *c USED_BY_KLIPS
2481 , struct state *st USED_BY_KLIPS)
2482 {
2483 #ifdef KLIPS
2484     struct connection *ero      /* who, if anyone, owns our eroute? */
2485         , *ro = route_owner(c, &ero);
2486     bool eroute_installed = FALSE
2487         , firewall_notified = FALSE
2488         , route_installed = FALSE;
2489     /* if no connection owns the eroute, a bare shunt for the same clients may */
2490     struct bare_shunt **bspp = (ero == NULL)
2491         ? bare_shunt_ptr(&c->this.client, EffectivePeerClient(c)) : NULL;
2492
2493     /* install the eroute */
2494
2495     passert(ero == NULL || ero == c);
2496     if (ero != NULL || bspp != NULL)
2497     {
2498         /* We're replacing an eroute */
2499
2500         eroute_installed = st == NULL
2501             ? shunt_eroute(c, ERO_REPLACE, "replace")
2502             : sag_eroute(st, ERO_REPLACE, "replace");
2503
2504         if (eroute_installed && bspp != NULL)
2505             free_bare_shunt(bspp);
2506     }
2507     else
2508     {
2509         /* we're adding an eroute */
2510         eroute_installed = st == NULL
2511             ? shunt_eroute(c, ERO_ADD, "add")
2512             : sag_eroute(st, ERO_ADD, "add");
2513     }
2514
2515     /* notify the firewall of a new tunnel */
2516
2517     if (eroute_installed)
2518     {
2519         /* do we have to notify the firewall?  Yes, if we are installing
2520          * a tunnel eroute and the firewall wasn't notified
2521          * for a previous tunnel with the same clients.  Any Previous
2522          * tunnel would have to be for our connection, so the actual
2523          * test is simple.
2524          */
2525         firewall_notified = st == NULL  /* not a tunnel eroute */
2526             || c->eroute_owner != SOS_NOBODY    /* already notified */
2527             || do_command(c, "up");     /* go ahead and notify */
2528     }
2529
2530     /* install the route */
2531
2532     if (!firewall_notified)
2533     {
2534         /* we're in trouble -- don't do routing */
2535     }
2536     else if (ro == NULL)
2537     {
2538         /* a new route: no deletion required, but preparation is */
2539         (void) do_command(c, "prepare");        /* just in case; ignore failure */
2540         route_installed = do_command(c, "route");
2541     }
2542     else if (routed(c->routing)
2543     || routes_agree(ro, c))
2544     {
2545         route_installed = TRUE; /* nothing to be done */
2546     }
2547     else
2548     {
2549         /* Some other connection must own the route
2550          * and the route must disagree.  But since could_route
2551          * must have allowed our stealing it, we'll do so.
2552          *
2553          * A feature of LINUX allows us to install the new route
2554          * before deleting the old if the nexthops differ.
2555          * This reduces the "window of vulnerability" when packets
2556          * might flow in the clear.
2557          */
2558         if (sameaddr(&c->this.host_nexthop, &ro->this.host_nexthop))
2559         {
2560             (void) do_command(ro, "unroute");
2561             route_installed = do_command(c, "route");
2562         }
2563         else
2564         {
2565             route_installed = do_command(c, "route");
2566             (void) do_command(ro, "unroute");
2567         }
2568
2569         /* record unrouting of every connection that route_owner still reports */
2570         if (route_installed)
2571         {
2572             do {
2573                 passert(!erouted(ro->routing));
2574                 ro->routing = RT_UNROUTED;
2575                 ro = route_owner(c, NULL);      /* no need to keep old value */
2576             } while (ro != NULL);
2577         }
2578     }
2579
2580     /* all done -- clean up */
2581     if (route_installed)
2582     {
2583         /* Success! */
2584         if (st == NULL)
2585         {
2586             passert(c->eroute_owner == SOS_NOBODY);
2587             c->routing = RT_ROUTED_PROSPECTIVE;
2588         }
2589         else
2590         {
2591             c->routing = RT_ROUTED_TUNNEL;
2592             c->eroute_owner = st->st_serialno;
2593         }
2594
2595         return TRUE;
2596     }
2597     else
2598     {
2599         /* Failure!  Unwind our work: the firewall "up" first, then the eroute. */
2600         if (firewall_notified && c->eroute_owner == SOS_NOBODY)
2601             (void) do_command(c, "down");
2602
2603         if (eroute_installed)
2604         {
2605             if (st == NULL)
2606                 (void) shunt_eroute(c, ERO_DELETE, "delete");
2607             else
2608                 (void) sag_eroute(st, ERO_DELETE, "delete");
2609         }
2610
2611         return FALSE;
2612     }
2613 #else /* !KLIPS */
2614     return TRUE;        /* without KLIPS there is nothing to install */
2615 #endif /* !KLIPS */
2616 }
2617
2618 #ifdef CONFIG_LEDMAN
2619 static int num_ipsec_sa = 0;    /* IPsec SAs currently counted for the VPN LED; LED goes off at 0 */
2620 #endif
2621
2622 /* Install the IPsec SAs of st, then route and eroute its connection.
2623  * inbound_also: set up the inbound half as well as the outbound half.
2624  * Returns TRUE on success, FALSE if routing is impossible or a step fails. */
2625 bool
2626 install_ipsec_sa(struct state *st, bool inbound_also USED_BY_KLIPS)
2627 {
2628     if (!could_route(st->st_connection))
2629         return FALSE;
2630 #ifdef KLIPS
2631     /* (attempt to) actually set up the SA group */
2632     if ((inbound_also && !setup_half_ipsec_sa(st, TRUE))
2633     || !setup_half_ipsec_sa(st, FALSE))
2634         return FALSE;
2635
2636 #ifdef CONFIG_LEDMAN
2637     /* count first: delete_ipsec_sa() on the failure path below decrements,
2638      * so counting only after success would leave the LED count unbalanced */
2639     num_ipsec_sa++;
2640 #endif
2641     if (!route_and_eroute(st->st_connection, st))
2642     {
2643         delete_ipsec_sa(st, FALSE);
2644         return FALSE;
2645     }
2646 #ifdef CONFIG_LEDMAN
2647     ledman_cmd(LEDMAN_CMD_ON, LEDMAN_VPN);
2648 #endif
2649 #else /* !KLIPS */
2650     DBG(DBG_CONTROL, DBG_log("install_ipsec_sa() %s"
2651         , inbound_also? "inbound and oubound" : "outbound only"));
2652 #endif /* !KLIPS */
2653     return TRUE;
2654 }
2655
2656 /* delete an IPSEC SA.
2657  * we may not succeed, but we bull ahead anyway because
2658  * we cannot do anything better by recognizing failure.
2659  * inbound_only: tear down just the inbound half.  Otherwise both halves go
2660  * and, if st owns the connection's eroute, that eroute is replaced by a shunt.
2661  */
2662 void
2663 delete_ipsec_sa(struct state *st USED_BY_KLIPS, bool inbound_only USED_BY_KLIPS)
2664 {
2665 #ifdef KLIPS
2666     if (inbound_only)
2667     {
2668         (void) teardown_half_ipsec_sa(st, TRUE);
2669     }
2670     else
2671     {
2672         struct connection *c = st->st_connection;
2673
2674         if (c->eroute_owner == st->st_serialno)
2675         {
2676             passert(c->routing == RT_ROUTED_TUNNEL);
2677             c->eroute_owner = SOS_NOBODY;
2678
2679             /* strictly speaking, routing should become RT_ROUTED_FAILURE,
2680              * but it is perhaps simpler to use RT_ROUTED_PROSPECTIVE
2681              * if the shunt will be TRAP.
2682              */
2683             c->routing = (c->policy & POLICY_OPPO_MASK) == LEMPTY
2684                 ? RT_ROUTED_PROSPECTIVE : RT_ROUTED_FAILURE;
2685
2686             (void) do_command(c, "down");
2687             (void) shunt_eroute(c, ERO_REPLACE, "replace with shunt");
2688         }
2689         (void) teardown_half_ipsec_sa(st, FALSE);
2690         (void) teardown_half_ipsec_sa(st, TRUE);
2691
2692 #ifdef CONFIG_LEDMAN
2693         /* never count below zero: an unbalanced delete would otherwise
2694          * push the total negative and the LED would never switch off */
2695         if (num_ipsec_sa > 0 && --num_ipsec_sa == 0)
2696             ledman_cmd(LEDMAN_CMD_OFF, LEDMAN_VPN);
2697 #endif
2698     }
2699 #else /* !KLIPS */
2700     DBG(DBG_CONTROL, DBG_log("if I knew how, I'd eroute() and teardown_ipsec_sa()"));
2701 #endif /* !KLIPS */
2702 }
2703
2704 #ifdef NAT_TRAVERSAL
2705 #ifdef KLIPS
2706 /* Issue a PF_KEY SADB_UPDATE for one ESP SA of st, attaching NAT-T source
2707  * and destination ports.  For inbound, src is c->that and dst is c->this,
2708  * with our SPI; otherwise reversed.  TRUE only if every step succeeds. */
2709 static bool update_nat_t_ipsec_esp_sa (struct state *st, bool inbound)
2710 {
2711     struct connection *c = st->st_connection;
2712     struct sadb_ext *extensions[SADB_EXT_MAX + 1];
2713     char text_said[SATOT_BUF];
2714     ip_address src, dst;
2715     ipsec_spi_t esp_spi;
2716     u_int16_t natt_sport, natt_dport;
2717
2718     if (inbound)
2719     {
2720         src = c->that.host_addr;
2721         dst = c->this.host_addr;
2722         natt_sport = c->that.host_port;
2723         natt_dport = c->this.host_port;
2724         esp_spi = st->st_esp.our_spi;
2725     }
2726     else
2727     {
2728         src = c->this.host_addr;
2729         dst = c->that.host_addr;
2730         natt_sport = c->this.host_port;
2731         natt_dport = c->that.host_port;
2732         esp_spi = st->st_esp.attrs.spi;
2733     }
2734     set_text_said(text_said, &dst, esp_spi, SA_ESP);
2735     /* && short-circuits, so building stops at the first step that fails */
2736     return (pfkey_msg_start(SADB_UPDATE, SADB_SATYPE_ESP
2737             , "pfkey_msg_hdr Update ESP SA", text_said, extensions)
2738         && pfkey_build(pfkey_sa_build(&extensions[SADB_EXT_SA], SADB_EXT_SA
2739             , esp_spi /* in network order */, 0, SADB_SASTATE_MATURE
2740             , st->st_esp.attrs.auth, st->st_esp.attrs.transid /* both dummies, not used */, 0)
2741             , "pfkey_sa Update ESP SA", text_said, extensions)
2742         && pfkeyext_address(SADB_EXT_ADDRESS_SRC, &src
2743             , "pfkey_addr_s Update ESP SA", text_said, extensions)
2744         && pfkeyext_address(SADB_EXT_ADDRESS_DST, &dst
2745             , "pfkey_addr_d Update ESP SA", text_said, extensions)
2746         && pfkey_build(pfkey_x_nat_t_port_build(&extensions[SADB_X_EXT_NAT_T_SPORT]
2747             , SADB_X_EXT_NAT_T_SPORT, natt_sport)
2748             , "pfkey_nat_t_sport Update ESP SA", text_said, extensions)
2749         && pfkey_build(pfkey_x_nat_t_port_build(&extensions[SADB_X_EXT_NAT_T_DPORT]
2750             , SADB_X_EXT_NAT_T_DPORT, natt_dport)
2751             , "pfkey_nat_t_dport Update ESP SA", text_said, extensions)
2752         && finish_pfkey_msg(extensions, "Update ESP SA", text_said, NULL)) ? TRUE : FALSE;
2753 }
2754 #endif
2755
2756 /* Re-send st's ESP SAs to the kernel with the current NAT-T port data.
2757  * Returns FALSE if an update fails or st_state is not an established one. */
2758 bool update_ipsec_sa (struct state *st USED_BY_KLIPS)
2759 {
2760 #ifdef KLIPS
2761     bool established = IS_IPSEC_SA_ESTABLISHED(st->st_state);
2762
2763     if (established || IS_ONLY_INBOUND_IPSEC_SA_ESTABLISHED(st->st_state))
2764     {
2765         if (!st->st_esp.present)
2766             return TRUE;        /* no ESP SA, so nothing to update */
2767         /* the inbound (TRUE) update is attempted only when fully established */
2768         if (established && !update_nat_t_ipsec_esp_sa (st, TRUE))
2769             return FALSE;
2770         return update_nat_t_ipsec_esp_sa (st, FALSE);
2771     }
2772     DBG_log("assert failed at %s:%d st_state=%d", __FILE__, __LINE__,
2773         st->st_state);
2774     return FALSE;
2775 #else /* !KLIPS */
2776     /* without KLIPS nothing is updated: log the request, report success */
2777     DBG(DBG_CONTROL
2778         , DBG_log("if I knew how, I'd update_ipsec_sa()"));
2779     return TRUE;
2780 #endif /* !KLIPS */
2781 }
2782 #endif
2783