net/unix/af_unix.c

   1 /*
   2  * NET4:        Implementation of BSD Unix domain sockets.
   3  *
   4  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
   5  *
   6  *              This program is free software; you can redistribute it and/or
   7  *              modify it under the terms of the GNU General Public License
   8  *              as published by the Free Software Foundation; either version
   9  *              2 of the License, or (at your option) any later version.
  10  *
  11  * Fixes:
  12  *              Linus Torvalds  :       Assorted bug cures.
  13  *              Niibe Yutaka    :       async I/O support.
  14  *              Carsten Paeth   :       PF_UNIX check, address fixes.
  15  *              Alan Cox        :       Limit size of allocated blocks.
  16  *              Alan Cox        :       Fixed the stupid socketpair bug.
  17  *              Alan Cox        :       BSD compatibility fine tuning.
  18  *              Alan Cox        :       Fixed a bug in connect when interrupted.
  19  *              Alan Cox        :       Sorted out a proper draft version of
  20  *                                      file descriptor passing hacked up from
  21  *                                      Mike Shaver's work.
  22  *              Marty Leisner   :       Fixes to fd passing
  23  *              Nick Nevin      :       recvmsg bugfix.
  24  *              Alan Cox        :       Started proper garbage collector
  25  *              Heiko EiBfeldt  :       Missing verify_area check
  26  *              Alan Cox        :       Started POSIXisms
  27  *              Andreas Schwab  :       Replace inode by dentry for proper
  28  *                                      reference counting
  29  *              Kirk Petersen   :       Made this a module
  30  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
  31  *                                      Lots of bug fixes.
 *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
 *                                      by above two patches.
 *           Andrea Arcangeli   :       If possible we block in connect(2)
 *                                      if the max backlog of the listen socket
 *                                      has been reached. This won't break
 *                                      old apps and it will avoid a huge number
 *                                      of socks hashed (this is for unix_gc()
 *                                      performance reasons).
  40  *                                      Security fix that limits the max
  41  *                                      number of socks to 2*max_files and
  42  *                                      the number of skb queueable in the
  43  *                                      dgram receiver.
  44  *              Artur Skawina   :       Hash function optimizations
  45  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
  46  *            Malcolm Beattie   :       Set peercred for socketpair
  47  *           Michal Ostrowski   :       Module initialization cleanup.
  48  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
  49  *                                      the core infrastructure is doing that
  50  *                                      for all net proto families now (2.5.69+)
  51  *
  52  *
  53  * Known differences from reference BSD that was tested:
  54  *
  55  *      [TO FIX]
  56  *      ECONNREFUSED is not returned from one end of a connected() socket to the
  57  *              other the moment one end closes.
  58  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
  59  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
  60  *      [NOT TO FIX]
  61  *      accept() returns a path name even if the connecting socket has closed
  62  *              in the meantime (BSD loses the path and gives up).
  63  *      accept() returns 0 length path for an unbound connector. BSD returns 16
  64  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
  65  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
  66  *      BSD af_unix apparently has connect forgetting to block properly.
  67  *              (need to check this with the POSIX spec in detail)
  68  *
  69  * Differences from 2.0.0-11-... (ANK)
  70  *      Bug fixes and improvements.
  71  *              - client shutdown killed server socket.
  72  *              - removed all useless cli/sti pairs.
  73  *
  74  *      Semantic changes/extensions.
  75  *              - generic control message passing.
  76  *              - SCM_CREDENTIALS control message.
  77  *              - "Abstract" (not FS based) socket bindings.
  78  *                Abstract names are sequences of bytes (not zero terminated)
  79  *                started by 0, so that this name space does not intersect
  80  *                with BSD names.
  81  */
  82
  83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
  84
  85 #include <linux/module.h>
  86 #include <linux/kernel.h>
  87 #include <linux/signal.h>
  88 #include <linux/sched.h>
  89 #include <linux/errno.h>
  90 #include <linux/string.h>
  91 #include <linux/stat.h>
  92 #include <linux/dcache.h>
  93 #include <linux/namei.h>
  94 #include <linux/socket.h>
  95 #include <linux/un.h>
  96 #include <linux/fcntl.h>
  97 #include <linux/termios.h>
  98 #include <linux/sockios.h>
  99 #include <linux/net.h>
 100 #include <linux/in.h>
 101 #include <linux/fs.h>
 102 #include <linux/slab.h>
 103 #include <asm/uaccess.h>
 104 #include <linux/skbuff.h>
 105 #include <linux/netdevice.h>
 106 #include <net/net_namespace.h>
 107 #include <net/sock.h>
 108 #include <net/tcp_states.h>
 109 #include <net/af_unix.h>
 110 #include <linux/proc_fs.h>
 111 #include <linux/seq_file.h>
 112 #include <net/scm.h>
 113 #include <linux/init.h>
 114 #include <linux/poll.h>
 115 #include <linux/rtnetlink.h>
 116 #include <linux/mount.h>
 117 #include <net/checksum.h>
 118 #include <linux/security.h>
 119 #include <linux/freezer.h>
 120
/*
 * Global table of AF_UNIX sockets with 2 * UNIX_HASH_SIZE buckets.
 * unix_sockets_unbound() hands out buckets from the upper half
 * (index >= UNIX_HASH_SIZE); the by-inode lookup masks the inode number
 * with UNIX_HASH_SIZE - 1 and therefore uses the lower half.
 */
struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
EXPORT_SYMBOL_GPL(unix_socket_table);
/* Spinlock guarding unix_socket_table (see the SMP locking strategy note). */
DEFINE_SPINLOCK(unix_table_lock);
EXPORT_SYMBOL_GPL(unix_table_lock);
/*
 * Number of AF_UNIX sockets currently accounted for: incremented in
 * unix_create1() and decremented in unix_sock_destructor().
 */
static atomic_long_t unix_nr_socks;
 126
 127
 128 static struct hlist_head *unix_sockets_unbound(void *addr)
 129 {
 130         unsigned long hash = (unsigned long)addr;
 131
 132         hash ^= hash >> 16;
 133         hash ^= hash >> 8;
 134         hash %= UNIX_HASH_SIZE;
 135         return &unix_socket_table[UNIX_HASH_SIZE + hash];
 136 }
 137
/*
 * True when the hash stored in the socket's address is below
 * UNIX_HASH_SIZE (going by the macro name, this marks abstract bindings).
 * unix_sk(sk)->addr is dereferenced unconditionally, so only use this on
 * a socket whose addr pointer is known to be set.
 */
#define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
 139
/*
 * Helpers moving the security ID between a scm_cookie and the per-skb
 * control block (UNIXCB).  Note the direction implied by the code:
 * "get" writes into the skb, "set" writes into the scm_cookie.
 */
#ifdef CONFIG_SECURITY_NETWORK
/* Copy scm->secid into the control block of @skb. */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        UNIXCB(skb).secid = scm->secid;
}

/* Copy the secid stored in the control block of @skb into @scm. */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
        scm->secid = UNIXCB(skb).secid;
}

/* Return true when @scm and @skb carry the same secid. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return (scm->secid == UNIXCB(skb).secid);
}
#else
/* Without CONFIG_SECURITY_NETWORK the copy helpers do nothing. */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

/* Without CONFIG_SECURITY_NETWORK every comparison reports a match. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
        return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
 167
 168 /*
 169  *  SMP locking strategy:
 170  *    hash table is protected with spinlock unix_table_lock
 171  *    each socket state is protected by separate spin lock.
 172  */
 173
 174 static inline unsigned int unix_hash_fold(__wsum n)
 175 {
 176         unsigned int hash = (__force unsigned int)csum_fold(n);
 177
 178         hash ^= hash>>8;
 179         return hash&(UNIX_HASH_SIZE-1);
 180 }
 181
/*
 * Accessor for the peer pointer of an AF_UNIX socket.  It expands to the
 * struct member itself, so it is usable both as a value and as an
 * assignment target (unix_release_sock() does "unix_peer(sk) = NULL").
 */
#define unix_peer(sk) (unix_sk(sk)->peer)
 183
 184 static inline int unix_our_peer(struct sock *sk, struct sock *osk)
 185 {
 186         return unix_peer(osk) == sk;
 187 }
 188
 189 static inline int unix_may_send(struct sock *sk, struct sock *osk)
 190 {
 191         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
 192 }
 193
 194 static inline int unix_recvq_full(struct sock const *sk)
 195 {
 196         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
 197 }
 198
 199 struct sock *unix_peer_get(struct sock *s)
 200 {
 201         struct sock *peer;
 202
 203         unix_state_lock(s);
 204         peer = unix_peer(s);
 205         if (peer)
 206                 sock_hold(peer);
 207         unix_state_unlock(s);
 208         return peer;
 209 }
 210 EXPORT_SYMBOL_GPL(unix_peer_get);
 211
 212 static inline void unix_release_addr(struct unix_address *addr)
 213 {
 214         if (atomic_dec_and_test(&addr->refcnt))
 215                 kfree(addr);
 216 }
 217
 218 /*
 219  *      Check unix socket name:
 220  *              - should be not zero length.
 221  *              - if started by not zero, should be NULL terminated (FS object)
 222  *              - if started by zero, it is abstract name.
 223  */
 224
 225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
 226 {
 227         if (len <= sizeof(short) || len > sizeof(*sunaddr))
 228                 return -EINVAL;
 229         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
 230                 return -EINVAL;
 231         if (sunaddr->sun_path[0]) {
 232                 /*
 233                  * This may look like an off by one error but it is a bit more
 234                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
 235                  * sun_path[108] doesn't as such exist.  However in kernel space
 236                  * we are guaranteed that it is a valid memory location in our
 237                  * kernel address buffer.
 238                  */
 239                 ((char *)sunaddr)[len] = 0;
 240                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
 241                 return len;
 242         }
 243
 244         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
 245         return len;
 246 }
 247
/*
 * Unlink @sk from its hash chain.  This variant takes no lock itself;
 * unix_remove_socket() is the wrapper that holds unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
        sk_del_node_init(sk);
}
 252
/*
 * Add @sk to the hash chain @list.  Warns if @sk is still hashed
 * somewhere.  This variant takes no lock itself; unix_insert_socket()
 * is the wrapper that holds unix_table_lock.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        WARN_ON(!sk_unhashed(sk));
        sk_add_node(sk, list);
}
 258
/* Unlink @sk from its hash chain while holding unix_table_lock. */
static inline void unix_remove_socket(struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_remove_socket(sk);
        spin_unlock(&unix_table_lock);
}
 265
/* Add @sk to the hash chain @list while holding unix_table_lock. */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
        spin_lock(&unix_table_lock);
        __unix_insert_socket(list, sk);
        spin_unlock(&unix_table_lock);
}
 272
 273 static struct sock *__unix_find_socket_byname(struct net *net,
 274                                               struct sockaddr_un *sunname,
 275                                               int len, int type, unsigned int hash)
 276 {
 277         struct sock *s;
 278
 279         sk_for_each(s, &unix_socket_table[hash ^ type]) {
 280                 struct unix_sock *u = unix_sk(s);
 281
 282                 if (!net_eq(sock_net(s), net))
 283                         continue;
 284
 285                 if (u->addr->len == len &&
 286                     !memcmp(u->addr->name, sunname, len))
 287                         goto found;
 288         }
 289         s = NULL;
 290 found:
 291         return s;
 292 }
 293
 294 static inline struct sock *unix_find_socket_byname(struct net *net,
 295                                                    struct sockaddr_un *sunname,
 296                                                    int len, int type,
 297                                                    unsigned int hash)
 298 {
 299         struct sock *s;
 300
 301         spin_lock(&unix_table_lock);
 302         s = __unix_find_socket_byname(net, sunname, len, type, hash);
 303         if (s)
 304                 sock_hold(s);
 305         spin_unlock(&unix_table_lock);
 306         return s;
 307 }
 308
 309 static struct sock *unix_find_socket_byinode(struct inode *i)
 310 {
 311         struct sock *s;
 312
 313         spin_lock(&unix_table_lock);
 314         sk_for_each(s,
 315                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
 316                 struct dentry *dentry = unix_sk(s)->path.dentry;
 317
 318                 if (dentry && d_real_inode(dentry) == i) {
 319                         sock_hold(s);
 320                         goto found;
 321                 }
 322         }
 323         s = NULL;
 324 found:
 325         spin_unlock(&unix_table_lock);
 326         return s;
 327 }
 328
 329 /* Support code for asymmetrically connected dgram sockets
 330  *
 331  * If a datagram socket is connected to a socket not itself connected
 332  * to the first socket (eg, /dev/log), clients may only enqueue more
 333  * messages if the present receive queue of the server socket is not
 334  * "too large". This means there's a second writeability condition
 335  * poll and sendmsg need to test. The dgram recv code will do a wake
 336  * up on the peer_wait wait queue of a socket upon reception of a
 337  * datagram which needs to be propagated to sleeping would-be writers
 338  * since these might not have sent anything so far. This can't be
 339  * accomplished via poll_wait because the lifetime of the server
 340  * socket might be less than that of its clients if these break their
 341  * association with it or if the server socket is closed while clients
 342  * are still connected to it and there's no way to inform "a polling
 343  * implementation" that it should let go of a certain wait queue
 344  *
 345  * In order to propagate a wake up, a wait_queue_t of the client
 346  * socket is enqueued on the peer_wait queue of the server socket
 347  * whose wake function does a wake_up on the ordinary client socket
 348  * wait queue. This connection is established whenever a write (or
 349  * poll for write) hit the flow control condition and broken when the
 350  * association to the server socket is dissolved or after a wake up
 351  * was relayed.
 352  */
 353
 354 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
 355                                       void *key)
 356 {
 357         struct unix_sock *u;
 358         wait_queue_head_t *u_sleep;
 359
 360         u = container_of(q, struct unix_sock, peer_wake);
 361
 362         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
 363                             q);
 364         u->peer_wake.private = NULL;
 365
 366         /* relaying can only happen while the wq still exists */
 367         u_sleep = sk_sleep(&u->sk);
 368         if (u_sleep)
 369                 wake_up_interruptible_poll(u_sleep, key);
 370
 371         return 0;
 372 }
 373
 374 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
 375 {
 376         struct unix_sock *u, *u_other;
 377         int rc;
 378
 379         u = unix_sk(sk);
 380         u_other = unix_sk(other);
 381         rc = 0;
 382         spin_lock(&u_other->peer_wait.lock);
 383
 384         if (!u->peer_wake.private) {
 385                 u->peer_wake.private = other;
 386                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
 387
 388                 rc = 1;
 389         }
 390
 391         spin_unlock(&u_other->peer_wait.lock);
 392         return rc;
 393 }
 394
 395 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
 396                                             struct sock *other)
 397 {
 398         struct unix_sock *u, *u_other;
 399
 400         u = unix_sk(sk);
 401         u_other = unix_sk(other);
 402         spin_lock(&u_other->peer_wait.lock);
 403
 404         if (u->peer_wake.private == other) {
 405                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
 406                 u->peer_wake.private = NULL;
 407         }
 408
 409         spin_unlock(&u_other->peer_wait.lock);
 410 }
 411
 412 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
 413                                                    struct sock *other)
 414 {
 415         unix_dgram_peer_wake_disconnect(sk, other);
 416         wake_up_interruptible_poll(sk_sleep(sk),
 417                                    POLLOUT |
 418                                    POLLWRNORM |
 419                                    POLLWRBAND);
 420 }
 421
 422 /* preconditions:
 423  *      - unix_peer(sk) == other
 424  *      - association is stable
 425  */
 426 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
 427 {
 428         int connected;
 429
 430         connected = unix_dgram_peer_wake_connect(sk, other);
 431
 432         if (unix_recvq_full(other))
 433                 return 1;
 434
 435         if (connected)
 436                 unix_dgram_peer_wake_disconnect(sk, other);
 437
 438         return 0;
 439 }
 440
 441 static int unix_writable(const struct sock *sk)
 442 {
 443         return sk->sk_state != TCP_LISTEN &&
 444                (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
 445 }
 446
 447 static void unix_write_space(struct sock *sk)
 448 {
 449         struct socket_wq *wq;
 450
 451         rcu_read_lock();
 452         if (unix_writable(sk)) {
 453                 wq = rcu_dereference(sk->sk_wq);
 454                 if (wq_has_sleeper(wq))
 455                         wake_up_interruptible_sync_poll(&wq->wait,
 456                                 POLLOUT | POLLWRNORM | POLLWRBAND);
 457                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
 458         }
 459         rcu_read_unlock();
 460 }
 461
 462 /* When dgram socket disconnects (or changes its peer), we clear its receive
 463  * queue of packets arrived from previous peer. First, it allows to do
 464  * flow control based only on wmem_alloc; second, sk connected to peer
 465  * may receive messages only from that peer. */
 466 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
 467 {
 468         if (!skb_queue_empty(&sk->sk_receive_queue)) {
 469                 skb_queue_purge(&sk->sk_receive_queue);
 470                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
 471
 472                 /* If one link of bidirectional dgram pipe is disconnected,
 473                  * we signal error. Messages are lost. Do not make this,
 474                  * when peer was not connected to us.
 475                  */
 476                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
 477                         other->sk_err = ECONNRESET;
 478                         other->sk_error_report(other);
 479                 }
 480         }
 481 }
 482
 483 static void unix_sock_destructor(struct sock *sk)
 484 {
 485         struct unix_sock *u = unix_sk(sk);
 486
 487         skb_queue_purge(&sk->sk_receive_queue);
 488
 489         WARN_ON(atomic_read(&sk->sk_wmem_alloc));
 490         WARN_ON(!sk_unhashed(sk));
 491         WARN_ON(sk->sk_socket);
 492         if (!sock_flag(sk, SOCK_DEAD)) {
 493                 pr_info("Attempt to release alive unix socket: %p\n", sk);
 494                 return;
 495         }
 496
 497         if (u->addr)
 498                 unix_release_addr(u->addr);
 499
 500         atomic_long_dec(&unix_nr_socks);
 501         local_bh_disable();
 502         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 503         local_bh_enable();
 504 #ifdef UNIX_REFCNT_DEBUG
 505         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
 506                 atomic_long_read(&unix_nr_socks));
 507 #endif
 508 }
 509
/*
 * Tear down an AF_UNIX socket.
 *
 * @sk:      socket being released
 * @embrion: nonzero when @sk is being released on behalf of a closing
 *           listener (the recursive call below passes 1 for sockets found
 *           on the listener's receive queue); unix_release() passes 0.
 *           When nonzero, a stream/seqpacket peer gets ECONNRESET even if
 *           @sk's receive queue is empty.
 *
 * The function unhashes the socket, marks it orphaned/shut down/closed,
 * notifies and drops the peer, flushes the receive queue, releases the
 * bound path and finally drops the caller's reference on @sk.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
        struct unix_sock *u = unix_sk(sk);
        struct path path;
        struct sock *skpair;
        struct sk_buff *skb;
        int state;

        unix_remove_socket(sk);

        /* Clear state */
        unix_state_lock(sk);
        sock_orphan(sk);
        sk->sk_shutdown = SHUTDOWN_MASK;
        /* Take over the path so it can be put after the lock is dropped. */
        path         = u->path;
        u->path.dentry = NULL;
        u->path.mnt = NULL;
        /* Remember the old state: a listener needs its queue handled below. */
        state = sk->sk_state;
        sk->sk_state = TCP_CLOSE;
        unix_state_unlock(sk);

        wake_up_interruptible_all(&u->peer_wait);

        skpair = unix_peer(sk);

        if (skpair != NULL) {
                if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
                        unix_state_lock(skpair);
                        /* No more writes */
                        skpair->sk_shutdown = SHUTDOWN_MASK;
                        /* Unread data (or an embryo) means the peer sees a reset. */
                        if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
                                skpair->sk_err = ECONNRESET;
                        unix_state_unlock(skpair);
                        skpair->sk_state_change(skpair);
                        sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
                }

                unix_dgram_peer_wake_disconnect(sk, skpair);
                sock_put(skpair); /* It may now die */
                unix_peer(sk) = NULL;
        }

        /* Try to flush out this socket. Throw out buffers at least */

        while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
                /* On a listener each queued skb refers to a pending socket. */
                if (state == TCP_LISTEN)
                        unix_release_sock(skb->sk, 1);
                /* passed fds are erased in the kfree_skb hook        */
                UNIXCB(skb).consumed = skb->len;
                kfree_skb(skb);
        }

        if (path.dentry)
                path_put(&path);

        sock_put(sk);

        /* ---- Socket is dead now and most probably destroyed ---- */

        /*
         * Fixme: BSD difference: In BSD all sockets connected to us get
         *        ECONNRESET and we die on the spot. In Linux we behave
         *        like files and pipes do and wait for the last
         *        dereference.
         *
         * Can't we simply set sock->err?
         *
         *        What the above comment does talk about? --ANK(980817)
         */

        if (unix_tot_inflight)
                unix_gc();              /* Garbage collect fds */
}
 583
 584 static void init_peercred(struct sock *sk)
 585 {
 586         put_pid(sk->sk_peer_pid);
 587         if (sk->sk_peer_cred)
 588                 put_cred(sk->sk_peer_cred);
 589         sk->sk_peer_pid  = get_pid(task_tgid(current));
 590         sk->sk_peer_cred = get_current_cred();
 591 }
 592
 593 static void copy_peercred(struct sock *sk, struct sock *peersk)
 594 {
 595         put_pid(sk->sk_peer_pid);
 596         if (sk->sk_peer_cred)
 597                 put_cred(sk->sk_peer_cred);
 598         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
 599         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
 600 }
 601
 602 static int unix_listen(struct socket *sock, int backlog)
 603 {
 604         int err;
 605         struct sock *sk = sock->sk;
 606         struct unix_sock *u = unix_sk(sk);
 607         struct pid *old_pid = NULL;
 608
 609         err = -EOPNOTSUPP;
 610         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
 611                 goto out;       /* Only stream/seqpacket sockets accept */
 612         err = -EINVAL;
 613         if (!u->addr)
 614                 goto out;       /* No listens on an unbound socket */
 615         unix_state_lock(sk);
 616         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
 617                 goto out_unlock;
 618         if (backlog > sk->sk_max_ack_backlog)
 619                 wake_up_interruptible_all(&u->peer_wait);
 620         sk->sk_max_ack_backlog  = backlog;
 621         sk->sk_state            = TCP_LISTEN;
 622         /* set credentials so connect can copy them */
 623         init_peercred(sk);
 624         err = 0;
 625
 626 out_unlock:
 627         unix_state_unlock(sk);
 628         put_pid(old_pid);
 629 out:
 630         return err;
 631 }
 632
/*
 * Forward declarations of the socket operation handlers, so that the
 * proto_ops tables below can reference them ahead of their definitions.
 */
static int unix_release(struct socket *);
static int unix_bind(struct socket *, struct sockaddr *, int);
static int unix_stream_connect(struct socket *, struct sockaddr *,
                               int addr_len, int flags);
static int unix_socketpair(struct socket *, struct socket *);
static int unix_accept(struct socket *, struct socket *, int);
static int unix_getname(struct socket *, struct sockaddr *, int *, int);
static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
static unsigned int unix_dgram_poll(struct file *, struct socket *,
                                    poll_table *);
static int unix_ioctl(struct socket *, unsigned int, unsigned long);
static int unix_shutdown(struct socket *, int);
static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
                                    size_t size, int flags);
static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
                                       struct pipe_inode_info *, size_t size,
                                       unsigned int flags);
static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
static int unix_dgram_connect(struct socket *, struct sockaddr *,
                              int, int);
static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
                                  int);
 659
 660 static int unix_set_peek_off(struct sock *sk, int val)
 661 {
 662         struct unix_sock *u = unix_sk(sk);
 663
 664         if (mutex_lock_interruptible(&u->iolock))
 665                 return -EINTR;
 666
 667         sk->sk_peek_off = val;
 668         mutex_unlock(&u->iolock);
 669
 670         return 0;
 671 }
 672
 673
/*
 * Operation table installed by unix_create() for SOCK_STREAM sockets.
 * It is the only one of the three tables that provides sendpage and
 * splice_read handlers and that polls through unix_poll().
 */
static const struct proto_ops unix_stream_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_stream_sendmsg,
        .recvmsg =      unix_stream_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     unix_stream_sendpage,
        .splice_read =  unix_stream_splice_read,
        .set_peek_off = unix_set_peek_off,
};
 696
/*
 * Operation table installed by unix_create() for SOCK_DGRAM sockets
 * (and for SOCK_RAW, which unix_create() converts to SOCK_DGRAM).
 * accept and listen are wired to the generic sock_no_* stubs.
 */
static const struct proto_ops unix_dgram_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_dgram_connect,
        .socketpair =   unix_socketpair,
        .accept =       sock_no_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       sock_no_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_dgram_sendmsg,
        .recvmsg =      unix_dgram_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 718
/*
 * Operation table installed by unix_create() for SOCK_SEQPACKET sockets.
 * Connection handling (connect, accept, listen) is shared with the
 * stream table, polling is shared with the datagram table, and
 * sendmsg/recvmsg have seqpacket-specific handlers.
 */
static const struct proto_ops unix_seqpacket_ops = {
        .family =       PF_UNIX,
        .owner =        THIS_MODULE,
        .release =      unix_release,
        .bind =         unix_bind,
        .connect =      unix_stream_connect,
        .socketpair =   unix_socketpair,
        .accept =       unix_accept,
        .getname =      unix_getname,
        .poll =         unix_dgram_poll,
        .ioctl =        unix_ioctl,
        .listen =       unix_listen,
        .shutdown =     unix_shutdown,
        .setsockopt =   sock_no_setsockopt,
        .getsockopt =   sock_no_getsockopt,
        .sendmsg =      unix_seqpacket_sendmsg,
        .recvmsg =      unix_seqpacket_recvmsg,
        .mmap =         sock_no_mmap,
        .sendpage =     sock_no_sendpage,
        .set_peek_off = unix_set_peek_off,
};
 740
/*
 * Protocol descriptor handed to sk_alloc() in unix_create1().  obj_size
 * is set to sizeof(struct unix_sock), the per-socket structure that
 * unix_sk() returns throughout this file.
 */
static struct proto unix_proto = {
        .name                   = "UNIX",
        .owner                  = THIS_MODULE,
        .obj_size               = sizeof(struct unix_sock),
};
 746
/*
 * AF_UNIX sockets do not interact with hardware, hence they
 * don't trigger interrupts - so it's safe for them to have
 * bh-unsafe locking for their sk_receive_queue.lock. Split off
 * this special lock-class by reinitializing the spinlock key
 * (done with lockdep_set_class() in unix_create1()):
 */
static struct lock_class_key af_unix_sk_receive_queue_lock_key;
 754
 755 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
 756 {
 757         struct sock *sk = NULL;
 758         struct unix_sock *u;
 759
 760         atomic_long_inc(&unix_nr_socks);
 761         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
 762                 goto out;
 763
 764         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
 765         if (!sk)
 766                 goto out;
 767
 768         sock_init_data(sock, sk);
 769         lockdep_set_class(&sk->sk_receive_queue.lock,
 770                                 &af_unix_sk_receive_queue_lock_key);
 771
 772         sk->sk_write_space      = unix_write_space;
 773         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
 774         sk->sk_destruct         = unix_sock_destructor;
 775         u         = unix_sk(sk);
 776         u->path.dentry = NULL;
 777         u->path.mnt = NULL;
 778         spin_lock_init(&u->lock);
 779         atomic_long_set(&u->inflight, 0);
 780         INIT_LIST_HEAD(&u->link);
 781         mutex_init(&u->iolock); /* single task reading lock */
 782         mutex_init(&u->bindlock); /* single task binding lock */
 783         init_waitqueue_head(&u->peer_wait);
 784         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
 785         unix_insert_socket(unix_sockets_unbound(sk), sk);
 786 out:
 787         if (sk == NULL)
 788                 atomic_long_dec(&unix_nr_socks);
 789         else {
 790                 local_bh_disable();
 791                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 792                 local_bh_enable();
 793         }
 794         return sk;
 795 }
 796
 797 static int unix_create(struct net *net, struct socket *sock, int protocol,
 798                        int kern)
 799 {
 800         if (protocol && protocol != PF_UNIX)
 801                 return -EPROTONOSUPPORT;
 802
 803         sock->state = SS_UNCONNECTED;
 804
 805         switch (sock->type) {
 806         case SOCK_STREAM:
 807                 sock->ops = &unix_stream_ops;
 808                 break;
 809                 /*
 810                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
 811                  *      nothing uses it.
 812                  */
 813         case SOCK_RAW:
 814                 sock->type = SOCK_DGRAM;
 815         case SOCK_DGRAM:
 816                 sock->ops = &unix_dgram_ops;
 817                 break;
 818         case SOCK_SEQPACKET:
 819                 sock->ops = &unix_seqpacket_ops;
 820                 break;
 821         default:
 822                 return -ESOCKTNOSUPPORT;
 823         }
 824
 825         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
 826 }
 827
 828 static int unix_release(struct socket *sock)
 829 {
 830         struct sock *sk = sock->sk;
 831
 832         if (!sk)
 833                 return 0;
 834
 835         unix_release_sock(sk, 0);
 836         sock->sk = NULL;
 837
 838         return 0;
 839 }
 840
 841 static int unix_autobind(struct socket *sock)
 842 {
 843         struct sock *sk = sock->sk;
 844         struct net *net = sock_net(sk);
 845         struct unix_sock *u = unix_sk(sk);
 846         static u32 ordernum = 1;
 847         struct unix_address *addr;
 848         int err;
 849         unsigned int retries = 0;
 850
 851         err = mutex_lock_interruptible(&u->bindlock);
 852         if (err)
 853                 return err;
 854
 855         err = 0;
 856         if (u->addr)
 857                 goto out;
 858
 859         err = -ENOMEM;
 860         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
 861         if (!addr)
 862                 goto out;
 863
 864         addr->name->sun_family = AF_UNIX;
 865         atomic_set(&addr->refcnt, 1);
 866
 867 retry:
 868         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
 869         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
 870
 871         spin_lock(&unix_table_lock);
 872         ordernum = (ordernum+1)&0xFFFFF;
 873
 874         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
 875                                       addr->hash)) {
 876                 spin_unlock(&unix_table_lock);
 877                 /*
 878                  * __unix_find_socket_byname() may take long time if many names
 879                  * are already in use.
 880                  */
 881                 cond_resched();
 882                 /* Give up if all names seems to be in use. */
 883                 if (retries++ == 0xFFFFF) {
 884                         err = -ENOSPC;
 885                         kfree(addr);
 886                         goto out;
 887                 }
 888                 goto retry;
 889         }
 890         addr->hash ^= sk->sk_type;
 891
 892         __unix_remove_socket(sk);
 893         u->addr = addr;
 894         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
 895         spin_unlock(&unix_table_lock);
 896         err = 0;
 897
 898 out:    mutex_unlock(&u->bindlock);
 899         return err;
 900 }
 901
 /*
  * Look up the socket named by @sunname.
  *
  * If sun_path[0] is zero the name is looked up in the name hash through
  * unix_find_socket_byname(); otherwise sun_path is resolved as a
  * filesystem path and the socket is found through its inode.
  *
  * Returns the socket found (the error paths here drop it with sock_put(),
  * so it comes with a reference), or NULL with *@error set:
  *   the kern_path() / inode_permission() error for path names,
  *   -ECONNREFUSED if the path is not a socket or nothing is bound there,
  *   -EPROTOTYPE   if a path-bound socket has a type other than @type.
  */
 static struct sock *unix_find_other(struct net *net,
                                     struct sockaddr_un *sunname, int len,
                                     int type, unsigned int hash, int *error)
 {
         struct sock *peer;
         struct inode *inode;
         struct path path;
         int err;

         if (!sunname->sun_path[0]) {
                 peer = unix_find_socket_byname(net, sunname, len, type, hash);
                 if (!peer) {
                         err = -ECONNREFUSED;
                         goto fail;
                 }
                 /* The socket found may still carry a filesystem path. */
                 if (unix_sk(peer)->path.dentry)
                         touch_atime(&unix_sk(peer)->path);
                 return peer;
         }

         err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
         if (err)
                 goto fail;

         inode = d_real_inode(path.dentry);
         err = inode_permission(inode, MAY_WRITE);
         if (err)
                 goto put_fail;

         err = -ECONNREFUSED;
         if (!S_ISSOCK(inode->i_mode))
                 goto put_fail;

         peer = unix_find_socket_byinode(inode);
         if (!peer)
                 goto put_fail;

         /* Only update atime when the lookup is going to succeed. */
         if (peer->sk_type == type)
                 touch_atime(&path);

         path_put(&path);

         if (peer->sk_type != type) {
                 sock_put(peer);
                 err = -EPROTOTYPE;
                 goto fail;
         }
         return peer;

 put_fail:
         path_put(&path);
 fail:
         *error = err;
         return NULL;
 }
 956
 957 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
 958 {
 959         struct dentry *dentry;
 960         struct path path;
 961         int err = 0;
 962         /*
 963          * Get the parent directory, calculate the hash for last
 964          * component.
 965          */
 966         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
 967         err = PTR_ERR(dentry);
 968         if (IS_ERR(dentry))
 969                 return err;
 970
 971         /*
 972          * All right, let's create it.
 973          */
 974         err = security_path_mknod(&path, dentry, mode, 0);
 975         if (!err) {
 976                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
 977                 if (!err) {
 978                         res->mnt = mntget(path.mnt);
 979                         res->dentry = dget(dentry);
 980                 }
 981         }
 982         done_path_create(&path, dentry);
 983         return err;
 984 }
 985
 /*
  * Bind callback for AF_UNIX sockets.
  *
  * An address consisting of the family only (addr_len == sizeof(short))
  * requests an autobind.  A name with non-zero sun_path[0] gets a
  * filesystem node created through unix_mknod(); a name with zero
  * sun_path[0] is only checked against the name hash table.
  *
  * Returns 0 on success, otherwise:
  *   -EINVAL      address too short, wrong family, or socket already bound
  *   -EADDRINUSE  the name is taken (including -EEXIST from unix_mknod())
  *   -ENOMEM      address allocation failed
  *   or the error from unix_mkname(), unix_mknod(), unix_autobind() or
  *   mutex_lock_interruptible().
  */
 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 {
         struct sock *sk = sock->sk;
         struct net *net = sock_net(sk);
         struct unix_sock *u = unix_sk(sk);
         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
         char *sun_path = sunaddr->sun_path;
         int err;
         unsigned int hash;
         struct unix_address *addr;
         struct hlist_head *list;
         struct path path = { NULL, NULL };

         err = -EINVAL;
         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
             sunaddr->sun_family != AF_UNIX)
                 goto out;

         /* Family only, no name: let the kernel pick one. */
         if (addr_len == sizeof(short)) {
                 err = unix_autobind(sock);
                 goto out;
         }

         /* On success unix_mkname() returns the length used from here on. */
         err = unix_mkname(sunaddr, addr_len, &hash);
         if (err < 0)
                 goto out;
         addr_len = err;

         if (sun_path[0]) {
                 umode_t mode = S_IFSOCK |
                        (SOCK_INODE(sock)->i_mode & ~current_umask());
                 /*
                  * NOTE(review): the node is created before bindlock is
                  * taken and before u->addr is checked.  The later error
                  * paths only path_put() the reference; nothing in this
                  * function removes the node again - confirm intended.
                  */
                 err = unix_mknod(sun_path, mode, &path);
                 if (err) {
                         if (err == -EEXIST)
                                 err = -EADDRINUSE;
                         goto out;
                 }
         }

         err = mutex_lock_interruptible(&u->bindlock);
         if (err)
                 goto out_put;

         /* A socket that already has an address cannot be bound again. */
         err = -EINVAL;
         if (u->addr)
                 goto out_up;

         err = -ENOMEM;
         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
         if (!addr)
                 goto out_up;

         memcpy(addr->name, sunaddr, addr_len);
         addr->len = addr_len;
         addr->hash = hash ^ sk->sk_type;
         atomic_set(&addr->refcnt, 1);

         if (sun_path[0]) {
                 /*
                  * Path-bound sockets are chained by inode number; the
                  * address hash is set to UNIX_HASH_SIZE instead of a
                  * name hash.
                  */
                 addr->hash = UNIX_HASH_SIZE;
                 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
                 spin_lock(&unix_table_lock);
                 u->path = path;
                 list = &unix_socket_table[hash];
         } else {
                 /* Names without a path must be unique in the name table. */
                 spin_lock(&unix_table_lock);
                 err = -EADDRINUSE;
                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
                                               sk->sk_type, hash)) {
                         unix_release_addr(addr);
                         goto out_unlock;
                 }

                 list = &unix_socket_table[addr->hash];
         }

         /* Move the socket from its current chain to the chosen one. */
         err = 0;
         __unix_remove_socket(sk);
         u->addr = addr;
         __unix_insert_socket(list, sk);

 out_unlock:
         spin_unlock(&unix_table_lock);
 out_up:
         mutex_unlock(&u->bindlock);
 out_put:
         /* On success the references in path now belong to u->path. */
         if (err)
                 path_put(&path);
 out:
         return err;
 }
1076
/*
 * Take the state locks of @sk1 and @sk2.
 *
 * When @sk2 is NULL or identical to @sk1 only @sk1 is locked.  Otherwise
 * the socket at the lower address is locked first and the other one is
 * taken with the nested variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first = sk1;
        struct sock *second = sk2;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        /* Order by address so both callers agree on the locking order. */
        if (sk2 < sk1) {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1091
/*
 * Counterpart of unix_state_double_lock(): release the state lock of @sk1
 * and, unless @sk2 is NULL or the same socket, that of @sk2 as well.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (!sk2 || unlikely(sk1 == sk2))
                return;

        unix_state_unlock(sk2);
}
1101
1102 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1103                               int alen, int flags)
1104 {
1105         struct sock *sk = sock->sk;
1106         struct net *net = sock_net(sk);
1107         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1108         struct sock *other;
1109         unsigned int hash;
1110         int err;
1111
1112         err = -EINVAL;
1113         if (alen < offsetofend(struct sockaddr, sa_family))
1114                 goto out;
1115
1116         if (addr->sa_family != AF_UNSPEC) {
1117                 err = unix_mkname(sunaddr, alen, &hash);
1118                 if (err < 0)
1119                         goto out;
1120                 alen = err;
1121
1122                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1123                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1124                         goto out;
1125
1126 restart:
1127                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1128                 if (!other)
1129                         goto out;
1130
1131                 unix_state_double_lock(sk, other);
1132
1133                 /* Apparently VFS overslept socket death. Retry. */
1134                 if (sock_flag(other, SOCK_DEAD)) {
1135                         unix_state_double_unlock(sk, other);
1136                         sock_put(other);
1137                         goto restart;
1138                 }
1139
1140                 err = -EPERM;
1141                 if (!unix_may_send(sk, other))
1142                         goto out_unlock;
1143
1144                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1145                 if (err)
1146                         goto out_unlock;
1147
1148         } else {
1149                 /*
1150                  *      1003.1g breaking connected state with AF_UNSPEC
1151                  */
1152                 other = NULL;
1153                 unix_state_double_lock(sk, other);
1154         }
1155
1156         /*
1157          * If it was connected, reconnect.
1158          */
1159         if (unix_peer(sk)) {
1160                 struct sock *old_peer = unix_peer(sk);
1161                 unix_peer(sk) = other;
1162                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1163
1164                 unix_state_double_unlock(sk, other);
1165
1166                 if (other != old_peer)
1167                         unix_dgram_disconnected(sk, old_peer);
1168                 sock_put(old_peer);
1169         } else {
1170                 unix_peer(sk) = other;
1171                 unix_state_double_unlock(sk, other);
1172         }
1173         return 0;
1174
1175 out_unlock:
1176         unix_state_double_unlock(sk, other);
1177         sock_put(other);
1178 out:
1179         return err;
1180 }
1181
1182 static long unix_wait_for_peer(struct sock *other, long timeo)
1183 {
1184         struct unix_sock *u = unix_sk(other);
1185         int sched;
1186         DEFINE_WAIT(wait);
1187
1188         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1189
1190         sched = !sock_flag(other, SOCK_DEAD) &&
1191                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1192                 unix_recvq_full(other);
1193
1194         unix_state_unlock(other);
1195
1196         if (sched)
1197                 timeo = schedule_timeout(timeo);
1198
1199         finish_wait(&u->peer_wait, &wait);
1200         return timeo;
1201 }
1202
/*
 * Connect callback used for connection-oriented AF_UNIX sockets.
 *
 * A new sock (newsk) is created and wired up as the peer of the connecting
 * socket; a one byte skb allocated against newsk is then queued on the
 * listener's receive queue, where unix_accept() reads skb->sk to pick the
 * new sock up.
 *
 * Returns 0 on success, otherwise:
 *   -ENOMEM        newsk or the skb could not be allocated
 *   -ECONNREFUSED  target is not listening or has shut down receiving
 *   -EAGAIN        listener queue is full and no timeout is left
 *   -EISCONN       this socket is already TCP_ESTABLISHED
 *   -EINVAL        this socket is in any other state than TCP_CLOSE
 *   or the error from unix_mkname(), unix_autobind(), unix_find_other(),
 *   sock_intr_errno() or security_unix_stream_connect().
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
                               int addr_len, int flags)
{
        struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
        struct sock *sk = sock->sk;
        struct net *net = sock_net(sk);
        struct unix_sock *u = unix_sk(sk), *newu, *otheru;
        struct sock *newsk = NULL;
        struct sock *other = NULL;
        struct sk_buff *skb = NULL;
        unsigned int hash;
        int st;
        int err;
        long timeo;

        err = unix_mkname(sunaddr, addr_len, &hash);
        if (err < 0)
                goto out;
        addr_len = err;

        if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
            (err = unix_autobind(sock)) != 0)
                goto out;

        timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

        /* First of all allocate resources.
           If we do it after the state is locked,
           we will have to recheck everything again in any case.
         */

        err = -ENOMEM;

        /* create new sock for complete connection */
        newsk = unix_create1(sock_net(sk), NULL, 0);
        if (newsk == NULL)
                goto out;

        /* Allocate skb for sending to listening sock */
        skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
        if (skb == NULL)
                goto out;

restart:
        /*  Find listening sock. */
        other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
        if (!other)
                goto out;

        /* Latch state of peer */
        unix_state_lock(other);

        /* Apparently VFS overslept socket death. Retry. */
        if (sock_flag(other, SOCK_DEAD)) {
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = -ECONNREFUSED;
        if (other->sk_state != TCP_LISTEN)
                goto out_unlock;
        if (other->sk_shutdown & RCV_SHUTDOWN)
                goto out_unlock;

        if (unix_recvq_full(other)) {
                err = -EAGAIN;
                if (!timeo)
                        goto out_unlock;

                /* unix_wait_for_peer() drops other's state lock for us. */
                timeo = unix_wait_for_peer(other, timeo);

                err = sock_intr_errno(timeo);
                if (signal_pending(current))
                        goto out;
                sock_put(other);
                goto restart;
        }

        /* Latch our state.

           This is a tricky place. We need to grab our state lock and cannot
           drop the lock on the peer. It is dangerous because deadlock is
           possible. The connect-to-self case and simultaneous
           attempts to connect are eliminated by checking socket
           state. other is TCP_LISTEN, if sk is TCP_LISTEN we
           check this before attempting to grab the lock.

           Well, and we have to recheck the state after the socket is locked.
         */
        st = sk->sk_state;

        switch (st) {
        case TCP_CLOSE:
                /* This is ok... continue with connect */
                break;
        case TCP_ESTABLISHED:
                /* Socket is already connected */
                err = -EISCONN;
                goto out_unlock;
        default:
                err = -EINVAL;
                goto out_unlock;
        }

        unix_state_lock_nested(sk);

        /* Our state changed while we were not holding the lock: start over. */
        if (sk->sk_state != st) {
                unix_state_unlock(sk);
                unix_state_unlock(other);
                sock_put(other);
                goto restart;
        }

        err = security_unix_stream_connect(sk, other, newsk);
        if (err) {
                unix_state_unlock(sk);
                goto out_unlock;
        }

        /* The way is open! Quickly set all the necessary fields... */

        /* newsk's peer pointer holds a reference on sk. */
        sock_hold(sk);
        unix_peer(newsk)        = sk;
        newsk->sk_state         = TCP_ESTABLISHED;
        newsk->sk_type          = sk->sk_type;
        init_peercred(newsk);
        newu = unix_sk(newsk);
        RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
        otheru = unix_sk(other);

        /* copy address information from listening to new sock */
        if (otheru->addr) {
                atomic_inc(&otheru->addr->refcnt);
                newu->addr = otheru->addr;
        }
        if (otheru->path.dentry) {
                path_get(&otheru->path);
                newu->path = otheru->path;
        }

        /* Set credentials */
        copy_peercred(sk, other);

        sock->state     = SS_CONNECTED;
        sk->sk_state    = TCP_ESTABLISHED;
        /* sk's peer pointer holds a reference on newsk. */
        sock_hold(newsk);

        smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
        unix_peer(sk)   = newsk;

        unix_state_unlock(sk);

        /* Queue the skb on the listening sock and wake it up. */
        spin_lock(&other->sk_receive_queue.lock);
        __skb_queue_tail(&other->sk_receive_queue, skb);
        spin_unlock(&other->sk_receive_queue.lock);
        unix_state_unlock(other);
        other->sk_data_ready(other);
        sock_put(other);
        return 0;

out_unlock:
        if (other)
                unix_state_unlock(other);

out:
        /* Common cleanup: free whatever was set up so far. */
        kfree_skb(skb);
        if (newsk)
                unix_release_sock(newsk, 0);
        if (other)
                sock_put(other);
        return err;
}
1377
1378 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1379 {
1380         struct sock *ska = socka->sk, *skb = sockb->sk;
1381
1382         /* Join our sockets back to back */
1383         sock_hold(ska);
1384         sock_hold(skb);
1385         unix_peer(ska) = skb;
1386         unix_peer(skb) = ska;
1387         init_peercred(ska);
1388         init_peercred(skb);
1389
1390         if (ska->sk_type != SOCK_DGRAM) {
1391                 ska->sk_state = TCP_ESTABLISHED;
1392                 skb->sk_state = TCP_ESTABLISHED;
1393                 socka->state  = SS_CONNECTED;
1394                 sockb->state  = SS_CONNECTED;
1395         }
1396         return 0;
1397 }
1398
1399 static void unix_sock_inherit_flags(const struct socket *old,
1400                                     struct socket *new)
1401 {
1402         if (test_bit(SOCK_PASSCRED, &old->flags))
1403                 set_bit(SOCK_PASSCRED, &new->flags);
1404         if (test_bit(SOCK_PASSSEC, &old->flags))
1405                 set_bit(SOCK_PASSSEC, &new->flags);
1406 }
1407
1408 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1409 {
1410         struct sock *sk = sock->sk;
1411         struct sock *tsk;
1412         struct sk_buff *skb;
1413         int err;
1414
1415         err = -EOPNOTSUPP;
1416         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1417                 goto out;
1418
1419         err = -EINVAL;
1420         if (sk->sk_state != TCP_LISTEN)
1421                 goto out;
1422
1423         /* If socket state is TCP_LISTEN it cannot change (for now...),
1424          * so that no locks are necessary.
1425          */
1426
1427         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1428         if (!skb) {
1429                 /* This means receive shutdown. */
1430                 if (err == 0)
1431                         err = -EINVAL;
1432                 goto out;
1433         }
1434
1435         tsk = skb->sk;
1436         skb_free_datagram(sk, skb);
1437         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1438
1439         /* attach accepted sock to socket */
1440         unix_state_lock(tsk);
1441         newsock->state = SS_CONNECTED;
1442         unix_sock_inherit_flags(sock, newsock);
1443         sock_graft(tsk, newsock);
1444         unix_state_unlock(tsk);
1445         return 0;
1446
1447 out:
1448         return err;
1449 }
1450
1451
1452 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1453 {
1454         struct sock *sk = sock->sk;
1455         struct unix_sock *u;
1456         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1457         int err = 0;
1458
1459         if (peer) {
1460                 sk = unix_peer_get(sk);
1461
1462                 err = -ENOTCONN;
1463                 if (!sk)
1464                         goto out;
1465                 err = 0;
1466         } else {
1467                 sock_hold(sk);
1468         }
1469
1470         u = unix_sk(sk);
1471         unix_state_lock(sk);
1472         if (!u->addr) {
1473                 sunaddr->sun_family = AF_UNIX;
1474                 sunaddr->sun_path[0] = 0;
1475                 *uaddr_len = sizeof(short);
1476         } else {
1477                 struct unix_address *addr = u->addr;
1478
1479                 *uaddr_len = addr->len;
1480                 memcpy(sunaddr, addr->name, *uaddr_len);
1481         }
1482         unix_state_unlock(sk);
1483         sock_put(sk);
1484 out:
1485         return err;
1486 }
1487
1488 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1489 {
1490         int i;
1491
1492         scm->fp = UNIXCB(skb).fp;
1493         UNIXCB(skb).fp = NULL;
1494
1495         for (i = scm->fp->count-1; i >= 0; i--)
1496                 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1497 }
1498
1499 static void unix_destruct_scm(struct sk_buff *skb)
1500 {
1501         struct scm_cookie scm;
1502         memset(&scm, 0, sizeof(scm));
1503         scm.pid  = UNIXCB(skb).pid;
1504         if (UNIXCB(skb).fp)
1505                 unix_detach_fds(&scm, skb);
1506
1507         /* Alas, it calls VFS */
1508         /* So fscking what? fput() had been SMP-safe since the last Summer */
1509         scm_destroy(&scm);
1510         sock_wfree(skb);
1511 }
1512
1513 /*
1514  * The "user->unix_inflight" variable is protected by the garbage
1515  * collection lock, and we just read it locklessly here. If you go
1516  * over the limit, there might be a tiny race in actually noticing
1517  * it across threads. Tough.
1518  */
1519 static inline bool too_many_unix_fds(struct task_struct *p)
1520 {
1521         struct user_struct *user = current_user();
1522
1523         if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1524                 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1525         return false;
1526 }
1527
1528 #define MAX_RECURSION_LEVEL 4
1529
1530 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1531 {
1532         int i;
1533         unsigned char max_level = 0;
1534         int unix_sock_count = 0;
1535
1536         if (too_many_unix_fds(current))
1537                 return -ETOOMANYREFS;
1538
1539         for (i = scm->fp->count - 1; i >= 0; i--) {
1540                 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1541
1542                 if (sk) {
1543                         unix_sock_count++;
1544                         max_level = max(max_level,
1545                                         unix_sk(sk)->recursion_level);
1546                 }
1547         }
1548         if (unlikely(max_level > MAX_RECURSION_LEVEL))
1549                 return -ETOOMANYREFS;
1550
1551         /*
1552          * Need to duplicate file references for the sake of garbage
1553          * collection.  Otherwise a socket in the fps might become a
1554          * candidate for GC while the skb is not yet queued.
1555          */
1556         UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1557         if (!UNIXCB(skb).fp)
1558                 return -ENOMEM;
1559
1560         for (i = scm->fp->count - 1; i >= 0; i--)
1561                 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1562         return max_level;
1563 }
1564
1565 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1566 {
1567         int err = 0;
1568
1569         UNIXCB(skb).pid  = get_pid(scm->pid);
1570         UNIXCB(skb).uid = scm->creds.uid;
1571         UNIXCB(skb).gid = scm->creds.gid;
1572         UNIXCB(skb).fp = NULL;
1573         unix_get_secdata(scm, skb);
1574         if (scm->fp && send_fds)
1575                 err = unix_attach_fds(scm, skb);
1576
1577         skb->destructor = unix_destruct_scm;
1578         return err;
1579 }
1580
1581 static bool unix_passcred_enabled(const struct socket *sock,
1582                                   const struct sock *other)
1583 {
1584         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1585                !other->sk_socket ||
1586                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1587 }
1588
1589 /*
1590  * Some apps rely on write() giving SCM_CREDENTIALS
1591  * We include credentials if source or destination socket
1592  * asserted SOCK_PASSCRED.
1593  */
1594 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1595                             const struct sock *other)
1596 {
1597         if (UNIXCB(skb).pid)
1598                 return;
1599         if (unix_passcred_enabled(sock, other)) {
1600                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1601                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1602         }
1603 }
1604
1605 static int maybe_init_creds(struct scm_cookie *scm,
1606                             struct socket *socket,
1607                             const struct sock *other)
1608 {
1609         int err;
1610         struct msghdr msg = { .msg_controllen = 0 };
1611
1612         err = scm_send(socket, &msg, scm, false);
1613         if (err)
1614                 return err;
1615
1616         if (unix_passcred_enabled(socket, other)) {
1617                 scm->pid = get_pid(task_tgid(current));
1618                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1619         }
1620         return err;
1621 }
1622
1623 static bool unix_skb_scm_eq(struct sk_buff *skb,
1624                             struct scm_cookie *scm)
1625 {
1626         const struct unix_skb_parms *u = &UNIXCB(skb);
1627
1628         return u->pid == scm->pid &&
1629                uid_eq(u->uid, scm->creds.uid) &&
1630                gid_eq(u->gid, scm->creds.gid) &&
1631                unix_secdata_eq(scm, skb);
1632 }
1633
1634 /*
1635  *      Send AF_UNIX data.
1636  */
1637
1638 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1639                               size_t len)
1640 {
1641         struct sock *sk = sock->sk;
1642         struct net *net = sock_net(sk);
1643         struct unix_sock *u = unix_sk(sk);
1644         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1645         struct sock *other = NULL;
1646         int namelen = 0; /* fake GCC */
1647         int err;
1648         unsigned int hash;
1649         struct sk_buff *skb;
1650         long timeo;
1651         struct scm_cookie scm;
1652         int max_level;
1653         int data_len = 0;
1654         int sk_locked;
1655
1656         wait_for_unix_gc();
1657         err = scm_send(sock, msg, &scm, false);
1658         if (err < 0)
1659                 return err;
1660
1661         err = -EOPNOTSUPP;
1662         if (msg->msg_flags&MSG_OOB)
1663                 goto out;
1664
1665         if (msg->msg_namelen) {
1666                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1667                 if (err < 0)
1668                         goto out;
1669                 namelen = err;
1670         } else {
1671                 sunaddr = NULL;
1672                 err = -ENOTCONN;
1673                 other = unix_peer_get(sk);
1674                 if (!other)
1675                         goto out;
1676         }
1677
1678         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1679             && (err = unix_autobind(sock)) != 0)
1680                 goto out;
1681
1682         err = -EMSGSIZE;
1683         if (len > sk->sk_sndbuf - 32)
1684                 goto out;
1685
1686         if (len > SKB_MAX_ALLOC) {
1687                 data_len = min_t(size_t,
1688                                  len - SKB_MAX_ALLOC,
1689                                  MAX_SKB_FRAGS * PAGE_SIZE);
1690                 data_len = PAGE_ALIGN(data_len);
1691
1692                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1693         }
1694
1695         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1696                                    msg->msg_flags & MSG_DONTWAIT, &err,
1697                                    PAGE_ALLOC_COSTLY_ORDER);
1698         if (skb == NULL)
1699                 goto out;
1700
1701         err = unix_scm_to_skb(&scm, skb, true);
1702         if (err < 0)
1703                 goto out_free;
1704         max_level = err + 1;
1705
1706         skb_put(skb, len - data_len);
1707         skb->data_len = data_len;
1708         skb->len = len;
1709         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1710         if (err)
1711                 goto out_free;
1712
1713         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1714
1715 restart:
1716         if (!other) {
1717                 err = -ECONNRESET;
1718                 if (sunaddr == NULL)
1719                         goto out_free;
1720
1721                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1722                                         hash, &err);
1723                 if (other == NULL)
1724                         goto out_free;
1725         }
1726
1727         if (sk_filter(other, skb) < 0) {
1728                 /* Toss the packet but do not return any error to the sender */
1729                 err = len;
1730                 goto out_free;
1731         }
1732
1733         sk_locked = 0;
1734         unix_state_lock(other);
1735 restart_locked:
1736         err = -EPERM;
1737         if (!unix_may_send(sk, other))
1738                 goto out_unlock;
1739
1740         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1741                 /*
1742                  *      Check with 1003.1g - what should
1743                  *      datagram error
1744                  */
1745                 unix_state_unlock(other);
1746                 sock_put(other);
1747
1748                 if (!sk_locked)
1749                         unix_state_lock(sk);
1750
1751                 err = 0;
1752                 if (unix_peer(sk) == other) {
1753                         unix_peer(sk) = NULL;
1754                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1755
1756                         unix_state_unlock(sk);
1757
1758                         unix_dgram_disconnected(sk, other);
1759                         sock_put(other);
1760                         err = -ECONNREFUSED;
1761                 } else {
1762                         unix_state_unlock(sk);
1763                 }
1764
1765                 other = NULL;
1766                 if (err)
1767                         goto out_free;
1768                 goto restart;
1769         }
1770
1771         err = -EPIPE;
1772         if (other->sk_shutdown & RCV_SHUTDOWN)
1773                 goto out_unlock;
1774
1775         if (sk->sk_type != SOCK_SEQPACKET) {
1776                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1777                 if (err)
1778                         goto out_unlock;
1779         }
1780
1781         /* other == sk && unix_peer(other) != sk if
1782          * - unix_peer(sk) == NULL, destination address bound to sk
1783          * - unix_peer(sk) == sk by time of get but disconnected before lock
1784          */
1785         if (other != sk &&
1786             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1787                 if (timeo) {
1788                         timeo = unix_wait_for_peer(other, timeo);
1789
1790                         err = sock_intr_errno(timeo);
1791                         if (signal_pending(current))
1792                                 goto out_free;
1793
1794                         goto restart;
1795                 }
1796
1797                 if (!sk_locked) {
1798                         unix_state_unlock(other);
1799                         unix_state_double_lock(sk, other);
1800                 }
1801
1802                 if (unix_peer(sk) != other ||
1803                     unix_dgram_peer_wake_me(sk, other)) {
1804                         err = -EAGAIN;
1805                         sk_locked = 1;
1806                         goto out_unlock;
1807                 }
1808
1809                 if (!sk_locked) {
1810                         sk_locked = 1;
1811                         goto restart_locked;
1812                 }
1813         }
1814
1815         if (unlikely(sk_locked))
1816                 unix_state_unlock(sk);
1817
1818         if (sock_flag(other, SOCK_RCVTSTAMP))
1819                 __net_timestamp(skb);
1820         maybe_add_creds(skb, sock, other);
1821         skb_queue_tail(&other->sk_receive_queue, skb);
1822         if (max_level > unix_sk(other)->recursion_level)
1823                 unix_sk(other)->recursion_level = max_level;
1824         unix_state_unlock(other);
1825         other->sk_data_ready(other);
1826         sock_put(other);
1827         scm_destroy(&scm);
1828         return len;
1829
1830 out_unlock:
1831         if (sk_locked)
1832                 unix_state_unlock(sk);
1833         unix_state_unlock(other);
1834 out_free:
1835         kfree_skb(skb);
1836 out:
1837         if (other)
1838                 sock_put(other);
1839         scm_destroy(&scm);
1840         return err;
1841 }
1842
1843 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1844  * bytes, with a minimum of a full page.
1845  */
1846 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1847 /* Send @len bytes from @msg to the peer of a connected stream socket.
      *
      * The data is cut into skbs. Each one is limited to half of sk->sk_sndbuf
      * minus 64 bytes and to SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ, and is
      * appended to the peer's receive queue while the peer's state lock is
      * held. Ancillary file descriptors travel with the first skb only.
      *
      * MSG_OOB fails with -EOPNOTSUPP; a destination address fails with
      * -EISCONN (established socket) or -EOPNOTSUPP; a missing peer fails with
      * -ENOTCONN. When this side is shut down for sending, or the peer is dead
      * or shut down for receiving, the error is -EPIPE and SIGPIPE is sent to
      * the caller unless MSG_NOSIGNAL is set or some data was already queued.
      *
      * Returns the number of bytes queued, or a negative errno if nothing was
      * queued before the failure.
      */
1848 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1849                                size_t len)
1850 {
1851         struct sock *sk = sock->sk;
1852         struct sock *other = NULL;
1853         int err, size;
1854         struct sk_buff *skb;
1855         int sent = 0;
1856         struct scm_cookie scm;
1857         bool fds_sent = false;
1858         int max_level;
1859         int data_len;
1860
1861         wait_for_unix_gc();
1862         err = scm_send(sock, msg, &scm, false);
1863         if (err < 0)
1864                 return err;
1865
1866         err = -EOPNOTSUPP;
1867         if (msg->msg_flags&MSG_OOB)
1868                 goto out_err;
1869
              /* An explicit destination is never accepted on a stream socket */
1870         if (msg->msg_namelen) {
1871                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1872                 goto out_err;
1873         } else {
1874                 err = -ENOTCONN;
1875                 other = unix_peer(sk);
1876                 if (!other)
1877                         goto out_err;
1878         }
1879
1880         if (sk->sk_shutdown & SEND_SHUTDOWN)
1881                 goto pipe_err;
1882
1883         while (sent < len) {
1884                 size = len - sent;
1885
1886                 /* Keep two messages in the pipe so it schedules better */
1887                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1888
1889                 /* allow fallback to order-0 allocations */
1890                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1891
                      /* Whatever exceeds SKB_MAX_HEAD(0) goes into page
                       * fragments, rounded up to whole pages but never more
                       * than @size itself.
                       */
1892                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1893
1894                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1895
1896                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1897                                            msg->msg_flags & MSG_DONTWAIT, &err,
1898                                            get_order(UNIX_SKB_FRAGS_SZ));
1899                 if (!skb)
1900                         goto out_err;
1901
1902                 /* Only send the fds in the first buffer */
1903                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1904                 if (err < 0) {
1905                         kfree_skb(skb);
1906                         goto out_err;
1907                 }
1908                 max_level = err + 1;
1909                 fds_sent = true;
1910
1911                 skb_put(skb, size - data_len);
1912                 skb->data_len = data_len;
1913                 skb->len = size;
1914                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1915                 if (err) {
1916                         kfree_skb(skb);
1917                         goto out_err;
1918                 }
1919
1920                 unix_state_lock(other);
1921
                      /* Re-checked for every skb, under the peer's state lock */
1922                 if (sock_flag(other, SOCK_DEAD) ||
1923                     (other->sk_shutdown & RCV_SHUTDOWN))
1924                         goto pipe_err_free;
1925
1926                 maybe_add_creds(skb, sock, other);
1927                 skb_queue_tail(&other->sk_receive_queue, skb);
1928                 if (max_level > unix_sk(other)->recursion_level)
1929                         unix_sk(other)->recursion_level = max_level;
1930                 unix_state_unlock(other);
1931                 other->sk_data_ready(other);
1932                 sent += size;
1933         }
1934
1935         scm_destroy(&scm);
1936
1937         return sent;
1938
1939 pipe_err_free:
1940         unix_state_unlock(other);
1941         kfree_skb(skb);
1942 pipe_err:
              /* No signal once part of the message has been queued */
1943         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1944                 send_sig(SIGPIPE, current, 0);
1945         err = -EPIPE;
1946 out_err:
1947         scm_destroy(&scm);
              /* A partial send reports its byte count instead of the error */
1948         return sent ? : err;
1949 }
1950 /* Queue @size bytes of @page, starting at @offset, on the receive queue of
      * the peer of @socket. The page is attached as a page fragment through
      * skb_append_pagefrags().
      *
      * The fragment goes into the skb at the tail of the peer's queue when that
      * skb passes unix_skb_scm_eq() against the sender's scm data; otherwise it
      * goes into a newly allocated empty skb, which is then queued.
      *
      * Fails with -EOPNOTSUPP for MSG_OOB, -ENOTCONN without an established
      * peer, -EAGAIN or -ERESTARTSYS when taking the peer's iolock is
      * interrupted, and -EPIPE (plus SIGPIPE unless MSG_NOSIGNAL is set) when
      * this side is shut down for sending or the peer is dead or shut down for
      * receiving.
      *
      * Returns @size on success or a negative errno.
      */
1951 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1952                                     int offset, size_t size, int flags)
1953 {
1954         int err;
1955         bool send_sigpipe = false;
1956         bool init_scm = true;
1957         struct scm_cookie scm;
1958         struct sock *other, *sk = socket->sk;
1959         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1960
1961         if (flags & MSG_OOB)
1962                 return -EOPNOTSUPP;
1963
1964         other = unix_peer(sk);
1965         if (!other || sk->sk_state != TCP_ESTABLISHED)
1966                 return -ENOTCONN;
1967
              /* Only reachable through "goto alloc_skb": both locks are
               * dropped, an empty skb is allocated, and execution falls
               * through to take the locks again and repeat every check.
               */
1968         if (false) {
1969 alloc_skb:
1970                 unix_state_unlock(other);
1971                 mutex_unlock(&unix_sk(other)->iolock);
1972                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1973                                               &err, 0);
1974                 if (!newskb)
1975                         goto err;
1976         }
1977
1978         /* we must acquire iolock as we modify already present
1979          * skbs in the sk_receive_queue and mess with skb->len
1980          */
1981         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1982         if (err) {
1983                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1984                 goto err;
1985         }
1986
1987         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1988                 err = -EPIPE;
1989                 send_sigpipe = true;
1990                 goto err_unlock;
1991         }
1992
1993         unix_state_lock(other);
1994
1995         if (sock_flag(other, SOCK_DEAD) ||
1996             other->sk_shutdown & RCV_SHUTDOWN) {
1997                 err = -EPIPE;
1998                 send_sigpipe = true;
1999                 goto err_state_unlock;
2000         }
2001
              /* Done once only, even when we come back through alloc_skb */
2002         if (init_scm) {
2003                 err = maybe_init_creds(&scm, socket, other);
2004                 if (err)
2005                         goto err_state_unlock;
2006                 init_scm = false;
2007         }
2008
              /* Choose the skb that receives the page. @tail is the queue tail
               * that was found unusable before the locks were dropped (NULL
               * for an empty queue); if it is still the tail, use newskb.
               * A tail with different scm data, or an empty queue, also
               * needs newskb, which is allocated first if we have none yet.
               */
2009         skb = skb_peek_tail(&other->sk_receive_queue);
2010         if (tail && tail == skb) {
2011                 skb = newskb;
2012         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2013                 if (newskb) {
2014                         skb = newskb;
2015                 } else {
2016                         tail = skb;
2017                         goto alloc_skb;
2018                 }
2019         } else if (newskb) {
2020                 /* this is fast path, we don't necessarily need to
2021                  * call to kfree_skb even though with newskb == NULL
2022                  * this - does no harm
2023                  */
2024                 consume_skb(newskb);
2025                 newskb = NULL;
2026         }
2027
              /* Non-zero means the page could not be added to this skb;
               * remember it in @tail and retry with a fresh skb.
               */
2028         if (skb_append_pagefrags(skb, page, offset, size)) {
2029                 tail = skb;
2030                 goto alloc_skb;
2031         }
2032
              /* Account the new bytes in the skb and in the sender's
               * sk_wmem_alloc.
               */
2033         skb->len += size;
2034         skb->data_len += size;
2035         skb->truesize += size;
2036         atomic_add(size, &sk->sk_wmem_alloc);
2037
              /* A new skb still needs its scm data and has to be queued */
2038         if (newskb) {
2039                 err = unix_scm_to_skb(&scm, skb, false);
2040                 if (err)
2041                         goto err_state_unlock;
2042                 spin_lock(&other->sk_receive_queue.lock);
2043                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2044                 spin_unlock(&other->sk_receive_queue.lock);
2045         }
2046
2047         unix_state_unlock(other);
2048         mutex_unlock(&unix_sk(other)->iolock);
2049
2050         other->sk_data_ready(other);
2051         scm_destroy(&scm);
2052         return size;
2053
2054 err_state_unlock:
2055         unix_state_unlock(other);
2056 err_unlock:
2057         mutex_unlock(&unix_sk(other)->iolock);
2058 err:
2059         kfree_skb(newskb);
2060         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2061                 send_sig(SIGPIPE, current, 0);
              /* scm is only valid once maybe_init_creds() has succeeded */
2062         if (!init_scm)
2063                 scm_destroy(&scm);
2064         return err;
2065 }
2066 /* sendmsg entry point for seqpacket sockets (going by the function name).
      *
      * Reports a pending socket error first, then requires the socket to be in
      * TCP_ESTABLISHED state (-ENOTCONN otherwise). Any destination address
      * supplied by the caller is ignored by clearing msg->msg_namelen before
      * the message is handed to unix_dgram_sendmsg(), whose result is returned.
      */
2067 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2068                                   size_t len)
2069 {
2070         int err;
2071         struct sock *sk = sock->sk;
2072
2073         err = sock_error(sk);
2074         if (err)
2075                 return err;
2076
2077         if (sk->sk_state != TCP_ESTABLISHED)
2078                 return -ENOTCONN;
2079
2080         if (msg->msg_namelen)
2081                 msg->msg_namelen = 0;
2082
2083         return unix_dgram_sendmsg(sock, msg, len);
2084 }
2085 /* recvmsg entry point for seqpacket sockets (going by the function name):
      * returns -ENOTCONN unless the socket is in TCP_ESTABLISHED state,
      * otherwise the result of unix_dgram_recvmsg().
      */
2086 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2087                                   size_t size, int flags)
2088 {
2089         struct sock *sk = sock->sk;
2090
2091         if (sk->sk_state != TCP_ESTABLISHED)
2092                 return -ENOTCONN;
2093
2094         return unix_dgram_recvmsg(sock, msg, size, flags);
2095 }
2096 /* Copy the address of @sk, if it has one, into msg->msg_name and store its
      * length in msg->msg_namelen. @msg is left untouched when @sk has no
      * address.
      *
      * msg->msg_name is written without a NULL or size check here; the callers
      * in this part of the file test msg->msg_name before calling.
      */
2097 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2098 {
2099         struct unix_sock *u = unix_sk(sk);
2100
2101         if (u->addr) {
2102                 msg->msg_namelen = u->addr->len;
2103                 memcpy(msg->msg_name, u->addr->name, u->addr->len);
2104         }
2105 }
2106 /* Receive one datagram from @sock into @msg, copying at most @size bytes.
      *
      * Runs with u->iolock held. The datagram comes from
      * __skb_recv_datagram(), starting at the current peek offset. The address
      * of the socket owning the skb (skb->sk, presumably the sender) is copied
      * out when msg->msg_name is set, and credentials and file descriptors are
      * delivered through scm_recv(). With MSG_PEEK the descriptors are
      * duplicated rather than detached from the skb.
      *
      * MSG_OOB fails with -EOPNOTSUPP. An interrupted wait for the lock gives
      * -EAGAIN with MSG_DONTWAIT and -ERESTARTSYS otherwise.
      *
      * Returns the number of bytes copied, or the length of the datagram past
      * the peek offset when MSG_TRUNC is set in @flags; a negative errno on
      * failure.
      */
2107 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2108                               size_t size, int flags)
2109 {
2110         struct scm_cookie scm;
2111         struct sock *sk = sock->sk;
2112         struct unix_sock *u = unix_sk(sk);
2113         int noblock = flags & MSG_DONTWAIT;
2114         struct sk_buff *skb;
2115         int err;
2116         int peeked, skip;
2117
2118         err = -EOPNOTSUPP;
2119         if (flags&MSG_OOB)
2120                 goto out;
2121
2122         err = mutex_lock_interruptible(&u->iolock);
2123         if (unlikely(err)) {
2124                 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
2125                  * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2126                  */
2127                 err = noblock ? -EAGAIN : -ERESTARTSYS;
2128                 goto out;
2129         }
2130
2131         skip = sk_peek_offset(sk, flags);
2132
2133         skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2134         if (!skb) {
2135                 unix_state_lock(sk);
2136                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2137                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2138                     (sk->sk_shutdown & RCV_SHUTDOWN))
2139                         err = 0;
2140                 unix_state_unlock(sk);
2141                 goto out_unlock;
2142         }
2143
              /* Wake tasks sleeping on u->peer_wait, with write-readiness as
               * the poll key.
               */
2144         wake_up_interruptible_sync_poll(&u->peer_wait,
2145                                         POLLOUT | POLLWRNORM | POLLWRBAND);
2146
2147         if (msg->msg_name)
2148                 unix_copy_addr(msg, skb->sk);
2149
              /* Clamp to what is left of the datagram; flag truncation when
               * the caller's buffer is too small for it.
               */
2150         if (size > skb->len - skip)
2151                 size = skb->len - skip;
2152         else if (size < skb->len - skip)
2153                 msg->msg_flags |= MSG_TRUNC;
2154
2155         err = skb_copy_datagram_msg(skb, skip, msg, size);
2156         if (err)
2157                 goto out_free;
2158
2159         if (sock_flag(sk, SOCK_RCVTSTAMP))
2160                 __sock_recv_timestamp(msg, sk, skb);
2161
2162         memset(&scm, 0, sizeof(scm));
2163
2164         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2165         unix_set_secdata(&scm, skb);
2166
2167         if (!(flags & MSG_PEEK)) {
2168                 if (UNIXCB(skb).fp)
2169                         unix_detach_fds(&scm, skb);
2170
2171                 sk_peek_offset_bwd(sk, skb->len);
2172         } else {
2173                 /* It is questionable: on PEEK we could:
2174                    - do not return fds - good, but too simple 8)
2175                    - return fds, and do not return them on read (old strategy,
2176                      apparently wrong)
2177                    - clone fds (I chose it for now, it is the most universal
2178                      solution)
2179
2180                    POSIX 1003.1g does not actually define this clearly
2181                    at all. POSIX 1003.1g doesn't define a lot of things
2182                    clearly however!
2183
2184                 */
2185
2186                 sk_peek_offset_fwd(sk, size);
2187
2188                 if (UNIXCB(skb).fp)
2189                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2190         }
              /* With MSG_TRUNC in @flags report the datagram length past the
               * peek offset, otherwise the number of bytes copied.
               */
2191         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2192
2193         scm_recv(sock, msg, &scm, flags);
2194
2195 out_free:
2196         skb_free_datagram(sk, skb);
2197 out_unlock:
2198         mutex_unlock(&u->iolock);
2199 out:
2200         return err;
2201 }
2202
2203 /*
2204  *      Sleep until more data has arrived. But check for races..
      *
      *      @last and @last_len are the tail of the receive queue and its
      *      length as the caller saw them. The wait ends when the tail skb
      *      or its length differs from that, when sk->sk_err is set, when
      *      RCV_SHUTDOWN is set, when a signal is pending or when @timeo
      *      has run down to 0. The state lock is dropped while sleeping.
      *
      *      @freezable selects freezable_schedule_timeout() instead of
      *      schedule_timeout().
      *
      *      Returns what is left of @timeo.
2205  */
2206 static long unix_stream_data_wait(struct sock *sk, long timeo,
2207                                   struct sk_buff *last, unsigned int last_len,
2208                                   bool freezable)
2209 {
2210         struct sk_buff *tail;
2211         DEFINE_WAIT(wait);
2212
2213         unix_state_lock(sk);
2214
2215         for (;;) {
                      /* Get on the wait queue before looking at the queue
                       * tail, with the state lock held.
                       */
2216                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2217
2218                 tail = skb_peek_tail(&sk->sk_receive_queue);
2219                 if (tail != last ||
2220                     (tail && tail->len != last_len) ||
2221                     sk->sk_err ||
2222                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2223                     signal_pending(current) ||
2224                     !timeo)
2225                         break;
2226
2227                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2228                 unix_state_unlock(sk);
2229                 if (freezable)
2230                         timeo = freezable_schedule_timeout(timeo);
2231                 else
2232                         timeo = schedule_timeout(timeo);
2233                 unix_state_lock(sk);
2234
                      /* Leaves the loop without clearing SOCKWQ_ASYNC_WAITDATA */
2235                 if (sock_flag(sk, SOCK_DEAD))
2236                         break;
2237
2238                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2239         }
2240
2241         finish_wait(sk_sleep(sk), &wait);
2242         unix_state_unlock(sk);
2243         return timeo;
2244 }
2245 /* Bytes of @skb not yet consumed: skb->len minus UNIXCB(skb).consumed. */
2246 static unsigned int unix_skb_len(const struct sk_buff *skb)
2247 {
2248         return skb->len - UNIXCB(skb).consumed;
2249 }
2250 /* Parameters of one unix_stream_read_generic() call. */
2251 struct unix_stream_read_state {
              /* Called for each skb as (skb, skip, chunk, state); returns the
               * number of bytes handled, or a negative value on failure.
               */
2252         int (*recv_actor)(struct sk_buff *, int, int,
2253                           struct unix_stream_read_state *);
2254         struct socket *socket;          /* socket to read from */
2255         struct msghdr *msg;             /* recvmsg target; not set for splice */
2256         struct pipe_inode_info *pipe;   /* splice target */
2257         size_t size;                    /* maximum number of bytes to read */
2258         int flags;                      /* MSG_* receive flags */
2259         unsigned int splice_flags;      /* handed to skb_splice_bits() */
2260 };
2261 /* Receive loop shared by unix_stream_recvmsg() and
      * unix_stream_splice_read().
      *
      * Walks sk->sk_receive_queue with u->iolock held and passes each skb to
      * state->recv_actor until state->size bytes were delivered, data from a
      * different writer is met (when credentials are being passed), file
      * descriptors were picked up, or the queue is empty and the low-water
      * mark is satisfied. With an empty queue it sleeps in
      * unix_stream_data_wait() (iolock dropped) and then rescans the queue.
      *
      * @state:     source socket, destination, size and flags of the read
      * @freezable: passed through to unix_stream_data_wait()
      *
      * Errors: -EINVAL if the socket is not in TCP_ESTABLISHED state,
      * -EOPNOTSUPP for MSG_OOB, -ECONNRESET if the socket is dead, -EAGAIN if
      * there is no data and no time left to wait, sock_intr_errno() when a
      * signal interrupts the wait, -EFAULT if recv_actor fails before anything
      * was delivered.
      *
      * Returns the number of bytes delivered if any, otherwise 0 or a negative
      * errno.
      */
2262 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2263                                     bool freezable)
2264 {
2265         struct scm_cookie scm;
2266         struct socket *sock = state->socket;
2267         struct sock *sk = sock->sk;
2268         struct unix_sock *u = unix_sk(sk);
2269         int copied = 0;
2270         int flags = state->flags;
2271         int noblock = flags & MSG_DONTWAIT;
2272         bool check_creds = false;
2273         int target;
2274         int err = 0;
2275         long timeo;
2276         int skip;
2277         size_t size = state->size;
2278         unsigned int last_len;
2279
2280         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2281                 err = -EINVAL;
2282                 goto out;
2283         }
2284
2285         if (unlikely(flags & MSG_OOB)) {
2286                 err = -EOPNOTSUPP;
2287                 goto out;
2288         }
2289
2290         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2291         timeo = sock_rcvtimeo(sk, noblock);
2292
2293         memset(&scm, 0, sizeof(scm));
2294
2295         /* Lock the socket to prevent queue disordering
2296          * while sleeps in memcpy_tomsg
2297          */
2298         mutex_lock(&u->iolock);
2299
2300         if (flags & MSG_PEEK)
2301                 skip = sk_peek_offset(sk, flags);
2302         else
2303                 skip = 0;
2304
2305         do {
2306                 int chunk;
2307                 bool drop_skb;
2308                 struct sk_buff *skb, *last;
2309
     redo:
2310                 unix_state_lock(sk);
2311                 if (sock_flag(sk, SOCK_DEAD)) {
2312                         err = -ECONNRESET;
2313                         goto unlock;
2314                 }
2315                 last = skb = skb_peek(&sk->sk_receive_queue);
2316                 last_len = last ? last->len : 0;
2317 again:
2318                 if (skb == NULL) {
2319                         unix_sk(sk)->recursion_level = 0;
2320                         if (copied >= target)
2321                                 goto unlock;
2322
2323                         /*
2324                          *      POSIX 1003.1g mandates this order.
2325                          */
2326
2327                         err = sock_error(sk);
2328                         if (err)
2329                                 goto unlock;
2330                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2331                                 goto unlock;
2332
2333                         unix_state_unlock(sk);
2334                         if (!timeo) {
2335                                 err = -EAGAIN;
2336                                 break;
2337                         }
2338
2339                         mutex_unlock(&u->iolock);
2340
2341                         timeo = unix_stream_data_wait(sk, timeo, last,
2342                                                       last_len, freezable);
2343
2344                         if (signal_pending(current)) {
2345                                 err = sock_intr_errno(timeo);
2346                                 scm_destroy(&scm);
2347                                 goto out;
2348                         }
2349
2350                         mutex_lock(&u->iolock);
                              /* Rescan the queue from its head. A "continue"
                               * here would jump to the "while (size)" test and
                               * end the loop when size is 0 (a caller that only
                               * wants the ancillary data), so the data we just
                               * waited for would never be looked at.
                               */
2351                         goto redo;
2352 unlock:
2353                         unix_state_unlock(sk);
2354                         break;
2355                 }
2356
                      /* Step over skbs that lie entirely before the peek offset */
2357                 while (skip >= unix_skb_len(skb)) {
2358                         skip -= unix_skb_len(skb);
2359                         last = skb;
2360                         last_len = skb->len;
2361                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2362                         if (!skb)
2363                                 goto again;
2364                 }
2365
2366                 unix_state_unlock(sk);
2367
2368                 if (check_creds) {
2369                         /* Never glue messages from different writers */
2370                         if (!unix_skb_scm_eq(skb, &scm))
2371                                 break;
2372                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2373                         /* Copy credentials */
2374                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2375                         unix_set_secdata(&scm, skb);
2376                         check_creds = true;
2377                 }
2378
2379                 /* Copy address just once */
2380                 if (state->msg && state->msg->msg_name) {
2381                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2382                                          state->msg->msg_name);
2383                         unix_copy_addr(state->msg, skb->sk);
2384                         sunaddr = NULL;
2385                 }
2386
2387                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                      /* Hold our own reference while the actor runs */
2388                 skb_get(skb);
2389                 chunk = state->recv_actor(skb, skip, chunk, state);
2390                 drop_skb = !unix_skb_len(skb);
2391                 /* skb is only safe to use if !drop_skb */
2392                 consume_skb(skb);
2393                 if (chunk < 0) {
2394                         if (copied == 0)
2395                                 copied = -EFAULT;
2396                         break;
2397                 }
2398                 copied += chunk;
2399                 size -= chunk;
2400
2401                 if (drop_skb) {
2402                         /* the skb was touched by a concurrent reader;
2403                          * we should not expect anything from this skb
2404                          * anymore and assume it invalid - we can be
2405                          * sure it was dropped from the socket queue
2406                          *
2407                          * let's report a short read
2408                          */
2409                         err = 0;
2410                         break;
2411                 }
2412
2413                 /* Mark read part of skb as used */
2414                 if (!(flags & MSG_PEEK)) {
2415                         UNIXCB(skb).consumed += chunk;
2416
2417                         sk_peek_offset_bwd(sk, chunk);
2418
2419                         if (UNIXCB(skb).fp)
2420                                 unix_detach_fds(&scm, skb);
2421
2422                         if (unix_skb_len(skb))
2423                                 break;
2424
2425                         skb_unlink(skb, &sk->sk_receive_queue);
2426                         consume_skb(skb);
2427
2428                         if (scm.fp)
2429                                 break;
2430                 } else {
2431                         /* It is questionable, see note in unix_dgram_recvmsg.
2432                          */
2433                         if (UNIXCB(skb).fp)
2434                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2435
2436                         sk_peek_offset_fwd(sk, chunk);
2437
2438                         if (UNIXCB(skb).fp)
2439                                 break;
2440
2441                         skip = 0;
2442                         last = skb;
2443                         last_len = skb->len;
2444                         unix_state_lock(sk);
2445                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2446                         if (skb)
2447                                 goto again;
2448                         unix_state_unlock(sk);
2449                         break;
2450                 }
2451         } while (size);
2452
2453         mutex_unlock(&u->iolock);
              /* Without a msghdr (splice) there is nowhere to deliver scm data */
2454         if (state->msg)
2455                 scm_recv(sock, state->msg, &scm, flags);
2456         else
2457                 scm_destroy(&scm);
2458 out:
2459         return copied ? : err;
2460 }
2461 /* recv_actor used by unix_stream_recvmsg(): copy @chunk bytes of @skb,
      * starting @skip bytes past its already consumed part, into state->msg.
      *
      * Returns @chunk on success, otherwise the non-zero result of
      * skb_copy_datagram_msg().
      */
2462 static int unix_stream_read_actor(struct sk_buff *skb,
2463                                   int skip, int chunk,
2464                                   struct unix_stream_read_state *state)
2465 {
2466         int ret;
2467
2468         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2469                                     state->msg, chunk);
2470         return ret ?: chunk;
2471 }
2472 /* recvmsg entry point for stream sockets (going by the function name):
      * runs unix_stream_read_generic() with unix_stream_read_actor() copying
      * into @msg and with freezable waits enabled. Returns its result.
      */
2473 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2474                                size_t size, int flags)
2475 {
2476         struct unix_stream_read_state state = {
2477                 .recv_actor = unix_stream_read_actor,
2478                 .socket = sock,
2479                 .msg = msg,
2480                 .size = size,
2481                 .flags = flags
2482         };
2483
2484         return unix_stream_read_generic(&state, true);
2485 }
2486 /* Callback handed to skb_splice_bits() by unix_stream_splice_actor().
      *
      * Releases u->iolock around splice_to_pipe() and takes it again before
      * returning, so it expects to be called with u->iolock held.
      * NOTE(review): presumably because splice_to_pipe() may sleep - confirm.
      *
      * Returns the result of splice_to_pipe().
      */
2487 static ssize_t skb_unix_socket_splice(struct sock *sk,
2488                                       struct pipe_inode_info *pipe,
2489                                       struct splice_pipe_desc *spd)
2490 {
2491         int ret;
2492         struct unix_sock *u = unix_sk(sk);
2493
2494         mutex_unlock(&u->iolock);
2495         ret = splice_to_pipe(pipe, spd);
2496         mutex_lock(&u->iolock);
2497
2498         return ret;
2499 }
2500 /* recv_actor used by unix_stream_splice_read(): splice @chunk bytes of
      * @skb, starting @skip bytes past its already consumed part, into
      * state->pipe through skb_splice_bits(), with skb_unix_socket_splice()
      * as the splice callback. Returns the result of skb_splice_bits().
      */
2501 static int unix_stream_splice_actor(struct sk_buff *skb,
2502                                     int skip, int chunk,
2503                                     struct unix_stream_read_state *state)
2504 {
2505         return skb_splice_bits(skb, state->socket->sk,
2506                                UNIXCB(skb).consumed + skip,
2507                                state->pipe, chunk, state->splice_flags,
2508                                skb_unix_socket_splice);
2509 }
2510 /* Splice up to @size bytes from @sock into @pipe.
      *
      * A non-zero *@ppos is rejected with -ESPIPE. The read is non-blocking
      * (MSG_DONTWAIT) when the socket's file has O_NONBLOCK set or @flags has
      * SPLICE_F_NONBLOCK. No msghdr is supplied, so unix_stream_read_generic()
      * delivers no ancillary data, and waits are not freezable.
      *
      * Returns the result of unix_stream_read_generic().
      */
2511 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2512                                        struct pipe_inode_info *pipe,
2513                                        size_t size, unsigned int flags)
2514 {
2515         struct unix_stream_read_state state = {
2516                 .recv_actor = unix_stream_splice_actor,
2517                 .socket = sock,
2518                 .pipe = pipe,
2519                 .size = size,
2520                 .splice_flags = flags,
2521         };
2522
2523         if (unlikely(*ppos))
2524                 return -ESPIPE;
2525
2526         if (sock->file->f_flags & O_NONBLOCK ||
2527             flags & SPLICE_F_NONBLOCK)
2528                 state.flags = MSG_DONTWAIT;
2529
2530         return unix_stream_read_generic(&state, false);
2531 }
2532 /* Shut down reading, writing or both on @sock.
      *
      * @mode must be SHUT_RD, SHUT_WR or SHUT_RDWR, otherwise -EINVAL is
      * returned. The matching bits are set in sk->sk_shutdown under the state
      * lock and sk->sk_state_change() is called. For a stream or seqpacket
      * socket with a peer, the mirrored bits are set on the peer as well
      * (our receive side is its send side and vice versa), and the peer gets
      * a state-change call and, where applicable, an async wake-up.
      *
      * Returns 0 for any valid @mode.
      */
2533 static int unix_shutdown(struct socket *sock, int mode)
2534 {
2535         struct sock *sk = sock->sk;
2536         struct sock *other;
2537
2538         if (mode < SHUT_RD || mode > SHUT_RDWR)
2539                 return -EINVAL;
2540         /* This maps:
2541          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2542          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2543          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2544          */
2545         ++mode;
2546
2547         unix_state_lock(sk);
2548         sk->sk_shutdown |= mode;
2549         other = unix_peer(sk);
              /* Pin the peer so it can be used after our lock is dropped */
2550         if (other)
2551                 sock_hold(other);
2552         unix_state_unlock(sk);
2553         sk->sk_state_change(sk);
2554
2555         if (other &&
2556                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2557
2558                 int peer_mode = 0;
2559
2560                 if (mode&RCV_SHUTDOWN)
2561                         peer_mode |= SEND_SHUTDOWN;
2562                 if (mode&SEND_SHUTDOWN)
2563                         peer_mode |= RCV_SHUTDOWN;
2564                 unix_state_lock(other);
2565                 other->sk_shutdown |= peer_mode;
2566                 unix_state_unlock(other);
2567                 other->sk_state_change(other);
2568                 if (peer_mode == SHUTDOWN_MASK)
2569                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2570                 else if (peer_mode & RCV_SHUTDOWN)
2571                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2572         }
2573         if (other)
2574                 sock_put(other);
2575
2576         return 0;
2577 }
2578 /* Number of bytes waiting in the receive queue of @sk.
      *
      * For stream and seqpacket sockets this is the sum of the unconsumed
      * bytes of all queued skbs; for other socket types it is the length of
      * the first queued skb, or 0 when the queue is empty. The queue is read
      * under its own spinlock.
      *
      * Returns -EINVAL for a socket in TCP_LISTEN state.
      */
2579 long unix_inq_len(struct sock *sk)
2580 {
2581         struct sk_buff *skb;
2582         long amount = 0;
2583
2584         if (sk->sk_state == TCP_LISTEN)
2585                 return -EINVAL;
2586
2587         spin_lock(&sk->sk_receive_queue.lock);
2588         if (sk->sk_type == SOCK_STREAM ||
2589             sk->sk_type == SOCK_SEQPACKET) {
2590                 skb_queue_walk(&sk->sk_receive_queue, skb)
2591                         amount += unix_skb_len(skb);
2592         } else {
2593                 skb = skb_peek(&sk->sk_receive_queue);
2594                 if (skb)
2595                         amount = skb->len;
2596         }
2597         spin_unlock(&sk->sk_receive_queue.lock);
2598
2599         return amount;
2600 }
2601 EXPORT_SYMBOL_GPL(unix_inq_len);
2602
/*
 * unix_outq_len - return sk_wmem_alloc_get() for the socket, widened to long.
 */
long unix_outq_len(struct sock *sk)
{
        long pending = sk_wmem_alloc_get(sk);

        return pending;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2608
2609 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2610 {
2611         struct sock *sk = sock->sk;
2612         long amount = 0;
2613         int err;
2614
2615         switch (cmd) {
2616         case SIOCOUTQ:
2617                 amount = unix_outq_len(sk);
2618                 err = put_user(amount, (int __user *)arg);
2619                 break;
2620         case SIOCINQ:
2621                 amount = unix_inq_len(sk);
2622                 if (amount < 0)
2623                         err = amount;
2624                 else
2625                         err = put_user(amount, (int __user *)arg);
2626                 break;
2627         default:
2628                 err = -ENOIOCTLCMD;
2629                 break;
2630         }
2631         return err;
2632 }
2633
2634 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2635 {
2636         struct sock *sk = sock->sk;
2637         unsigned int mask;
2638
2639         sock_poll_wait(file, sk_sleep(sk), wait);
2640         mask = 0;
2641
2642         /* exceptional events? */
2643         if (sk->sk_err)
2644                 mask |= POLLERR;
2645         if (sk->sk_shutdown == SHUTDOWN_MASK)
2646                 mask |= POLLHUP;
2647         if (sk->sk_shutdown & RCV_SHUTDOWN)
2648                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2649
2650         /* readable? */
2651         if (!skb_queue_empty(&sk->sk_receive_queue))
2652                 mask |= POLLIN | POLLRDNORM;
2653
2654         /* Connection-based need to check for termination and startup */
2655         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2656             sk->sk_state == TCP_CLOSE)
2657                 mask |= POLLHUP;
2658
2659         /*
2660          * we set writable also when the other side has shut down the
2661          * connection. This prevents stuck sockets.
2662          */
2663         if (unix_writable(sk))
2664                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2665
2666         return mask;
2667 }
2668
2669 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2670                                     poll_table *wait)
2671 {
2672         struct sock *sk = sock->sk, *other;
2673         unsigned int mask, writable;
2674
2675         sock_poll_wait(file, sk_sleep(sk), wait);
2676         mask = 0;
2677
2678         /* exceptional events? */
2679         if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2680                 mask |= POLLERR |
2681                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2682
2683         if (sk->sk_shutdown & RCV_SHUTDOWN)
2684                 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2685         if (sk->sk_shutdown == SHUTDOWN_MASK)
2686                 mask |= POLLHUP;
2687
2688         /* readable? */
2689         if (!skb_queue_empty(&sk->sk_receive_queue))
2690                 mask |= POLLIN | POLLRDNORM;
2691
2692         /* Connection-based need to check for termination and startup */
2693         if (sk->sk_type == SOCK_SEQPACKET) {
2694                 if (sk->sk_state == TCP_CLOSE)
2695                         mask |= POLLHUP;
2696                 /* connection hasn't started yet? */
2697                 if (sk->sk_state == TCP_SYN_SENT)
2698                         return mask;
2699         }
2700
2701         /* No write status requested, avoid expensive OUT tests. */
2702         if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2703                 return mask;
2704
2705         writable = unix_writable(sk);
2706         if (writable) {
2707                 unix_state_lock(sk);
2708
2709                 other = unix_peer(sk);
2710                 if (other && unix_peer(other) != sk &&
2711                     unix_recvq_full(other) &&
2712                     unix_dgram_peer_wake_me(sk, other))
2713                         writable = 0;
2714
2715                 unix_state_unlock(sk);
2716         }
2717
2718         if (writable)
2719                 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2720         else
2721                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2722
2723         return mask;
2724 }
2725
2726 #ifdef CONFIG_PROC_FS
2727
/*
 * A position value is split into two fields: the low BUCKET_SPACE bits
 * carry an offset and the bits above them carry a bucket number.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

/* Extract the bucket number from a packed position. */
#define get_bucket(x) ((x) >> BUCKET_SPACE)
/* Extract the offset from a packed position. */
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
/* Pack bucket number @b and offset @o into one position value. */
#define set_bucket_offset(b, o) (((b) << BUCKET_SPACE) | (o))
2733
2734 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2735 {
2736         unsigned long offset = get_offset(*pos);
2737         unsigned long bucket = get_bucket(*pos);
2738         struct sock *sk;
2739         unsigned long count = 0;
2740
2741         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2742                 if (sock_net(sk) != seq_file_net(seq))
2743                         continue;
2744                 if (++count == offset)
2745                         break;
2746         }
2747
2748         return sk;
2749 }
2750
2751 static struct sock *unix_next_socket(struct seq_file *seq,
2752                                      struct sock *sk,
2753                                      loff_t *pos)
2754 {
2755         unsigned long bucket;
2756
2757         while (sk > (struct sock *)SEQ_START_TOKEN) {
2758                 sk = sk_next(sk);
2759                 if (!sk)
2760                         goto next_bucket;
2761                 if (sock_net(sk) == seq_file_net(seq))
2762                         return sk;
2763         }
2764
2765         do {
2766                 sk = unix_from_bucket(seq, pos);
2767                 if (sk)
2768                         return sk;
2769
2770 next_bucket:
2771                 bucket = get_bucket(*pos) + 1;
2772                 *pos = set_bucket_offset(bucket, 1);
2773         } while (bucket < ARRAY_SIZE(unix_socket_table));
2774
2775         return NULL;
2776 }
2777
2778 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2779         __acquires(unix_table_lock)
2780 {
2781         spin_lock(&unix_table_lock);
2782
2783         if (!*pos)
2784                 return SEQ_START_TOKEN;
2785
2786         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2787                 return NULL;
2788
2789         return unix_next_socket(seq, NULL, pos);
2790 }
2791
2792 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2793 {
2794         ++*pos;
2795         return unix_next_socket(seq, v, pos);
2796 }
2797
2798 static void unix_seq_stop(struct seq_file *seq, void *v)
2799         __releases(unix_table_lock)
2800 {
2801         spin_unlock(&unix_table_lock);
2802 }
2803
2804 static int unix_seq_show(struct seq_file *seq, void *v)
2805 {
2806
2807         if (v == SEQ_START_TOKEN)
2808                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2809                          "Inode Path\n");
2810         else {
2811                 struct sock *s = v;
2812                 struct unix_sock *u = unix_sk(s);
2813                 unix_state_lock(s);
2814
2815                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2816                         s,
2817                         atomic_read(&s->sk_refcnt),
2818                         0,
2819                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2820                         s->sk_type,
2821                         s->sk_socket ?
2822                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2823                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2824                         sock_i_ino(s));
2825
2826                 if (u->addr) {
2827                         int i, len;
2828                         seq_putc(seq, ' ');
2829
2830                         i = 0;
2831                         len = u->addr->len - sizeof(short);
2832                         if (!UNIX_ABSTRACT(s))
2833                                 len--;
2834                         else {
2835                                 seq_putc(seq, '@');
2836                                 i++;
2837                         }
2838                         for ( ; i < len; i++)
2839                                 seq_putc(seq, u->addr->name->sun_path[i]);
2840                 }
2841                 unix_state_unlock(s);
2842                 seq_putc(seq, '\n');
2843         }
2844
2845         return 0;
2846 }
2847
2848 static const struct seq_operations unix_seq_ops = {
2849         .start  = unix_seq_start,
2850         .next   = unix_seq_next,
2851         .stop   = unix_seq_stop,
2852         .show   = unix_seq_show,
2853 };
2854
2855 static int unix_seq_open(struct inode *inode, struct file *file)
2856 {
2857         return seq_open_net(inode, file, &unix_seq_ops,
2858                             sizeof(struct seq_net_private));
2859 }
2860
2861 static const struct file_operations unix_seq_fops = {
2862         .owner          = THIS_MODULE,
2863         .open           = unix_seq_open,
2864         .read           = seq_read,
2865         .llseek         = seq_lseek,
2866         .release        = seq_release_net,
2867 };
2868
2869 #endif
2870
2871 static const struct net_proto_family unix_family_ops = {
2872         .family = PF_UNIX,
2873         .create = unix_create,
2874         .owner  = THIS_MODULE,
2875 };
2876
2877
2878 static int __net_init unix_net_init(struct net *net)
2879 {
2880         int error = -ENOMEM;
2881
2882         net->unx.sysctl_max_dgram_qlen = 10;
2883         if (unix_sysctl_register(net))
2884                 goto out;
2885
2886 #ifdef CONFIG_PROC_FS
2887         if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2888                 unix_sysctl_unregister(net);
2889                 goto out;
2890         }
2891 #endif
2892         error = 0;
2893 out:
2894         return error;
2895 }
2896
2897 static void __net_exit unix_net_exit(struct net *net)
2898 {
2899         unix_sysctl_unregister(net);
2900         remove_proc_entry("unix", net->proc_net);
2901 }
2902
2903 static struct pernet_operations unix_net_ops = {
2904         .init = unix_net_init,
2905         .exit = unix_net_exit,
2906 };
2907
2908 static int __init af_unix_init(void)
2909 {
2910         int rc = -1;
2911
2912         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2913
2914         rc = proto_register(&unix_proto, 1);
2915         if (rc != 0) {
2916                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2917                 goto out;
2918         }
2919
2920         sock_register(&unix_family_ops);
2921         register_pernet_subsys(&unix_net_ops);
2922 out:
2923         return rc;
2924 }
2925
2926 static void __exit af_unix_exit(void)
2927 {
2928         sock_unregister(PF_UNIX);
2929         proto_unregister(&unix_proto);
2930         unregister_pernet_subsys(&unix_net_ops);
2931 }
2932
2933 /* Earlier than device_initcall() so that other drivers invoking
2934    request_module() don't end up in a loop when modprobe tries
2935    to use a UNIX socket. But later than subsys_initcall() because
2936    we depend on stuff initialised there */
2937 fs_initcall(af_unix_init);
2938 module_exit(af_unix_exit);
2939
2940 MODULE_LICENSE("GPL");
2941 MODULE_ALIAS_NETPROTO(PF_UNIX);