2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
53 * Known differences from reference BSD that was tested:
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122 EXPORT_SYMBOL_GPL(unix_socket_table);
123 DEFINE_SPINLOCK(unix_table_lock);
124 EXPORT_SYMBOL_GPL(unix_table_lock);
125 static atomic_long_t unix_nr_socks;
128 static struct hlist_head *unix_sockets_unbound(void *addr)
130 unsigned long hash = (unsigned long)addr;
134 hash %= UNIX_HASH_SIZE;
135 return &unix_socket_table[UNIX_HASH_SIZE + hash];
138 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
140 #ifdef CONFIG_SECURITY_NETWORK
141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143 UNIXCB(skb).secid = scm->secid;
146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148 scm->secid = UNIXCB(skb).secid;
151 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153 return (scm->secid == UNIXCB(skb).secid);
156 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
159 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
162 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
166 #endif /* CONFIG_SECURITY_NETWORK */
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */
174 static inline unsigned int unix_hash_fold(__wsum n)
176 unsigned int hash = (__force unsigned int)csum_fold(n);
179 return hash&(UNIX_HASH_SIZE-1);
182 #define unix_peer(sk) (unix_sk(sk)->peer)
/* Is osk's peer pointer aimed back at sk? */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}
189 static inline int unix_may_send(struct sock *sk, struct sock *osk)
191 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
194 static inline int unix_recvq_full(struct sock const *sk)
196 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
/* Return the peer of s with an extra reference, or NULL.  The state lock
 * makes the peer read and the sock_hold() atomic w.r.t. disconnect.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
212 static inline void unix_release_addr(struct unix_address *addr)
214 if (atomic_dec_and_test(&addr->refcnt))
219 * Check unix socket name:
220 * - should be not zero length.
221 * - if started by not zero, should be NULL terminated (FS object)
222 * - if started by zero, it is abstract name.
225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
229 if (len <= sizeof(short) || len > sizeof(*sunaddr))
231 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
233 if (sunaddr->sun_path[0]) {
235 * This may look like an off by one error but it is a bit more
236 * subtle. 108 is the longest valid AF_UNIX path for a binding.
237 * sun_path[108] doesn't as such exist. However in kernel space
238 * we are guaranteed that it is a valid memory location in our
239 * kernel address buffer.
241 ((char *)sunaddr)[len] = 0;
242 len = strlen(sunaddr->sun_path)+1+sizeof(short);
246 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
/* Unhash sk from the global table; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/* Hash sk into the given bucket; caller holds unix_table_lock. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
261 static inline void unix_remove_socket(struct sock *sk)
263 spin_lock(&unix_table_lock);
264 __unix_remove_socket(sk);
265 spin_unlock(&unix_table_lock);
268 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
270 spin_lock(&unix_table_lock);
271 __unix_insert_socket(list, sk);
272 spin_unlock(&unix_table_lock);
275 static struct sock *__unix_find_socket_byname(struct net *net,
276 struct sockaddr_un *sunname,
277 int len, int type, unsigned int hash)
281 sk_for_each(s, &unix_socket_table[hash ^ type]) {
282 struct unix_sock *u = unix_sk(s);
284 if (!net_eq(sock_net(s), net))
287 if (u->addr->len == len &&
288 !memcmp(u->addr->name, sunname, len))
296 static inline struct sock *unix_find_socket_byname(struct net *net,
297 struct sockaddr_un *sunname,
303 spin_lock(&unix_table_lock);
304 s = __unix_find_socket_byname(net, sunname, len, type, hash);
307 spin_unlock(&unix_table_lock);
311 static struct sock *unix_find_socket_byinode(struct inode *i)
315 spin_lock(&unix_table_lock);
317 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
318 struct dentry *dentry = unix_sk(s)->path.dentry;
320 if (dentry && d_real_inode(dentry) == i) {
327 spin_unlock(&unix_table_lock);
331 /* Support code for asymmetrically connected dgram sockets
333 * If a datagram socket is connected to a socket not itself connected
334 * to the first socket (eg, /dev/log), clients may only enqueue more
335 * messages if the present receive queue of the server socket is not
336 * "too large". This means there's a second writeability condition
337 * poll and sendmsg need to test. The dgram recv code will do a wake
338 * up on the peer_wait wait queue of a socket upon reception of a
339 * datagram which needs to be propagated to sleeping would-be writers
340 * since these might not have sent anything so far. This can't be
341 * accomplished via poll_wait because the lifetime of the server
342 * socket might be less than that of its clients if these break their
343 * association with it or if the server socket is closed while clients
344 * are still connected to it and there's no way to inform "a polling
345 * implementation" that it should let go of a certain wait queue
347 * In order to propagate a wake up, a wait_queue_t of the client
348 * socket is enqueued on the peer_wait queue of the server socket
349 * whose wake function does a wake_up on the ordinary client socket
350 * wait queue. This connection is established whenever a write (or
351 * poll for write) hit the flow control condition and broken when the
352 * association to the server socket is dissolved or after a wake up
356 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
360 wait_queue_head_t *u_sleep;
362 u = container_of(q, struct unix_sock, peer_wake);
364 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
366 u->peer_wake.private = NULL;
368 /* relaying can only happen while the wq still exists */
369 u_sleep = sk_sleep(&u->sk);
371 wake_up_interruptible_poll(u_sleep, key);
376 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
378 struct unix_sock *u, *u_other;
382 u_other = unix_sk(other);
384 spin_lock(&u_other->peer_wait.lock);
386 if (!u->peer_wake.private) {
387 u->peer_wake.private = other;
388 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
393 spin_unlock(&u_other->peer_wait.lock);
397 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400 struct unix_sock *u, *u_other;
403 u_other = unix_sk(other);
404 spin_lock(&u_other->peer_wait.lock);
406 if (u->peer_wake.private == other) {
407 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
408 u->peer_wake.private = NULL;
411 spin_unlock(&u_other->peer_wait.lock);
414 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417 unix_dgram_peer_wake_disconnect(sk, other);
418 wake_up_interruptible_poll(sk_sleep(sk),
425 * - unix_peer(sk) == other
426 * - association is stable
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 (caller must wait) when other's queue is still full after we
 * registered for a wakeup; otherwise unregisters and returns 0.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
443 static int unix_writable(const struct sock *sk)
445 return sk->sk_state != TCP_LISTEN &&
446 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
449 static void unix_write_space(struct sock *sk)
451 struct socket_wq *wq;
454 if (unix_writable(sk)) {
455 wq = rcu_dereference(sk->sk_wq);
456 if (wq_has_sleeper(wq))
457 wake_up_interruptible_sync_poll(&wq->wait,
458 POLLOUT | POLLWRNORM | POLLWRBAND);
459 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
464 /* When dgram socket disconnects (or changes its peer), we clear its receive
465 * queue of packets arrived from previous peer. First, it allows to do
466 * flow control based only on wmem_alloc; second, sk connected to peer
467 * may receive messages only from that peer. */
468 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
470 if (!skb_queue_empty(&sk->sk_receive_queue)) {
471 skb_queue_purge(&sk->sk_receive_queue);
472 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
474 /* If one link of bidirectional dgram pipe is disconnected,
475 * we signal error. Messages are lost. Do not make this,
476 * when peer was not connected to us.
478 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
479 other->sk_err = ECONNRESET;
480 other->sk_error_report(other);
485 static void unix_sock_destructor(struct sock *sk)
487 struct unix_sock *u = unix_sk(sk);
489 skb_queue_purge(&sk->sk_receive_queue);
491 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
492 WARN_ON(!sk_unhashed(sk));
493 WARN_ON(sk->sk_socket);
494 if (!sock_flag(sk, SOCK_DEAD)) {
495 pr_info("Attempt to release alive unix socket: %p\n", sk);
500 unix_release_addr(u->addr);
502 atomic_long_dec(&unix_nr_socks);
504 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
506 #ifdef UNIX_REFCNT_DEBUG
507 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
508 atomic_long_read(&unix_nr_socks));
512 static void unix_release_sock(struct sock *sk, int embrion)
514 struct unix_sock *u = unix_sk(sk);
520 unix_remove_socket(sk);
525 sk->sk_shutdown = SHUTDOWN_MASK;
527 u->path.dentry = NULL;
529 state = sk->sk_state;
530 sk->sk_state = TCP_CLOSE;
531 unix_state_unlock(sk);
533 wake_up_interruptible_all(&u->peer_wait);
535 skpair = unix_peer(sk);
537 if (skpair != NULL) {
538 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
539 unix_state_lock(skpair);
541 skpair->sk_shutdown = SHUTDOWN_MASK;
542 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
543 skpair->sk_err = ECONNRESET;
544 unix_state_unlock(skpair);
545 skpair->sk_state_change(skpair);
546 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
549 unix_dgram_peer_wake_disconnect(sk, skpair);
550 sock_put(skpair); /* It may now die */
551 unix_peer(sk) = NULL;
554 /* Try to flush out this socket. Throw out buffers at least */
556 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
557 if (state == TCP_LISTEN)
558 unix_release_sock(skb->sk, 1);
559 /* passed fds are erased in the kfree_skb hook */
560 UNIXCB(skb).consumed = skb->len;
569 /* ---- Socket is dead now and most probably destroyed ---- */
572 * Fixme: BSD difference: In BSD all sockets connected to us get
573 * ECONNRESET and we die on the spot. In Linux we behave
574 * like files and pipes do and wait for the last
577 * Can't we simply set sock->err?
579 * What the above comment does talk about? --ANK(980817)
582 if (unix_tot_inflight)
583 unix_gc(); /* Garbage collect fds */
586 static void init_peercred(struct sock *sk)
588 put_pid(sk->sk_peer_pid);
589 if (sk->sk_peer_cred)
590 put_cred(sk->sk_peer_cred);
591 sk->sk_peer_pid = get_pid(task_tgid(current));
592 sk->sk_peer_cred = get_current_cred();
595 static void copy_peercred(struct sock *sk, struct sock *peersk)
597 put_pid(sk->sk_peer_pid);
598 if (sk->sk_peer_cred)
599 put_cred(sk->sk_peer_cred);
600 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
601 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
604 static int unix_listen(struct socket *sock, int backlog)
607 struct sock *sk = sock->sk;
608 struct unix_sock *u = unix_sk(sk);
609 struct pid *old_pid = NULL;
612 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
613 goto out; /* Only stream/seqpacket sockets accept */
616 goto out; /* No listens on an unbound socket */
618 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
620 if (backlog > sk->sk_max_ack_backlog)
621 wake_up_interruptible_all(&u->peer_wait);
622 sk->sk_max_ack_backlog = backlog;
623 sk->sk_state = TCP_LISTEN;
624 /* set credentials so connect can copy them */
629 unix_state_unlock(sk);
635 static int unix_release(struct socket *);
636 static int unix_bind(struct socket *, struct sockaddr *, int);
637 static int unix_stream_connect(struct socket *, struct sockaddr *,
638 int addr_len, int flags);
639 static int unix_socketpair(struct socket *, struct socket *);
640 static int unix_accept(struct socket *, struct socket *, int);
641 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
642 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
643 static unsigned int unix_dgram_poll(struct file *, struct socket *,
645 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
646 static int unix_shutdown(struct socket *, int);
647 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
648 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
649 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
650 size_t size, int flags);
651 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
652 struct pipe_inode_info *, size_t size,
654 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
655 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
656 static int unix_dgram_connect(struct socket *, struct sockaddr *,
658 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
659 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
662 static int unix_set_peek_off(struct sock *sk, int val)
664 struct unix_sock *u = unix_sk(sk);
666 if (mutex_lock_interruptible(&u->iolock))
669 sk->sk_peek_off = val;
670 mutex_unlock(&u->iolock);
676 static const struct proto_ops unix_stream_ops = {
678 .owner = THIS_MODULE,
679 .release = unix_release,
681 .connect = unix_stream_connect,
682 .socketpair = unix_socketpair,
683 .accept = unix_accept,
684 .getname = unix_getname,
687 .listen = unix_listen,
688 .shutdown = unix_shutdown,
689 .setsockopt = sock_no_setsockopt,
690 .getsockopt = sock_no_getsockopt,
691 .sendmsg = unix_stream_sendmsg,
692 .recvmsg = unix_stream_recvmsg,
693 .mmap = sock_no_mmap,
694 .sendpage = unix_stream_sendpage,
695 .splice_read = unix_stream_splice_read,
696 .set_peek_off = unix_set_peek_off,
699 static const struct proto_ops unix_dgram_ops = {
701 .owner = THIS_MODULE,
702 .release = unix_release,
704 .connect = unix_dgram_connect,
705 .socketpair = unix_socketpair,
706 .accept = sock_no_accept,
707 .getname = unix_getname,
708 .poll = unix_dgram_poll,
710 .listen = sock_no_listen,
711 .shutdown = unix_shutdown,
712 .setsockopt = sock_no_setsockopt,
713 .getsockopt = sock_no_getsockopt,
714 .sendmsg = unix_dgram_sendmsg,
715 .recvmsg = unix_dgram_recvmsg,
716 .mmap = sock_no_mmap,
717 .sendpage = sock_no_sendpage,
718 .set_peek_off = unix_set_peek_off,
721 static const struct proto_ops unix_seqpacket_ops = {
723 .owner = THIS_MODULE,
724 .release = unix_release,
726 .connect = unix_stream_connect,
727 .socketpair = unix_socketpair,
728 .accept = unix_accept,
729 .getname = unix_getname,
730 .poll = unix_dgram_poll,
732 .listen = unix_listen,
733 .shutdown = unix_shutdown,
734 .setsockopt = sock_no_setsockopt,
735 .getsockopt = sock_no_getsockopt,
736 .sendmsg = unix_seqpacket_sendmsg,
737 .recvmsg = unix_seqpacket_recvmsg,
738 .mmap = sock_no_mmap,
739 .sendpage = sock_no_sendpage,
740 .set_peek_off = unix_set_peek_off,
743 static struct proto unix_proto = {
745 .owner = THIS_MODULE,
746 .obj_size = sizeof(struct unix_sock),
750 * AF_UNIX sockets do not interact with hardware, hence they
751 * dont trigger interrupts - so it's safe for them to have
752 * bh-unsafe locking for their sk_receive_queue.lock. Split off
753 * this special lock-class by reinitializing the spinlock key:
755 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
757 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
759 struct sock *sk = NULL;
762 atomic_long_inc(&unix_nr_socks);
763 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
766 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
770 sock_init_data(sock, sk);
771 lockdep_set_class(&sk->sk_receive_queue.lock,
772 &af_unix_sk_receive_queue_lock_key);
774 sk->sk_write_space = unix_write_space;
775 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
776 sk->sk_destruct = unix_sock_destructor;
778 u->path.dentry = NULL;
780 spin_lock_init(&u->lock);
781 atomic_long_set(&u->inflight, 0);
782 INIT_LIST_HEAD(&u->link);
783 mutex_init(&u->iolock); /* single task reading lock */
784 mutex_init(&u->bindlock); /* single task binding lock */
785 init_waitqueue_head(&u->peer_wait);
786 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
787 unix_insert_socket(unix_sockets_unbound(sk), sk);
790 atomic_long_dec(&unix_nr_socks);
793 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
799 static int unix_create(struct net *net, struct socket *sock, int protocol,
802 if (protocol && protocol != PF_UNIX)
803 return -EPROTONOSUPPORT;
805 sock->state = SS_UNCONNECTED;
807 switch (sock->type) {
809 sock->ops = &unix_stream_ops;
812 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
816 sock->type = SOCK_DGRAM;
818 sock->ops = &unix_dgram_ops;
821 sock->ops = &unix_seqpacket_ops;
824 return -ESOCKTNOSUPPORT;
827 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
830 static int unix_release(struct socket *sock)
832 struct sock *sk = sock->sk;
837 unix_release_sock(sk, 0);
843 static int unix_autobind(struct socket *sock)
845 struct sock *sk = sock->sk;
846 struct net *net = sock_net(sk);
847 struct unix_sock *u = unix_sk(sk);
848 static u32 ordernum = 1;
849 struct unix_address *addr;
851 unsigned int retries = 0;
853 err = mutex_lock_interruptible(&u->bindlock);
862 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
866 addr->name->sun_family = AF_UNIX;
867 atomic_set(&addr->refcnt, 1);
870 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
871 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
873 spin_lock(&unix_table_lock);
874 ordernum = (ordernum+1)&0xFFFFF;
876 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
878 spin_unlock(&unix_table_lock);
880 * __unix_find_socket_byname() may take long time if many names
881 * are already in use.
884 /* Give up if all names seems to be in use. */
885 if (retries++ == 0xFFFFF) {
892 addr->hash ^= sk->sk_type;
894 __unix_remove_socket(sk);
895 smp_store_release(&u->addr, addr);
896 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
897 spin_unlock(&unix_table_lock);
900 out: mutex_unlock(&u->bindlock);
/* Resolve a sockaddr_un to a target socket: filesystem lookup for
 * pathnames, hash lookup for abstract names.  Returns a held socket or
 * NULL with *error set.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_real_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
959 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
961 struct dentry *dentry;
965 * Get the parent directory, calculate the hash for last
968 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
969 err = PTR_ERR(dentry);
974 * All right, let's create it.
976 err = security_path_mknod(&path, dentry, mode, 0);
978 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
980 res->mnt = mntget(path.mnt);
981 res->dentry = dget(dentry);
984 done_path_create(&path, dentry);
988 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
990 struct sock *sk = sock->sk;
991 struct net *net = sock_net(sk);
992 struct unix_sock *u = unix_sk(sk);
993 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
994 char *sun_path = sunaddr->sun_path;
997 struct unix_address *addr;
998 struct hlist_head *list;
999 struct path path = { NULL, NULL };
1002 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003 sunaddr->sun_family != AF_UNIX)
1006 if (addr_len == sizeof(short)) {
1007 err = unix_autobind(sock);
1011 err = unix_mkname(sunaddr, addr_len, &hash);
1017 umode_t mode = S_IFSOCK |
1018 (SOCK_INODE(sock)->i_mode & ~current_umask());
1019 err = unix_mknod(sun_path, mode, &path);
1027 err = mutex_lock_interruptible(&u->bindlock);
1036 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1040 memcpy(addr->name, sunaddr, addr_len);
1041 addr->len = addr_len;
1042 addr->hash = hash ^ sk->sk_type;
1043 atomic_set(&addr->refcnt, 1);
1046 addr->hash = UNIX_HASH_SIZE;
1047 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048 spin_lock(&unix_table_lock);
1050 list = &unix_socket_table[hash];
1052 spin_lock(&unix_table_lock);
1054 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055 sk->sk_type, hash)) {
1056 unix_release_addr(addr);
1060 list = &unix_socket_table[addr->hash];
1064 __unix_remove_socket(sk);
1065 smp_store_release(&u->addr, addr);
1066 __unix_insert_socket(list, sk);
1069 spin_unlock(&unix_table_lock);
1071 mutex_unlock(&u->bindlock);
/* Lock two sockets in address order to avoid ABBA deadlock; handles the
 * sk1 == sk2 and sk2 == NULL cases by taking only sk1's lock.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
/* Release the locks taken by unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
1104 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105 int alen, int flags)
1107 struct sock *sk = sock->sk;
1108 struct net *net = sock_net(sk);
1109 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115 if (alen < offsetofend(struct sockaddr, sa_family))
1118 if (addr->sa_family != AF_UNSPEC) {
1119 err = unix_mkname(sunaddr, alen, &hash);
1124 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1129 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1133 unix_state_double_lock(sk, other);
1135 /* Apparently VFS overslept socket death. Retry. */
1136 if (sock_flag(other, SOCK_DEAD)) {
1137 unix_state_double_unlock(sk, other);
1143 if (!unix_may_send(sk, other))
1146 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1152 * 1003.1g breaking connected state with AF_UNSPEC
1155 unix_state_double_lock(sk, other);
1159 * If it was connected, reconnect.
1161 if (unix_peer(sk)) {
1162 struct sock *old_peer = unix_peer(sk);
1163 unix_peer(sk) = other;
1164 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1166 unix_state_double_unlock(sk, other);
1168 if (other != old_peer)
1169 unix_dgram_disconnected(sk, old_peer);
1172 unix_peer(sk) = other;
1173 unix_state_double_unlock(sk, other);
1178 unix_state_double_unlock(sk, other);
1184 static long unix_wait_for_peer(struct sock *other, long timeo)
1186 struct unix_sock *u = unix_sk(other);
1190 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1192 sched = !sock_flag(other, SOCK_DEAD) &&
1193 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194 unix_recvq_full(other);
1196 unix_state_unlock(other);
1199 timeo = schedule_timeout(timeo);
1201 finish_wait(&u->peer_wait, &wait);
/* connect(2) for stream/seqpacket: allocate the embryo socket and the
 * notification skb up front, then latch the listener's state and hand
 * the embryo over on its receive queue.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	atomic_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1394 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1396 struct sock *ska = socka->sk, *skb = sockb->sk;
1398 /* Join our sockets back to back */
1401 unix_peer(ska) = skb;
1402 unix_peer(skb) = ska;
1406 if (ska->sk_type != SOCK_DGRAM) {
1407 ska->sk_state = TCP_ESTABLISHED;
1408 skb->sk_state = TCP_ESTABLISHED;
1409 socka->state = SS_CONNECTED;
1410 sockb->state = SS_CONNECTED;
1415 static void unix_sock_inherit_flags(const struct socket *old,
1418 if (test_bit(SOCK_PASSCRED, &old->flags))
1419 set_bit(SOCK_PASSCRED, &new->flags);
1420 if (test_bit(SOCK_PASSSEC, &old->flags))
1421 set_bit(SOCK_PASSSEC, &new->flags);
1424 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1426 struct sock *sk = sock->sk;
1428 struct sk_buff *skb;
1432 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1436 if (sk->sk_state != TCP_LISTEN)
1439 /* If socket state is TCP_LISTEN it cannot change (for now...),
1440 * so that no locks are necessary.
1443 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1445 /* This means receive shutdown. */
1452 skb_free_datagram(sk, skb);
1453 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1455 /* attach accepted sock to socket */
1456 unix_state_lock(tsk);
1457 newsock->state = SS_CONNECTED;
1458 unix_sock_inherit_flags(sock, newsock);
1459 sock_graft(tsk, newsock);
1460 unix_state_unlock(tsk);
1468 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1470 struct sock *sk = sock->sk;
1471 struct unix_address *addr;
1472 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1476 sk = unix_peer_get(sk);
1486 addr = smp_load_acquire(&unix_sk(sk)->addr);
1488 sunaddr->sun_family = AF_UNIX;
1489 sunaddr->sun_path[0] = 0;
1490 *uaddr_len = sizeof(short);
1492 *uaddr_len = addr->len;
1493 memcpy(sunaddr, addr->name, *uaddr_len);
1500 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1504 scm->fp = UNIXCB(skb).fp;
1505 UNIXCB(skb).fp = NULL;
1507 for (i = scm->fp->count-1; i >= 0; i--)
1508 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1511 static void unix_destruct_scm(struct sk_buff *skb)
1513 struct scm_cookie scm;
1514 memset(&scm, 0, sizeof(scm));
1515 scm.pid = UNIXCB(skb).pid;
1517 unix_detach_fds(&scm, skb);
1519 /* Alas, it calls VFS */
1520 /* So fscking what? fput() had been SMP-safe since the last Summer */
1526 * The "user->unix_inflight" variable is protected by the garbage
1527 * collection lock, and we just read it locklessly here. If you go
1528 * over the limit, there might be a tiny race in actually noticing
1529 * it across threads. Tough.
1531 static inline bool too_many_unix_fds(struct task_struct *p)
1533 struct user_struct *user = current_user();
1535 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1536 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1540 #define MAX_RECURSION_LEVEL 4
1542 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1545 unsigned char max_level = 0;
1547 if (too_many_unix_fds(current))
1548 return -ETOOMANYREFS;
1550 for (i = scm->fp->count - 1; i >= 0; i--) {
1551 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1554 max_level = max(max_level,
1555 unix_sk(sk)->recursion_level);
1557 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1558 return -ETOOMANYREFS;
1561 * Need to duplicate file references for the sake of garbage
1562 * collection. Otherwise a socket in the fps might become a
1563 * candidate for GC while the skb is not yet queued.
1565 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1566 if (!UNIXCB(skb).fp)
1569 for (i = scm->fp->count - 1; i >= 0; i--)
1570 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1574 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1578 UNIXCB(skb).pid = get_pid(scm->pid);
1579 UNIXCB(skb).uid = scm->creds.uid;
1580 UNIXCB(skb).gid = scm->creds.gid;
1581 UNIXCB(skb).fp = NULL;
1582 unix_get_secdata(scm, skb);
1583 if (scm->fp && send_fds)
1584 err = unix_attach_fds(scm, skb);
1586 skb->destructor = unix_destruct_scm;
1590 static bool unix_passcred_enabled(const struct socket *sock,
1591 const struct sock *other)
1593 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1594 !other->sk_socket ||
1595 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1599 * Some apps rely on write() giving SCM_CREDENTIALS
1600 * We include credentials if source or destination socket
1601 * asserted SOCK_PASSCRED.
1603 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1604 const struct sock *other)
1606 if (UNIXCB(skb).pid)
1608 if (unix_passcred_enabled(sock, other)) {
1609 UNIXCB(skb).pid = get_pid(task_tgid(current));
1610 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1614 static int maybe_init_creds(struct scm_cookie *scm,
1615 struct socket *socket,
1616 const struct sock *other)
1619 struct msghdr msg = { .msg_controllen = 0 };
1621 err = scm_send(socket, &msg, scm, false);
1625 if (unix_passcred_enabled(socket, other)) {
1626 scm->pid = get_pid(task_tgid(current));
1627 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1632 static bool unix_skb_scm_eq(struct sk_buff *skb,
1633 struct scm_cookie *scm)
1635 const struct unix_skb_parms *u = &UNIXCB(skb);
1637 return u->pid == scm->pid &&
1638 uid_eq(u->uid, scm->creds.uid) &&
1639 gid_eq(u->gid, scm->creds.gid) &&
1640 unix_secdata_eq(scm, skb);
1644 * Send AF_UNIX data.
1647 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1650 struct sock *sk = sock->sk;
1651 struct net *net = sock_net(sk);
1652 struct unix_sock *u = unix_sk(sk);
1653 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1654 struct sock *other = NULL;
1655 int namelen = 0; /* fake GCC */
1658 struct sk_buff *skb;
1660 struct scm_cookie scm;
1666 err = scm_send(sock, msg, &scm, false);
1671 if (msg->msg_flags&MSG_OOB)
1674 if (msg->msg_namelen) {
1675 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1682 other = unix_peer_get(sk);
1687 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1688 && (err = unix_autobind(sock)) != 0)
1692 if (len > sk->sk_sndbuf - 32)
1695 if (len > SKB_MAX_ALLOC) {
1696 data_len = min_t(size_t,
1697 len - SKB_MAX_ALLOC,
1698 MAX_SKB_FRAGS * PAGE_SIZE);
1699 data_len = PAGE_ALIGN(data_len);
1701 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1704 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1705 msg->msg_flags & MSG_DONTWAIT, &err,
1706 PAGE_ALLOC_COSTLY_ORDER);
1710 err = unix_scm_to_skb(&scm, skb, true);
1713 max_level = err + 1;
1715 skb_put(skb, len - data_len);
1716 skb->data_len = data_len;
1718 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1722 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1727 if (sunaddr == NULL)
1730 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1736 if (sk_filter(other, skb) < 0) {
1737 /* Toss the packet but do not return any error to the sender */
1743 unix_state_lock(other);
1746 if (!unix_may_send(sk, other))
1749 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1751 * Check with 1003.1g - what should
1754 unix_state_unlock(other);
1758 unix_state_lock(sk);
1761 if (unix_peer(sk) == other) {
1762 unix_peer(sk) = NULL;
1763 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1765 unix_state_unlock(sk);
1767 unix_dgram_disconnected(sk, other);
1769 err = -ECONNREFUSED;
1771 unix_state_unlock(sk);
1781 if (other->sk_shutdown & RCV_SHUTDOWN)
1784 if (sk->sk_type != SOCK_SEQPACKET) {
1785 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1790 /* other == sk && unix_peer(other) != sk if
1791 * - unix_peer(sk) == NULL, destination address bound to sk
1792 * - unix_peer(sk) == sk by time of get but disconnected before lock
1795 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1797 timeo = unix_wait_for_peer(other, timeo);
1799 err = sock_intr_errno(timeo);
1800 if (signal_pending(current))
1807 unix_state_unlock(other);
1808 unix_state_double_lock(sk, other);
1811 if (unix_peer(sk) != other ||
1812 unix_dgram_peer_wake_me(sk, other)) {
1820 goto restart_locked;
1824 if (unlikely(sk_locked))
1825 unix_state_unlock(sk);
1827 if (sock_flag(other, SOCK_RCVTSTAMP))
1828 __net_timestamp(skb);
1829 maybe_add_creds(skb, sock, other);
1830 skb_queue_tail(&other->sk_receive_queue, skb);
1831 if (max_level > unix_sk(other)->recursion_level)
1832 unix_sk(other)->recursion_level = max_level;
1833 unix_state_unlock(other);
1834 other->sk_data_ready(other);
1841 unix_state_unlock(sk);
1842 unix_state_unlock(other);
1852 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1853 * bytes, and a minimum of a full page.
1855 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1857 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1860 struct sock *sk = sock->sk;
1861 struct sock *other = NULL;
1863 struct sk_buff *skb;
1865 struct scm_cookie scm;
1866 bool fds_sent = false;
1871 err = scm_send(sock, msg, &scm, false);
1876 if (msg->msg_flags&MSG_OOB)
1879 if (msg->msg_namelen) {
1880 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1884 other = unix_peer(sk);
1889 if (sk->sk_shutdown & SEND_SHUTDOWN)
1892 while (sent < len) {
1895 /* Keep two messages in the pipe so it schedules better */
1896 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1898 /* allow fallback to order-0 allocations */
1899 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1901 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1903 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1905 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1906 msg->msg_flags & MSG_DONTWAIT, &err,
1907 get_order(UNIX_SKB_FRAGS_SZ));
1911 /* Only send the fds in the first buffer */
1912 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1917 max_level = err + 1;
1920 skb_put(skb, size - data_len);
1921 skb->data_len = data_len;
1923 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1929 unix_state_lock(other);
1931 if (sock_flag(other, SOCK_DEAD) ||
1932 (other->sk_shutdown & RCV_SHUTDOWN))
1935 maybe_add_creds(skb, sock, other);
1936 skb_queue_tail(&other->sk_receive_queue, skb);
1937 if (max_level > unix_sk(other)->recursion_level)
1938 unix_sk(other)->recursion_level = max_level;
1939 unix_state_unlock(other);
1940 other->sk_data_ready(other);
1949 unix_state_unlock(other);
1952 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1953 send_sig(SIGPIPE, current, 0);
1957 return sent ? : err;
1960 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1961 int offset, size_t size, int flags)
1964 bool send_sigpipe = false;
1965 bool init_scm = true;
1966 struct scm_cookie scm;
1967 struct sock *other, *sk = socket->sk;
1968 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1970 if (flags & MSG_OOB)
1973 other = unix_peer(sk);
1974 if (!other || sk->sk_state != TCP_ESTABLISHED)
1979 unix_state_unlock(other);
1980 mutex_unlock(&unix_sk(other)->iolock);
1981 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1987 /* we must acquire iolock as we modify already present
1988 * skbs in the sk_receive_queue and mess with skb->len
1990 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1992 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1996 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1998 send_sigpipe = true;
2002 unix_state_lock(other);
2004 if (sock_flag(other, SOCK_DEAD) ||
2005 other->sk_shutdown & RCV_SHUTDOWN) {
2007 send_sigpipe = true;
2008 goto err_state_unlock;
2012 err = maybe_init_creds(&scm, socket, other);
2014 goto err_state_unlock;
2018 skb = skb_peek_tail(&other->sk_receive_queue);
2019 if (tail && tail == skb) {
2021 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2028 } else if (newskb) {
2029 /* this is fast path, we don't necessarily need to
2030 * call to kfree_skb even though with newskb == NULL
2031 * this - does no harm
2033 consume_skb(newskb);
2037 if (skb_append_pagefrags(skb, page, offset, size)) {
2043 skb->data_len += size;
2044 skb->truesize += size;
2045 atomic_add(size, &sk->sk_wmem_alloc);
2048 err = unix_scm_to_skb(&scm, skb, false);
2050 goto err_state_unlock;
2051 spin_lock(&other->sk_receive_queue.lock);
2052 __skb_queue_tail(&other->sk_receive_queue, newskb);
2053 spin_unlock(&other->sk_receive_queue.lock);
2056 unix_state_unlock(other);
2057 mutex_unlock(&unix_sk(other)->iolock);
2059 other->sk_data_ready(other);
2064 unix_state_unlock(other);
2066 mutex_unlock(&unix_sk(other)->iolock);
2069 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2070 send_sig(SIGPIPE, current, 0);
2076 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2080 struct sock *sk = sock->sk;
2082 err = sock_error(sk);
2086 if (sk->sk_state != TCP_ESTABLISHED)
2089 if (msg->msg_namelen)
2090 msg->msg_namelen = 0;
2092 return unix_dgram_sendmsg(sock, msg, len);
2095 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2096 size_t size, int flags)
2098 struct sock *sk = sock->sk;
2100 if (sk->sk_state != TCP_ESTABLISHED)
2103 return unix_dgram_recvmsg(sock, msg, size, flags);
2106 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2108 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2111 msg->msg_namelen = addr->len;
2112 memcpy(msg->msg_name, addr->name, addr->len);
2116 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2117 size_t size, int flags)
2119 struct scm_cookie scm;
2120 struct sock *sk = sock->sk;
2121 struct unix_sock *u = unix_sk(sk);
2122 int noblock = flags & MSG_DONTWAIT;
2123 struct sk_buff *skb;
2131 err = mutex_lock_interruptible(&u->iolock);
2132 if (unlikely(err)) {
2133 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
2134 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2136 err = noblock ? -EAGAIN : -ERESTARTSYS;
2140 skip = sk_peek_offset(sk, flags);
2142 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2144 unix_state_lock(sk);
2145 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2146 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2147 (sk->sk_shutdown & RCV_SHUTDOWN))
2149 unix_state_unlock(sk);
2153 wake_up_interruptible_sync_poll(&u->peer_wait,
2154 POLLOUT | POLLWRNORM | POLLWRBAND);
2157 unix_copy_addr(msg, skb->sk);
2159 if (size > skb->len - skip)
2160 size = skb->len - skip;
2161 else if (size < skb->len - skip)
2162 msg->msg_flags |= MSG_TRUNC;
2164 err = skb_copy_datagram_msg(skb, skip, msg, size);
2168 if (sock_flag(sk, SOCK_RCVTSTAMP))
2169 __sock_recv_timestamp(msg, sk, skb);
2171 memset(&scm, 0, sizeof(scm));
2173 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2174 unix_set_secdata(&scm, skb);
2176 if (!(flags & MSG_PEEK)) {
2178 unix_detach_fds(&scm, skb);
2180 sk_peek_offset_bwd(sk, skb->len);
2182 /* It is questionable: on PEEK we could:
2183 - do not return fds - good, but too simple 8)
2184 - return fds, and do not return them on read (old strategy,
2186 - clone fds (I chose it for now, it is the most universal
2189 POSIX 1003.1g does not actually define this clearly
2190 at all. POSIX 1003.1g doesn't define a lot of things
2195 sk_peek_offset_fwd(sk, size);
2198 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2200 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2202 scm_recv(sock, msg, &scm, flags);
2205 skb_free_datagram(sk, skb);
2207 mutex_unlock(&u->iolock);
2213 * Sleep until more data has arrived. But check for races..
2215 static long unix_stream_data_wait(struct sock *sk, long timeo,
2216 struct sk_buff *last, unsigned int last_len,
2219 struct sk_buff *tail;
2222 unix_state_lock(sk);
2225 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2227 tail = skb_peek_tail(&sk->sk_receive_queue);
2229 (tail && tail->len != last_len) ||
2231 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2232 signal_pending(current) ||
2236 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2237 unix_state_unlock(sk);
2239 timeo = freezable_schedule_timeout(timeo);
2241 timeo = schedule_timeout(timeo);
2242 unix_state_lock(sk);
2244 if (sock_flag(sk, SOCK_DEAD))
2247 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2250 finish_wait(sk_sleep(sk), &wait);
2251 unix_state_unlock(sk);
2255 static unsigned int unix_skb_len(const struct sk_buff *skb)
2257 return skb->len - UNIXCB(skb).consumed;
2260 struct unix_stream_read_state {
2261 int (*recv_actor)(struct sk_buff *, int, int,
2262 struct unix_stream_read_state *);
2263 struct socket *socket;
2265 struct pipe_inode_info *pipe;
2268 unsigned int splice_flags;
2271 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2274 struct scm_cookie scm;
2275 struct socket *sock = state->socket;
2276 struct sock *sk = sock->sk;
2277 struct unix_sock *u = unix_sk(sk);
2279 int flags = state->flags;
2280 int noblock = flags & MSG_DONTWAIT;
2281 bool check_creds = false;
2286 size_t size = state->size;
2287 unsigned int last_len;
2289 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2294 if (unlikely(flags & MSG_OOB)) {
2299 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2300 timeo = sock_rcvtimeo(sk, noblock);
2302 memset(&scm, 0, sizeof(scm));
2304 /* Lock the socket to prevent queue disordering
2305 * while sleeps in memcpy_tomsg
2307 mutex_lock(&u->iolock);
2309 if (flags & MSG_PEEK)
2310 skip = sk_peek_offset(sk, flags);
2317 struct sk_buff *skb, *last;
2319 unix_state_lock(sk);
2320 if (sock_flag(sk, SOCK_DEAD)) {
2324 last = skb = skb_peek(&sk->sk_receive_queue);
2325 last_len = last ? last->len : 0;
2328 unix_sk(sk)->recursion_level = 0;
2329 if (copied >= target)
2333 * POSIX 1003.1g mandates this order.
2336 err = sock_error(sk);
2339 if (sk->sk_shutdown & RCV_SHUTDOWN)
2342 unix_state_unlock(sk);
2348 mutex_unlock(&u->iolock);
2350 timeo = unix_stream_data_wait(sk, timeo, last,
2351 last_len, freezable);
2353 if (signal_pending(current)) {
2354 err = sock_intr_errno(timeo);
2359 mutex_lock(&u->iolock);
2362 unix_state_unlock(sk);
2366 while (skip >= unix_skb_len(skb)) {
2367 skip -= unix_skb_len(skb);
2369 last_len = skb->len;
2370 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2375 unix_state_unlock(sk);
2378 /* Never glue messages from different writers */
2379 if (!unix_skb_scm_eq(skb, &scm))
2381 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2382 /* Copy credentials */
2383 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2384 unix_set_secdata(&scm, skb);
2388 /* Copy address just once */
2389 if (state->msg && state->msg->msg_name) {
2390 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2391 state->msg->msg_name);
2392 unix_copy_addr(state->msg, skb->sk);
2396 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2398 chunk = state->recv_actor(skb, skip, chunk, state);
2399 drop_skb = !unix_skb_len(skb);
2400 /* skb is only safe to use if !drop_skb */
2411 /* the skb was touched by a concurrent reader;
2412 * we should not expect anything from this skb
2413 * anymore and assume it invalid - we can be
2414 * sure it was dropped from the socket queue
2416 * let's report a short read
2422 /* Mark read part of skb as used */
2423 if (!(flags & MSG_PEEK)) {
2424 UNIXCB(skb).consumed += chunk;
2426 sk_peek_offset_bwd(sk, chunk);
2429 unix_detach_fds(&scm, skb);
2431 if (unix_skb_len(skb))
2434 skb_unlink(skb, &sk->sk_receive_queue);
2440 /* It is questionable, see note in unix_dgram_recvmsg.
2443 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2445 sk_peek_offset_fwd(sk, chunk);
2452 last_len = skb->len;
2453 unix_state_lock(sk);
2454 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2457 unix_state_unlock(sk);
2462 mutex_unlock(&u->iolock);
2464 scm_recv(sock, state->msg, &scm, flags);
2468 return copied ? : err;
2471 static int unix_stream_read_actor(struct sk_buff *skb,
2472 int skip, int chunk,
2473 struct unix_stream_read_state *state)
2477 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2479 return ret ?: chunk;
2482 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2483 size_t size, int flags)
2485 struct unix_stream_read_state state = {
2486 .recv_actor = unix_stream_read_actor,
2493 return unix_stream_read_generic(&state, true);
2496 static ssize_t skb_unix_socket_splice(struct sock *sk,
2497 struct pipe_inode_info *pipe,
2498 struct splice_pipe_desc *spd)
2501 struct unix_sock *u = unix_sk(sk);
2503 mutex_unlock(&u->iolock);
2504 ret = splice_to_pipe(pipe, spd);
2505 mutex_lock(&u->iolock);
2510 static int unix_stream_splice_actor(struct sk_buff *skb,
2511 int skip, int chunk,
2512 struct unix_stream_read_state *state)
2514 return skb_splice_bits(skb, state->socket->sk,
2515 UNIXCB(skb).consumed + skip,
2516 state->pipe, chunk, state->splice_flags,
2517 skb_unix_socket_splice);
2520 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2521 struct pipe_inode_info *pipe,
2522 size_t size, unsigned int flags)
2524 struct unix_stream_read_state state = {
2525 .recv_actor = unix_stream_splice_actor,
2529 .splice_flags = flags,
2532 if (unlikely(*ppos))
2535 if (sock->file->f_flags & O_NONBLOCK ||
2536 flags & SPLICE_F_NONBLOCK)
2537 state.flags = MSG_DONTWAIT;
2539 return unix_stream_read_generic(&state, false);
2542 static int unix_shutdown(struct socket *sock, int mode)
2544 struct sock *sk = sock->sk;
2547 if (mode < SHUT_RD || mode > SHUT_RDWR)
2550 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2551 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2552 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2556 unix_state_lock(sk);
2557 sk->sk_shutdown |= mode;
2558 other = unix_peer(sk);
2561 unix_state_unlock(sk);
2562 sk->sk_state_change(sk);
2565 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2569 if (mode&RCV_SHUTDOWN)
2570 peer_mode |= SEND_SHUTDOWN;
2571 if (mode&SEND_SHUTDOWN)
2572 peer_mode |= RCV_SHUTDOWN;
2573 unix_state_lock(other);
2574 other->sk_shutdown |= peer_mode;
2575 unix_state_unlock(other);
2576 other->sk_state_change(other);
2577 if (peer_mode == SHUTDOWN_MASK)
2578 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2579 else if (peer_mode & RCV_SHUTDOWN)
2580 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2588 long unix_inq_len(struct sock *sk)
2590 struct sk_buff *skb;
2593 if (sk->sk_state == TCP_LISTEN)
2596 spin_lock(&sk->sk_receive_queue.lock);
2597 if (sk->sk_type == SOCK_STREAM ||
2598 sk->sk_type == SOCK_SEQPACKET) {
2599 skb_queue_walk(&sk->sk_receive_queue, skb)
2600 amount += unix_skb_len(skb);
2602 skb = skb_peek(&sk->sk_receive_queue);
2606 spin_unlock(&sk->sk_receive_queue.lock);
2610 EXPORT_SYMBOL_GPL(unix_inq_len);
2612 long unix_outq_len(struct sock *sk)
2614 return sk_wmem_alloc_get(sk);
2616 EXPORT_SYMBOL_GPL(unix_outq_len);
2618 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2620 struct sock *sk = sock->sk;
2626 amount = unix_outq_len(sk);
2627 err = put_user(amount, (int __user *)arg);
2630 amount = unix_inq_len(sk);
2634 err = put_user(amount, (int __user *)arg);
2643 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2645 struct sock *sk = sock->sk;
2648 sock_poll_wait(file, sk_sleep(sk), wait);
2651 /* exceptional events? */
2654 if (sk->sk_shutdown == SHUTDOWN_MASK)
2656 if (sk->sk_shutdown & RCV_SHUTDOWN)
2657 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2660 if (!skb_queue_empty(&sk->sk_receive_queue))
2661 mask |= POLLIN | POLLRDNORM;
2663 /* Connection-based need to check for termination and startup */
2664 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2665 sk->sk_state == TCP_CLOSE)
2669 * we set writable also when the other side has shut down the
2670 * connection. This prevents stuck sockets.
2672 if (unix_writable(sk))
2673 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2678 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2681 struct sock *sk = sock->sk, *other;
2682 unsigned int mask, writable;
2684 sock_poll_wait(file, sk_sleep(sk), wait);
2687 /* exceptional events? */
2688 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2690 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2692 if (sk->sk_shutdown & RCV_SHUTDOWN)
2693 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2694 if (sk->sk_shutdown == SHUTDOWN_MASK)
2698 if (!skb_queue_empty(&sk->sk_receive_queue))
2699 mask |= POLLIN | POLLRDNORM;
2701 /* Connection-based need to check for termination and startup */
2702 if (sk->sk_type == SOCK_SEQPACKET) {
2703 if (sk->sk_state == TCP_CLOSE)
2705 /* connection hasn't started yet? */
2706 if (sk->sk_state == TCP_SYN_SENT)
2710 /* No write status requested, avoid expensive OUT tests. */
2711 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2714 writable = unix_writable(sk);
2716 unix_state_lock(sk);
2718 other = unix_peer(sk);
2719 if (other && unix_peer(other) != sk &&
2720 unix_recvq_full(other) &&
2721 unix_dgram_peer_wake_me(sk, other))
2724 unix_state_unlock(sk);
2728 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2730 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2735 #ifdef CONFIG_PROC_FS
2737 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2739 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2740 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2741 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2743 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2745 unsigned long offset = get_offset(*pos);
2746 unsigned long bucket = get_bucket(*pos);
2748 unsigned long count = 0;
2750 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2751 if (sock_net(sk) != seq_file_net(seq))
2753 if (++count == offset)
2760 static struct sock *unix_next_socket(struct seq_file *seq,
2764 unsigned long bucket;
2766 while (sk > (struct sock *)SEQ_START_TOKEN) {
2770 if (sock_net(sk) == seq_file_net(seq))
2775 sk = unix_from_bucket(seq, pos);
2780 bucket = get_bucket(*pos) + 1;
2781 *pos = set_bucket_offset(bucket, 1);
2782 } while (bucket < ARRAY_SIZE(unix_socket_table));
2787 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2788 __acquires(unix_table_lock)
2790 spin_lock(&unix_table_lock);
2793 return SEQ_START_TOKEN;
2795 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2798 return unix_next_socket(seq, NULL, pos);
2801 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2804 return unix_next_socket(seq, v, pos);
2807 static void unix_seq_stop(struct seq_file *seq, void *v)
2808 __releases(unix_table_lock)
2810 spin_unlock(&unix_table_lock);
2813 static int unix_seq_show(struct seq_file *seq, void *v)
2816 if (v == SEQ_START_TOKEN)
2817 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2821 struct unix_sock *u = unix_sk(s);
2824 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2826 atomic_read(&s->sk_refcnt),
2828 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2831 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2832 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2835 if (u->addr) { // under unix_table_lock here
2840 len = u->addr->len - sizeof(short);
2841 if (!UNIX_ABSTRACT(s))
2847 for ( ; i < len; i++)
2848 seq_putc(seq, u->addr->name->sun_path[i]);
2850 unix_state_unlock(s);
2851 seq_putc(seq, '\n');
2857 static const struct seq_operations unix_seq_ops = {
2858 .start = unix_seq_start,
2859 .next = unix_seq_next,
2860 .stop = unix_seq_stop,
2861 .show = unix_seq_show,
2864 static int unix_seq_open(struct inode *inode, struct file *file)
2866 return seq_open_net(inode, file, &unix_seq_ops,
2867 sizeof(struct seq_net_private));
2870 static const struct file_operations unix_seq_fops = {
2871 .owner = THIS_MODULE,
2872 .open = unix_seq_open,
2874 .llseek = seq_lseek,
2875 .release = seq_release_net,
2880 static const struct net_proto_family unix_family_ops = {
2882 .create = unix_create,
2883 .owner = THIS_MODULE,
2887 static int __net_init unix_net_init(struct net *net)
2889 int error = -ENOMEM;
2891 net->unx.sysctl_max_dgram_qlen = 10;
2892 if (unix_sysctl_register(net))
2895 #ifdef CONFIG_PROC_FS
2896 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2897 unix_sysctl_unregister(net);
2906 static void __net_exit unix_net_exit(struct net *net)
2908 unix_sysctl_unregister(net);
2909 remove_proc_entry("unix", net->proc_net);
2912 static struct pernet_operations unix_net_ops = {
2913 .init = unix_net_init,
2914 .exit = unix_net_exit,
2917 static int __init af_unix_init(void)
2921 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2923 rc = proto_register(&unix_proto, 1);
2925 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2929 sock_register(&unix_family_ops);
2930 register_pernet_subsys(&unix_net_ops);
2935 static void __exit af_unix_exit(void)
2937 sock_unregister(PF_UNIX);
2938 proto_unregister(&unix_proto);
2939 unregister_pernet_subsys(&unix_net_ops);
2942 /* Earlier than device_initcall() so that other drivers invoking
2943 request_module() don't end up in a loop when modprobe tries
2944 to use a UNIX socket. But later than subsys_initcall() because
2945 we depend on stuff initialised there */
2946 fs_initcall(af_unix_init);
2947 module_exit(af_unix_exit);
2949 MODULE_LICENSE("GPL");
2950 MODULE_ALIAS_NETPROTO(PF_UNIX);