OSDN Git Service

smb3: Add defines for new information level, FileIdInformation
[tomoyo/tomoyo-test1.git] / net / unix / af_unix.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:        Implementation of BSD Unix domain sockets.
4  *
5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *              Linus Torvalds  :       Assorted bug cures.
9  *              Niibe Yutaka    :       async I/O support.
10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
11  *              Alan Cox        :       Limit size of allocated blocks.
12  *              Alan Cox        :       Fixed the stupid socketpair bug.
13  *              Alan Cox        :       BSD compatibility fine tuning.
14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
15  *              Alan Cox        :       Sorted out a proper draft version of
16  *                                      file descriptor passing hacked up from
17  *                                      Mike Shaver's work.
18  *              Marty Leisner   :       Fixes to fd passing
19  *              Nick Nevin      :       recvmsg bugfix.
20  *              Alan Cox        :       Started proper garbage collector
21  *              Heiko EiBfeldt  :       Missing verify_area check
22  *              Alan Cox        :       Started POSIXisms
23  *              Andreas Schwab  :       Replace inode by dentry for proper
24  *                                      reference counting
25  *              Kirk Petersen   :       Made this a module
26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
27  *                                      Lots of bug fixes.
28  *           Alexey Kuznetosv   :       Repaired (I hope) bugs introduces
29  *                                      by above two patches.
30  *           Andrea Arcangeli   :       If possible we block in connect(2)
31  *                                      if the max backlog of the listen socket
32  *                                      is been reached. This won't break
33  *                                      old apps and it will avoid huge amount
34  *                                      of socks hashed (this for unix_gc()
35  *                                      performances reasons).
36  *                                      Security fix that limits the max
37  *                                      number of socks to 2*max_files and
38  *                                      the number of skb queueable in the
39  *                                      dgram receiver.
40  *              Artur Skawina   :       Hash function optimizations
41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
42  *            Malcolm Beattie   :       Set peercred for socketpair
43  *           Michal Ostrowski   :       Module initialization cleanup.
44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
45  *                                      the core infrastructure is doing that
46  *                                      for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *      [TO FIX]
51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
52  *              other the moment one end closes.
53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *      [NOT TO FIX]
56  *      accept() returns a path name even if the connecting socket has closed
57  *              in the meantime (BSD loses the path and gives up).
58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *      BSD af_unix apparently has connect forgetting to block properly.
62  *              (need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *      Bug fixes and improvements.
66  *              - client shutdown killed server socket.
67  *              - removed all useless cli/sti pairs.
68  *
69  *      Semantic changes/extensions.
70  *              - generic control message passing.
71  *              - SCM_CREDENTIALS control message.
72  *              - "Abstract" (not FS based) socket bindings.
73  *                Abstract names are sequences of bytes (not zero terminated)
74  *                started by 0, so that this name space does not intersect
75  *                with BSD names.
76  */
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116
117 #include "scm.h"
118
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
123 static atomic_long_t unix_nr_socks;
124
125
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128         unsigned long hash = (unsigned long)addr;
129
130         hash ^= hash >> 16;
131         hash ^= hash >> 8;
132         hash %= UNIX_HASH_SIZE;
133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135
136 #define UNIX_ABSTRACT(sk)       (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137
#ifdef CONFIG_SECURITY_NETWORK
/* Store the sender's LSM security ID from the scm cookie into the skb
 * control block so it travels with the queued message.
 */
static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	UNIXCB(skb).secid = scm->secid;
}

/* Copy the security ID carried by the skb back into the receiver's
 * scm cookie.
 */
static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{
	scm->secid = UNIXCB(skb).secid;
}

/* True if the skb carries the same security ID as the scm cookie. */
static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return (scm->secid == UNIXCB(skb).secid);
}
#else
/* CONFIG_SECURITY_NETWORK disabled: no security data to propagate,
 * and all messages compare as equal.
 */
static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
{ }

static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
{
	return true;
}
#endif /* CONFIG_SECURITY_NETWORK */
165
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174         unsigned int hash = (__force unsigned int)csum_fold(n);
175
176         hash ^= hash>>8;
177         return hash&(UNIX_HASH_SIZE-1);
178 }
179
180 #define unix_peer(sk) (unix_sk(sk)->peer)
181
/* True iff osk currently points back at sk as its peer. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
186
/* sk may send to osk when osk is unconnected, or connected back to sk. */
static inline int unix_may_send(struct sock *sk, struct sock *osk)
{
	return !unix_peer(osk) || unix_our_peer(sk, osk);
}
191
192 static inline int unix_recvq_full(struct sock const *sk)
193 {
194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196
197 struct sock *unix_peer_get(struct sock *s)
198 {
199         struct sock *peer;
200
201         unix_state_lock(s);
202         peer = unix_peer(s);
203         if (peer)
204                 sock_hold(peer);
205         unix_state_unlock(s);
206         return peer;
207 }
208 EXPORT_SYMBOL_GPL(unix_peer_get);
209
210 static inline void unix_release_addr(struct unix_address *addr)
211 {
212         if (refcount_dec_and_test(&addr->refcnt))
213                 kfree(addr);
214 }
215
216 /*
217  *      Check unix socket name:
218  *              - should be not zero length.
219  *              - if started by not zero, should be NULL terminated (FS object)
220  *              - if started by zero, it is abstract name.
221  */
222
/* Validate and canonicalize a unix socket name.
 *
 * Returns the effective address length on success, -EINVAL on a bad
 * family or length.  On return *hashp is 0 for filesystem names and
 * the folded checksum for abstract (leading-NUL) names.
 */
static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
{
	*hashp = 0;

	/* Need more than just the family field, and at most the struct. */
	if (len <= sizeof(short) || len > sizeof(*sunaddr))
		return -EINVAL;
	if (!sunaddr || sunaddr->sun_family != AF_UNIX)
		return -EINVAL;
	if (sunaddr->sun_path[0]) {
		/*
		 * This may look like an off by one error but it is a bit more
		 * subtle. 108 is the longest valid AF_UNIX path for a binding.
		 * sun_path[108] doesn't as such exist.  However in kernel space
		 * we are guaranteed that it is a valid memory location in our
		 * kernel address buffer.
		 */
		((char *)sunaddr)[len] = 0;
		/* Recompute length from the (now terminated) path string. */
		len = strlen(sunaddr->sun_path)+1+sizeof(short);
		return len;
	}

	/* Abstract name: hash the raw bytes (not NUL terminated). */
	*hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
	return len;
}
247
/* Unhash sk from its table chain.  Caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
252
/* Add sk to the given hash chain.  Caller holds unix_table_lock and
 * sk must not already be hashed.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
258
/* Locked wrapper around __unix_remove_socket(). */
static inline void unix_remove_socket(struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_remove_socket(sk);
	spin_unlock(&unix_table_lock);
}
265
/* Locked wrapper around __unix_insert_socket(). */
static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	spin_lock(&unix_table_lock);
	__unix_insert_socket(list, sk);
	spin_unlock(&unix_table_lock);
}
272
/* Walk one hash chain looking for a bound socket in this namespace whose
 * address matches sunname/len exactly.  Caller holds unix_table_lock;
 * no reference is taken on the returned sock.
 */
static struct sock *__unix_find_socket_byname(struct net *net,
					      struct sockaddr_un *sunname,
					      int len, int type, unsigned int hash)
{
	struct sock *s;

	/* Chain index mixes the name hash with the socket type. */
	sk_for_each(s, &unix_socket_table[hash ^ type]) {
		struct unix_sock *u = unix_sk(s);

		if (!net_eq(sock_net(s), net))
			continue;

		if (u->addr->len == len &&
		    !memcmp(u->addr->name, sunname, len))
			return s;
	}
	return NULL;
}
291
292 static inline struct sock *unix_find_socket_byname(struct net *net,
293                                                    struct sockaddr_un *sunname,
294                                                    int len, int type,
295                                                    unsigned int hash)
296 {
297         struct sock *s;
298
299         spin_lock(&unix_table_lock);
300         s = __unix_find_socket_byname(net, sunname, len, type, hash);
301         if (s)
302                 sock_hold(s);
303         spin_unlock(&unix_table_lock);
304         return s;
305 }
306
307 static struct sock *unix_find_socket_byinode(struct inode *i)
308 {
309         struct sock *s;
310
311         spin_lock(&unix_table_lock);
312         sk_for_each(s,
313                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
314                 struct dentry *dentry = unix_sk(s)->path.dentry;
315
316                 if (dentry && d_backing_inode(dentry) == i) {
317                         sock_hold(s);
318                         goto found;
319                 }
320         }
321         s = NULL;
322 found:
323         spin_unlock(&unix_table_lock);
324         return s;
325 }
326
327 /* Support code for asymmetrically connected dgram sockets
328  *
329  * If a datagram socket is connected to a socket not itself connected
330  * to the first socket (eg, /dev/log), clients may only enqueue more
331  * messages if the present receive queue of the server socket is not
332  * "too large". This means there's a second writeability condition
333  * poll and sendmsg need to test. The dgram recv code will do a wake
334  * up on the peer_wait wait queue of a socket upon reception of a
335  * datagram which needs to be propagated to sleeping would-be writers
336  * since these might not have sent anything so far. This can't be
337  * accomplished via poll_wait because the lifetime of the server
338  * socket might be less than that of its clients if these break their
339  * association with it or if the server socket is closed while clients
340  * are still connected to it and there's no way to inform "a polling
341  * implementation" that it should let go of a certain wait queue
342  *
343  * In order to propagate a wake up, a wait_queue_entry_t of the client
344  * socket is enqueued on the peer_wait queue of the server socket
345  * whose wake function does a wake_up on the ordinary client socket
346  * wait queue. This connection is established whenever a write (or
347  * poll for write) hit the flow control condition and broken when the
348  * association to the server socket is dissolved or after a wake up
349  * was relayed.
350  */
351
/* Wake function installed on a peer's peer_wait queue: detach our entry
 * from that queue and relay the wake-up to our own socket's wait queue
 * so a blocked would-be writer can retry.  Runs with the peer_wait
 * queue lock held by the waker.
 */
static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
				      void *key)
{
	struct unix_sock *u;
	wait_queue_head_t *u_sleep;

	/* The entry is embedded in the client's unix_sock. */
	u = container_of(q, struct unix_sock, peer_wake);

	__remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
			    q);
	u->peer_wake.private = NULL;

	/* relaying can only happen while the wq still exists */
	u_sleep = sk_sleep(&u->sk);
	if (u_sleep)
		wake_up_interruptible_poll(u_sleep, key_to_poll(key));

	return 0;
}
371
372 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
373 {
374         struct unix_sock *u, *u_other;
375         int rc;
376
377         u = unix_sk(sk);
378         u_other = unix_sk(other);
379         rc = 0;
380         spin_lock(&u_other->peer_wait.lock);
381
382         if (!u->peer_wake.private) {
383                 u->peer_wake.private = other;
384                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
385
386                 rc = 1;
387         }
388
389         spin_unlock(&u_other->peer_wait.lock);
390         return rc;
391 }
392
/* Undo unix_dgram_peer_wake_connect(): remove sk's relay entry from
 * other's peer_wait queue, but only if it is still linked to 'other'
 * (the relay callback may already have detached it).
 */
static void unix_dgram_peer_wake_disconnect(struct sock *sk,
					    struct sock *other)
{
	struct unix_sock *u, *u_other;

	u = unix_sk(sk);
	u_other = unix_sk(other);
	spin_lock(&u_other->peer_wait.lock);

	if (u->peer_wake.private == other) {
		__remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
		u->peer_wake.private = NULL;
	}

	spin_unlock(&u_other->peer_wait.lock);
}
409
410 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
411                                                    struct sock *other)
412 {
413         unix_dgram_peer_wake_disconnect(sk, other);
414         wake_up_interruptible_poll(sk_sleep(sk),
415                                    EPOLLOUT |
416                                    EPOLLWRNORM |
417                                    EPOLLWRBAND);
418 }
419
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 when the caller must wait for the peer to drain (relay
 * entry stays registered so a recv on 'other' wakes us); returns 0
 * when writing may proceed, removing any entry this call registered.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	/* If other is SOCK_DEAD, we want to make sure we signal
	 * POLLOUT, such that a subsequent write() can get a
	 * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
	 * to other and its full, we will hang waiting for POLLOUT.
	 */
	if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
443
444 static int unix_writable(const struct sock *sk)
445 {
446         return sk->sk_state != TCP_LISTEN &&
447                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
448 }
449
/* sk->sk_write_space callback: when the socket becomes writable again,
 * wake pollers waiting for POLLOUT and deliver async (SIGIO) space
 * notification.  RCU protects the wait-queue structure lookup.
 */
static void unix_write_space(struct sock *sk)
{
	struct socket_wq *wq;

	rcu_read_lock();
	if (unix_writable(sk)) {
		wq = rcu_dereference(sk->sk_wq);
		if (skwq_has_sleeper(wq))
			wake_up_interruptible_sync_poll(&wq->wait,
				EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
		sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
	}
	rcu_read_unlock();
}
464
/* When dgram socket disconnects (or changes its peer), we clear its receive
 * queue of packets arrived from previous peer. First, it allows to do
 * flow control based only on wmem_alloc; second, sk connected to peer
 * may receive messages only from that peer. */
static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
{
	if (!skb_queue_empty(&sk->sk_receive_queue)) {
		skb_queue_purge(&sk->sk_receive_queue);
		/* Writers blocked on our peer_wait can never be woken by a
		 * recv now, so release them here. */
		wake_up_interruptible_all(&unix_sk(sk)->peer_wait);

		/* If one link of bidirectional dgram pipe is disconnected,
		 * we signal error. Messages are lost. Do not make this,
		 * when peer was not connected to us.
		 */
		if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
			other->sk_err = ECONNRESET;
			other->sk_error_report(other);
		}
	}
}
485
/* sk->sk_destruct callback: final cleanup when the last reference to a
 * unix sock is dropped.  Purges unread skbs, releases the bound address
 * and updates the global socket accounting.
 */
static void unix_sock_destructor(struct sock *sk)
{
	struct unix_sock *u = unix_sk(sk);

	skb_queue_purge(&sk->sk_receive_queue);

	/* Sanity: no in-flight write memory, unhashed, detached from any
	 * struct socket, and already marked dead. */
	WARN_ON(refcount_read(&sk->sk_wmem_alloc));
	WARN_ON(!sk_unhashed(sk));
	WARN_ON(sk->sk_socket);
	if (!sock_flag(sk, SOCK_DEAD)) {
		pr_info("Attempt to release alive unix socket: %p\n", sk);
		return;
	}

	if (u->addr)
		unix_release_addr(u->addr);

	atomic_long_dec(&unix_nr_socks);
	local_bh_disable();
	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
	local_bh_enable();
#ifdef UNIX_REFCNT_DEBUG
	pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
		atomic_long_read(&unix_nr_socks));
#endif
}
512
/* Tear down one unix socket: unhash it, mark it closed, notify and
 * detach its peer, flush queued skbs (recursively releasing embryo
 * sockets when tearing down a listener), drop the filesystem path and
 * finally run the fd garbage collector if any fds remain in flight.
 * 'embrion' is nonzero when releasing a never-accepted embryo socket
 * from a listen queue.
 */
static void unix_release_sock(struct sock *sk, int embrion)
{
	struct unix_sock *u = unix_sk(sk);
	struct path path;
	struct sock *skpair;
	struct sk_buff *skb;
	int state;

	unix_remove_socket(sk);

	/* Clear state */
	unix_state_lock(sk);
	sock_orphan(sk);
	sk->sk_shutdown = SHUTDOWN_MASK;
	/* Take the path out from under the lock; path_put() below may sleep. */
	path	     = u->path;
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	state = sk->sk_state;
	sk->sk_state = TCP_CLOSE;
	unix_state_unlock(sk);

	wake_up_interruptible_all(&u->peer_wait);

	skpair = unix_peer(sk);

	if (skpair != NULL) {
		if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
			unix_state_lock(skpair);
			/* No more writes */
			skpair->sk_shutdown = SHUTDOWN_MASK;
			if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
				skpair->sk_err = ECONNRESET;
			unix_state_unlock(skpair);
			skpair->sk_state_change(skpair);
			sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
		}

		unix_dgram_peer_wake_disconnect(sk, skpair);
		sock_put(skpair); /* It may now die */
		unix_peer(sk) = NULL;
	}

	/* Try to flush out this socket. Throw out buffers at least */

	while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
		/* A listener's queue holds embryo sockets, not data. */
		if (state == TCP_LISTEN)
			unix_release_sock(skb->sk, 1);
		/* passed fds are erased in the kfree_skb hook	      */
		UNIXCB(skb).consumed = skb->len;
		kfree_skb(skb);
	}

	if (path.dentry)
		path_put(&path);

	sock_put(sk);

	/* ---- Socket is dead now and most probably destroyed ---- */

	/*
	 * Fixme: BSD difference: In BSD all sockets connected to us get
	 *	  ECONNRESET and we die on the spot. In Linux we behave
	 *	  like files and pipes do and wait for the last
	 *	  dereference.
	 *
	 * Can't we simply set sock->err?
	 *
	 *	  What the above comment does talk about? --ANK(980817)
	 */

	if (unix_tot_inflight)
		unix_gc();		/* Garbage collect fds */
}
586
/* Install the current task's tgid and credentials as the socket's peer
 * credentials (reported via SO_PEERCRED), dropping any previously
 * stored ones.  put_pid(NULL) is a no-op, so the put is unconditional.
 */
static void init_peercred(struct sock *sk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(task_tgid(current));
	sk->sk_peer_cred = get_current_cred();
}
595
/* Copy peersk's peer credentials onto sk (used when accepting a
 * connection), dropping whatever sk previously held.
 */
static void copy_peercred(struct sock *sk, struct sock *peersk)
{
	put_pid(sk->sk_peer_pid);
	if (sk->sk_peer_cred)
		put_cred(sk->sk_peer_cred);
	sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
	sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
}
604
605 static int unix_listen(struct socket *sock, int backlog)
606 {
607         int err;
608         struct sock *sk = sock->sk;
609         struct unix_sock *u = unix_sk(sk);
610         struct pid *old_pid = NULL;
611
612         err = -EOPNOTSUPP;
613         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
614                 goto out;       /* Only stream/seqpacket sockets accept */
615         err = -EINVAL;
616         if (!u->addr)
617                 goto out;       /* No listens on an unbound socket */
618         unix_state_lock(sk);
619         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
620                 goto out_unlock;
621         if (backlog > sk->sk_max_ack_backlog)
622                 wake_up_interruptible_all(&u->peer_wait);
623         sk->sk_max_ack_backlog  = backlog;
624         sk->sk_state            = TCP_LISTEN;
625         /* set credentials so connect can copy them */
626         init_peercred(sk);
627         err = 0;
628
629 out_unlock:
630         unix_state_unlock(sk);
631         put_pid(old_pid);
632 out:
633         return err;
634 }
635
636 static int unix_release(struct socket *);
637 static int unix_bind(struct socket *, struct sockaddr *, int);
638 static int unix_stream_connect(struct socket *, struct sockaddr *,
639                                int addr_len, int flags);
640 static int unix_socketpair(struct socket *, struct socket *);
641 static int unix_accept(struct socket *, struct socket *, int, bool);
642 static int unix_getname(struct socket *, struct sockaddr *, int);
643 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
644 static __poll_t unix_dgram_poll(struct file *, struct socket *,
645                                     poll_table *);
646 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
647 #ifdef CONFIG_COMPAT
648 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
649 #endif
650 static int unix_shutdown(struct socket *, int);
651 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
652 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
653 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
654                                     size_t size, int flags);
655 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
656                                        struct pipe_inode_info *, size_t size,
657                                        unsigned int flags);
658 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
659 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
660 static int unix_dgram_connect(struct socket *, struct sockaddr *,
661                               int, int);
662 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
663 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
664                                   int);
665
666 static int unix_set_peek_off(struct sock *sk, int val)
667 {
668         struct unix_sock *u = unix_sk(sk);
669
670         if (mutex_lock_interruptible(&u->iolock))
671                 return -EINTR;
672
673         sk->sk_peek_off = val;
674         mutex_unlock(&u->iolock);
675
676         return 0;
677 }
678
679 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
680 {
681         struct sock *sk = sock->sk;
682         struct unix_sock *u;
683
684         if (sk) {
685                 u = unix_sk(sock->sk);
686                 seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
687         }
688 }
689
/* proto_ops for SOCK_STREAM unix sockets: connection-oriented byte
 * stream with sendpage/splice support.
 */
static const struct proto_ops unix_stream_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_stream_sendmsg,
	.recvmsg =	unix_stream_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	unix_stream_sendpage,
	.splice_read =	unix_stream_splice_read,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
716
/* proto_ops for SOCK_DGRAM unix sockets: connectionless datagrams,
 * so accept/listen are rejected and the dgram poll variant is used.
 */
static const struct proto_ops unix_dgram_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_dgram_connect,
	.socketpair =	unix_socketpair,
	.accept =	sock_no_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	sock_no_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_dgram_sendmsg,
	.recvmsg =	unix_dgram_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
742
/* proto_ops for SOCK_SEQPACKET unix sockets: connection-oriented like
 * stream (same connect/accept/listen), but message-framed, hence the
 * seqpacket send/recv and dgram-style poll.
 */
static const struct proto_ops unix_seqpacket_ops = {
	.family =	PF_UNIX,
	.owner =	THIS_MODULE,
	.release =	unix_release,
	.bind =		unix_bind,
	.connect =	unix_stream_connect,
	.socketpair =	unix_socketpair,
	.accept =	unix_accept,
	.getname =	unix_getname,
	.poll =		unix_dgram_poll,
	.ioctl =	unix_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl =	unix_compat_ioctl,
#endif
	.listen =	unix_listen,
	.shutdown =	unix_shutdown,
	.setsockopt =	sock_no_setsockopt,
	.getsockopt =	sock_no_getsockopt,
	.sendmsg =	unix_seqpacket_sendmsg,
	.recvmsg =	unix_seqpacket_recvmsg,
	.mmap =		sock_no_mmap,
	.sendpage =	sock_no_sendpage,
	.set_peek_off =	unix_set_peek_off,
	.show_fdinfo =	unix_show_fdinfo,
};
768
/* Shared proto for all unix socket types; obj_size makes sk_alloc()
 * allocate a full struct unix_sock.
 */
static struct proto unix_proto = {
	.name			= "UNIX",
	.owner			= THIS_MODULE,
	.obj_size		= sizeof(struct unix_sock),
};
774
/* Allocate and initialise a new unix sock, enforcing the global
 * 2 * get_max_files() limit on unix sockets, and insert it into the
 * "unbound" half of the hash table.  Returns NULL on failure.
 */
static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
{
	struct sock *sk = NULL;
	struct unix_sock *u;

	/* Optimistically count this socket; undone on the failure path. */
	atomic_long_inc(&unix_nr_socks);
	if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
		goto out;

	sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
	if (!sk)
		goto out;

	sock_init_data(sock, sk);

	sk->sk_allocation	= GFP_KERNEL_ACCOUNT;
	sk->sk_write_space	= unix_write_space;
	sk->sk_max_ack_backlog	= net->unx.sysctl_max_dgram_qlen;
	sk->sk_destruct		= unix_sock_destructor;
	u	  = unix_sk(sk);
	u->path.dentry = NULL;
	u->path.mnt = NULL;
	spin_lock_init(&u->lock);
	atomic_long_set(&u->inflight, 0);
	INIT_LIST_HEAD(&u->link);
	mutex_init(&u->iolock); /* single task reading lock */
	mutex_init(&u->bindlock); /* single task binding lock */
	init_waitqueue_head(&u->peer_wait);
	init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
	memset(&u->scm_stat, 0, sizeof(struct scm_stat));
	unix_insert_socket(unix_sockets_unbound(sk), sk);
out:
	if (sk == NULL)
		atomic_long_dec(&unix_nr_socks);	/* undo optimistic inc */
	else {
		local_bh_disable();
		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
		local_bh_enable();
	}
	return sk;
}
816
/* PF_UNIX family create hook: select the proto_ops matching sock->type
 * and allocate the underlying sock.  Only protocol 0 / PF_UNIX is
 * accepted.
 */
static int unix_create(struct net *net, struct socket *sock, int protocol,
		       int kern)
{
	if (protocol && protocol != PF_UNIX)
		return -EPROTONOSUPPORT;

	sock->state = SS_UNCONNECTED;

	switch (sock->type) {
	case SOCK_STREAM:
		sock->ops = &unix_stream_ops;
		break;
		/*
		 *	Believe it or not BSD has AF_UNIX, SOCK_RAW though
		 *	nothing uses it.
		 */
	case SOCK_RAW:
		sock->type = SOCK_DGRAM;
		/* fall through */
	case SOCK_DGRAM:
		sock->ops = &unix_dgram_ops;
		break;
	case SOCK_SEQPACKET:
		sock->ops = &unix_seqpacket_ops;
		break;
	default:
		return -ESOCKTNOSUPPORT;
	}

	return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
}
848
849 static int unix_release(struct socket *sock)
850 {
851         struct sock *sk = sock->sk;
852
853         if (!sk)
854                 return 0;
855
856         unix_release_sock(sk, 0);
857         sock->sk = NULL;
858
859         return 0;
860 }
861
/* Bind an as-yet unbound socket to an autogenerated abstract address.
 *
 * The name has the form "\0XXXXX" — a leading NUL followed by five hex
 * digits drawn from a 20-bit global counter.  Gives up with -ENOSPC once
 * all 0x100000 candidate names have been tried.
 */
static int unix_autobind(struct socket *sock)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	static u32 ordernum = 1;
	struct unix_address *addr;
	int err;
	unsigned int retries = 0;

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		return err;

	err = 0;
	if (u->addr)
		goto out;	/* already bound; nothing to do */

	err = -ENOMEM;
	addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
	if (!addr)
		goto out;

	addr->name->sun_family = AF_UNIX;
	refcount_set(&addr->refcnt, 1);

retry:
	/* Abstract name: NUL byte, then five hex digits of ordernum. */
	addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
	addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));

	spin_lock(&unix_table_lock);
	ordernum = (ordernum+1)&0xFFFFF;

	if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
				      addr->hash)) {
		spin_unlock(&unix_table_lock);
		/*
		 * __unix_find_socket_byname() may take long time if many names
		 * are already in use.
		 */
		cond_resched();
		/* Give up if all names seems to be in use. */
		if (retries++ == 0xFFFFF) {
			err = -ENOSPC;
			kfree(addr);
			goto out;
		}
		goto retry;
	}
	addr->hash ^= sk->sk_type;

	/* Publish the address and rehash the socket under its new name. */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(&unix_socket_table[addr->hash], sk);
	spin_unlock(&unix_table_lock);
	err = 0;

out:	mutex_unlock(&u->bindlock);
	return err;
}
922
/* Resolve a sockaddr_un to the bound peer socket.
 *
 * Pathname addresses go through the VFS: the path must resolve, be
 * writable by the caller, and name a socket inode with a unix socket
 * bound to it.  Abstract addresses are looked up directly by name hash.
 *
 * Returns the peer with the reference the lookup helper took (released
 * here only on the type-mismatch path), or NULL with *error set.  A type
 * mismatch on a pathname socket yields -EPROTOTYPE; other lookup
 * failures map to -ECONNREFUSED or the VFS error.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_backing_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		/* A successful lookup counts as an access for atime. */
		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
977
978 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
979 {
980         struct dentry *dentry;
981         struct path path;
982         int err = 0;
983         /*
984          * Get the parent directory, calculate the hash for last
985          * component.
986          */
987         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
988         err = PTR_ERR(dentry);
989         if (IS_ERR(dentry))
990                 return err;
991
992         /*
993          * All right, let's create it.
994          */
995         err = security_path_mknod(&path, dentry, mode, 0);
996         if (!err) {
997                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
998                 if (!err) {
999                         res->mnt = mntget(path.mnt);
1000                         res->dentry = dget(dentry);
1001                 }
1002         }
1003         done_path_create(&path, dentry);
1004         return err;
1005 }
1006
/* Handler for bind(2) on an AF_UNIX socket.
 *
 * An address containing only sun_family falls back to autobind.  A
 * pathname address first creates a socket inode via unix_mknod(), and the
 * socket is then hashed by inode number; an abstract address is hashed by
 * name after an in-use check.  A socket can be bound only once (u->addr
 * is write-once under bindlock).
 */
static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	char *sun_path = sunaddr->sun_path;
	int err;
	unsigned int hash;
	struct unix_address *addr;
	struct hlist_head *list;
	struct path path = { };

	err = -EINVAL;
	if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
	    sunaddr->sun_family != AF_UNIX)
		goto out;

	if (addr_len == sizeof(short)) {
		/* Only the family was supplied: pick an abstract name. */
		err = unix_autobind(sock);
		goto out;
	}

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (sun_path[0]) {
		/* Filesystem name: create the inode up front, outside the
		 * bindlock, honouring the caller's umask.
		 */
		umode_t mode = S_IFSOCK |
		       (SOCK_INODE(sock)->i_mode & ~current_umask());
		err = unix_mknod(sun_path, mode, &path);
		if (err) {
			if (err == -EEXIST)
				err = -EADDRINUSE;
			goto out;
		}
	}

	err = mutex_lock_interruptible(&u->bindlock);
	if (err)
		goto out_put;

	err = -EINVAL;
	if (u->addr)
		goto out_up;	/* already bound */

	err = -ENOMEM;
	addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
	if (!addr)
		goto out_up;

	memcpy(addr->name, sunaddr, addr_len);
	addr->len = addr_len;
	addr->hash = hash ^ sk->sk_type;
	refcount_set(&addr->refcnt, 1);

	if (sun_path[0]) {
		/* Pathname sockets carry the out-of-range hash value
		 * UNIX_HASH_SIZE and are chained by inode number instead.
		 */
		addr->hash = UNIX_HASH_SIZE;
		hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
		spin_lock(&unix_table_lock);
		u->path = path;
		list = &unix_socket_table[hash];
	} else {
		spin_lock(&unix_table_lock);
		err = -EADDRINUSE;
		if (__unix_find_socket_byname(net, sunaddr, addr_len,
					      sk->sk_type, hash)) {
			unix_release_addr(addr);
			goto out_unlock;
		}

		list = &unix_socket_table[addr->hash];
	}

	err = 0;
	/* Publish the address and rehash the socket under it. */
	__unix_remove_socket(sk);
	smp_store_release(&u->addr, addr);
	__unix_insert_socket(list, sk);

out_unlock:
	spin_unlock(&unix_table_lock);
out_up:
	mutex_unlock(&u->bindlock);
out_put:
	if (err)
		path_put(&path);
out:
	return err;
}
1097
/* Take the state locks of two socks deadlock-free by always locking the
 * lower address first.  sk2 may be NULL or equal to sk1, in which case
 * only sk1 is locked.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (!sk2 || unlikely(sk1 == sk2)) {
		unix_state_lock(sk1);
		return;
	}

	/* Normalize so sk1 is the lower address. */
	if (sk2 < sk1) {
		struct sock *tmp = sk1;

		sk1 = sk2;
		sk2 = tmp;
	}

	unix_state_lock(sk1);
	unix_state_lock_nested(sk2);
}
1112
/* Release the locks taken by unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	unix_state_unlock(sk1);
	if (sk2 && likely(sk1 != sk2))
		unix_state_unlock(sk2);
}
1122
/* connect(2) for datagram sockets: set — or, with AF_UNSPEC, clear — the
 * default peer.
 *
 * When replacing an existing peer the old one is notified via
 * unix_dgram_disconnected().  Both state locks are taken in address
 * order through unix_state_double_lock().
 */
static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
			      int alen, int flags)
{
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
	struct sock *other;
	unsigned int hash;
	int err;

	err = -EINVAL;
	if (alen < offsetofend(struct sockaddr, sa_family))
		goto out;

	if (addr->sa_family != AF_UNSPEC) {
		err = unix_mkname(sunaddr, alen, &hash);
		if (err < 0)
			goto out;
		alen = err;

		/* SOCK_PASSCRED requires an address of our own: autobind. */
		if (test_bit(SOCK_PASSCRED, &sock->flags) &&
		    !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
			goto out;

restart:
		other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
		if (!other)
			goto out;

		unix_state_double_lock(sk, other);

		/* Apparently VFS overslept socket death. Retry. */
		if (sock_flag(other, SOCK_DEAD)) {
			unix_state_double_unlock(sk, other);
			sock_put(other);
			goto restart;
		}

		err = -EPERM;
		if (!unix_may_send(sk, other))
			goto out_unlock;

		err = security_unix_may_send(sk->sk_socket, other->sk_socket);
		if (err)
			goto out_unlock;

	} else {
		/*
		 *	1003.1g breaking connected state with AF_UNSPEC
		 */
		other = NULL;
		unix_state_double_lock(sk, other);
	}

	/*
	 * If it was connected, reconnect.
	 */
	if (unix_peer(sk)) {
		struct sock *old_peer = unix_peer(sk);
		unix_peer(sk) = other;
		unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);

		unix_state_double_unlock(sk, other);

		if (other != old_peer)
			unix_dgram_disconnected(sk, old_peer);
		sock_put(old_peer);	/* drop the old peer reference */
	} else {
		unix_peer(sk) = other;
		unix_state_double_unlock(sk, other);
	}
	return 0;

out_unlock:
	unix_state_double_unlock(sk, other);
	sock_put(other);
out:
	return err;
}
1202
/* Sleep until the peer's receive queue may have room again.
 *
 * Called with other's state lock held; drops it.  The sleep condition is
 * evaluated before the lock is released so a concurrent wakeup cannot be
 * missed between the test and the schedule.  Returns the remaining
 * timeout.
 */
static long unix_wait_for_peer(struct sock *other, long timeo)
{
	struct unix_sock *u = unix_sk(other);
	int sched;
	DEFINE_WAIT(wait);

	prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);

	/* Only sleep while the peer is alive, not shut down for reading,
	 * and its queue is actually full.
	 */
	sched = !sock_flag(other, SOCK_DEAD) &&
		!(other->sk_shutdown & RCV_SHUTDOWN) &&
		unix_recvq_full(other);

	unix_state_unlock(other);

	if (sched)
		timeo = schedule_timeout(timeo);

	finish_wait(&u->peer_wait, &wait);
	return timeo;
}
1223
/* connect(2) for stream/seqpacket sockets.
 *
 * Allocates the server-side ("embryonic") sock and a carrier skb up
 * front, then — with the listener's state latched — queues the skb
 * holding the new sock on the listener's receive queue for unix_accept()
 * to pick up.  The lookup restarts whenever the peer dies or our own
 * state changes while locks were dropped.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	/* SOCK_PASSCRED requires an address of our own: autobind. */
	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		/* Listener's backlog is full: wait (or fail if non-blocking). */
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		/* Our state changed while unlocked: start over. */
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	refcount_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* queue the carrier skb and notify the listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1412
1413 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1414 {
1415         struct sock *ska = socka->sk, *skb = sockb->sk;
1416
1417         /* Join our sockets back to back */
1418         sock_hold(ska);
1419         sock_hold(skb);
1420         unix_peer(ska) = skb;
1421         unix_peer(skb) = ska;
1422         init_peercred(ska);
1423         init_peercred(skb);
1424
1425         if (ska->sk_type != SOCK_DGRAM) {
1426                 ska->sk_state = TCP_ESTABLISHED;
1427                 skb->sk_state = TCP_ESTABLISHED;
1428                 socka->state  = SS_CONNECTED;
1429                 sockb->state  = SS_CONNECTED;
1430         }
1431         return 0;
1432 }
1433
1434 static void unix_sock_inherit_flags(const struct socket *old,
1435                                     struct socket *new)
1436 {
1437         if (test_bit(SOCK_PASSCRED, &old->flags))
1438                 set_bit(SOCK_PASSCRED, &new->flags);
1439         if (test_bit(SOCK_PASSSEC, &old->flags))
1440                 set_bit(SOCK_PASSSEC, &new->flags);
1441 }
1442
/* accept(2): pop one pending connection off the listener's queue.
 *
 * unix_stream_connect() parks each embryonic connection's sock inside an
 * skb on the listener's receive queue, so accepting is a (possibly
 * blocking) datagram receive followed by grafting that sock onto the
 * new socket.
 */
static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
		       bool kern)
{
	struct sock *sk = sock->sk;
	struct sock *tsk;
	struct sk_buff *skb;
	int err;

	err = -EOPNOTSUPP;
	if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
		goto out;

	err = -EINVAL;
	if (sk->sk_state != TCP_LISTEN)
		goto out;

	/* If socket state is TCP_LISTEN it cannot change (for now...),
	 * so that no locks are necessary.
	 */

	skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
	if (!skb) {
		/* This means receive shutdown. */
		if (err == 0)
			err = -EINVAL;
		goto out;
	}

	tsk = skb->sk;
	skb_free_datagram(sk, skb);
	/* A backlog slot just opened: let blocked connectors retry. */
	wake_up_interruptible(&unix_sk(sk)->peer_wait);

	/* attach accepted sock to socket */
	unix_state_lock(tsk);
	newsock->state = SS_CONNECTED;
	unix_sock_inherit_flags(sock, newsock);
	sock_graft(tsk, newsock);
	unix_state_unlock(tsk);
	return 0;

out:
	return err;
}
1486
1487
1488 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1489 {
1490         struct sock *sk = sock->sk;
1491         struct unix_address *addr;
1492         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1493         int err = 0;
1494
1495         if (peer) {
1496                 sk = unix_peer_get(sk);
1497
1498                 err = -ENOTCONN;
1499                 if (!sk)
1500                         goto out;
1501                 err = 0;
1502         } else {
1503                 sock_hold(sk);
1504         }
1505
1506         addr = smp_load_acquire(&unix_sk(sk)->addr);
1507         if (!addr) {
1508                 sunaddr->sun_family = AF_UNIX;
1509                 sunaddr->sun_path[0] = 0;
1510                 err = sizeof(short);
1511         } else {
1512                 err = addr->len;
1513                 memcpy(sunaddr, addr->name, addr->len);
1514         }
1515         sock_put(sk);
1516 out:
1517         return err;
1518 }
1519
1520 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1521 {
1522         int err = 0;
1523
1524         UNIXCB(skb).pid  = get_pid(scm->pid);
1525         UNIXCB(skb).uid = scm->creds.uid;
1526         UNIXCB(skb).gid = scm->creds.gid;
1527         UNIXCB(skb).fp = NULL;
1528         unix_get_secdata(scm, skb);
1529         if (scm->fp && send_fds)
1530                 err = unix_attach_fds(scm, skb);
1531
1532         skb->destructor = unix_destruct_scm;
1533         return err;
1534 }
1535
1536 static bool unix_passcred_enabled(const struct socket *sock,
1537                                   const struct sock *other)
1538 {
1539         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1540                !other->sk_socket ||
1541                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1542 }
1543
1544 /*
1545  * Some apps rely on write() giving SCM_CREDENTIALS
1546  * We include credentials if source or destination socket
1547  * asserted SOCK_PASSCRED.
1548  */
1549 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1550                             const struct sock *other)
1551 {
1552         if (UNIXCB(skb).pid)
1553                 return;
1554         if (unix_passcred_enabled(sock, other)) {
1555                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1556                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1557         }
1558 }
1559
1560 static int maybe_init_creds(struct scm_cookie *scm,
1561                             struct socket *socket,
1562                             const struct sock *other)
1563 {
1564         int err;
1565         struct msghdr msg = { .msg_controllen = 0 };
1566
1567         err = scm_send(socket, &msg, scm, false);
1568         if (err)
1569                 return err;
1570
1571         if (unix_passcred_enabled(socket, other)) {
1572                 scm->pid = get_pid(task_tgid(current));
1573                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1574         }
1575         return err;
1576 }
1577
1578 static bool unix_skb_scm_eq(struct sk_buff *skb,
1579                             struct scm_cookie *scm)
1580 {
1581         const struct unix_skb_parms *u = &UNIXCB(skb);
1582
1583         return u->pid == scm->pid &&
1584                uid_eq(u->uid, scm->creds.uid) &&
1585                gid_eq(u->gid, scm->creds.gid) &&
1586                unix_secdata_eq(scm, skb);
1587 }
1588
1589 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1590 {
1591         struct scm_fp_list *fp = UNIXCB(skb).fp;
1592         struct unix_sock *u = unix_sk(sk);
1593
1594         lockdep_assert_held(&sk->sk_receive_queue.lock);
1595
1596         if (unlikely(fp && fp->count))
1597                 u->scm_stat.nr_fds += fp->count;
1598 }
1599
1600 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1601 {
1602         struct scm_fp_list *fp = UNIXCB(skb).fp;
1603         struct unix_sock *u = unix_sk(sk);
1604
1605         lockdep_assert_held(&sk->sk_receive_queue.lock);
1606
1607         if (unlikely(fp && fp->count))
1608                 u->scm_stat.nr_fds -= fp->count;
1609 }
1610
1611 /*
1612  *      Send AF_UNIX data.
1613  */
1614
1615 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1616                               size_t len)
1617 {
1618         struct sock *sk = sock->sk;
1619         struct net *net = sock_net(sk);
1620         struct unix_sock *u = unix_sk(sk);
1621         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1622         struct sock *other = NULL;
1623         int namelen = 0; /* fake GCC */
1624         int err;
1625         unsigned int hash;
1626         struct sk_buff *skb;
1627         long timeo;
1628         struct scm_cookie scm;
1629         int data_len = 0;
1630         int sk_locked;
1631
1632         wait_for_unix_gc();
1633         err = scm_send(sock, msg, &scm, false);
1634         if (err < 0)
1635                 return err;
1636
1637         err = -EOPNOTSUPP;
1638         if (msg->msg_flags&MSG_OOB)
1639                 goto out;
1640
1641         if (msg->msg_namelen) {
1642                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1643                 if (err < 0)
1644                         goto out;
1645                 namelen = err;
1646         } else {
1647                 sunaddr = NULL;
1648                 err = -ENOTCONN;
1649                 other = unix_peer_get(sk);
1650                 if (!other)
1651                         goto out;
1652         }
1653
1654         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1655             && (err = unix_autobind(sock)) != 0)
1656                 goto out;
1657
1658         err = -EMSGSIZE;
1659         if (len > sk->sk_sndbuf - 32)
1660                 goto out;
1661
1662         if (len > SKB_MAX_ALLOC) {
1663                 data_len = min_t(size_t,
1664                                  len - SKB_MAX_ALLOC,
1665                                  MAX_SKB_FRAGS * PAGE_SIZE);
1666                 data_len = PAGE_ALIGN(data_len);
1667
1668                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1669         }
1670
1671         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1672                                    msg->msg_flags & MSG_DONTWAIT, &err,
1673                                    PAGE_ALLOC_COSTLY_ORDER);
1674         if (skb == NULL)
1675                 goto out;
1676
1677         err = unix_scm_to_skb(&scm, skb, true);
1678         if (err < 0)
1679                 goto out_free;
1680
1681         skb_put(skb, len - data_len);
1682         skb->data_len = data_len;
1683         skb->len = len;
1684         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1685         if (err)
1686                 goto out_free;
1687
1688         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1689
1690 restart:
1691         if (!other) {
1692                 err = -ECONNRESET;
1693                 if (sunaddr == NULL)
1694                         goto out_free;
1695
1696                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1697                                         hash, &err);
1698                 if (other == NULL)
1699                         goto out_free;
1700         }
1701
1702         if (sk_filter(other, skb) < 0) {
1703                 /* Toss the packet but do not return any error to the sender */
1704                 err = len;
1705                 goto out_free;
1706         }
1707
1708         sk_locked = 0;
1709         unix_state_lock(other);
1710 restart_locked:
1711         err = -EPERM;
1712         if (!unix_may_send(sk, other))
1713                 goto out_unlock;
1714
1715         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1716                 /*
1717                  *      Check with 1003.1g - what should
1718                  *      datagram error
1719                  */
1720                 unix_state_unlock(other);
1721                 sock_put(other);
1722
1723                 if (!sk_locked)
1724                         unix_state_lock(sk);
1725
1726                 err = 0;
1727                 if (unix_peer(sk) == other) {
1728                         unix_peer(sk) = NULL;
1729                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1730
1731                         unix_state_unlock(sk);
1732
1733                         unix_dgram_disconnected(sk, other);
1734                         sock_put(other);
1735                         err = -ECONNREFUSED;
1736                 } else {
1737                         unix_state_unlock(sk);
1738                 }
1739
1740                 other = NULL;
1741                 if (err)
1742                         goto out_free;
1743                 goto restart;
1744         }
1745
1746         err = -EPIPE;
1747         if (other->sk_shutdown & RCV_SHUTDOWN)
1748                 goto out_unlock;
1749
1750         if (sk->sk_type != SOCK_SEQPACKET) {
1751                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1752                 if (err)
1753                         goto out_unlock;
1754         }
1755
1756         /* other == sk && unix_peer(other) != sk if
1757          * - unix_peer(sk) == NULL, destination address bound to sk
1758          * - unix_peer(sk) == sk by time of get but disconnected before lock
1759          */
1760         if (other != sk &&
1761             unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1762                 if (timeo) {
1763                         timeo = unix_wait_for_peer(other, timeo);
1764
1765                         err = sock_intr_errno(timeo);
1766                         if (signal_pending(current))
1767                                 goto out_free;
1768
1769                         goto restart;
1770                 }
1771
1772                 if (!sk_locked) {
1773                         unix_state_unlock(other);
1774                         unix_state_double_lock(sk, other);
1775                 }
1776
1777                 if (unix_peer(sk) != other ||
1778                     unix_dgram_peer_wake_me(sk, other)) {
1779                         err = -EAGAIN;
1780                         sk_locked = 1;
1781                         goto out_unlock;
1782                 }
1783
1784                 if (!sk_locked) {
1785                         sk_locked = 1;
1786                         goto restart_locked;
1787                 }
1788         }
1789
1790         if (unlikely(sk_locked))
1791                 unix_state_unlock(sk);
1792
1793         if (sock_flag(other, SOCK_RCVTSTAMP))
1794                 __net_timestamp(skb);
1795         maybe_add_creds(skb, sock, other);
1796         spin_lock(&other->sk_receive_queue.lock);
1797         scm_stat_add(other, skb);
1798         __skb_queue_tail(&other->sk_receive_queue, skb);
1799         spin_unlock(&other->sk_receive_queue.lock);
1800         unix_state_unlock(other);
1801         other->sk_data_ready(other);
1802         sock_put(other);
1803         scm_destroy(&scm);
1804         return len;
1805
1806 out_unlock:
1807         if (sk_locked)
1808                 unix_state_unlock(sk);
1809         unix_state_unlock(other);
1810 out_free:
1811         kfree_skb(skb);
1812 out:
1813         if (other)
1814                 sock_put(other);
1815         scm_destroy(&scm);
1816         return err;
1817 }
1818
1819 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1820  * bytes, and a minimum of a full page.
1821  */
1822 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1823
/* Stream send: split the payload into skbs and append them to the peer's
 * receive queue, attaching SCM credentials/fds to the first chunk only.
 * Returns bytes sent (possibly short) or a negative error if nothing was
 * sent.
 */
static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
			       size_t len)
{
	struct sock *sk = sock->sk;
	struct sock *other = NULL;
	int err, size;
	struct sk_buff *skb;
	int sent = 0;
	struct scm_cookie scm;
	bool fds_sent = false;
	int data_len;

	/* Throttle against the in-flight fd garbage collector before
	 * potentially queueing more SCM_RIGHTS references.
	 */
	wait_for_unix_gc();
	err = scm_send(sock, msg, &scm, false);
	if (err < 0)
		return err;

	/* Stream sockets carry no out-of-band data. */
	err = -EOPNOTSUPP;
	if (msg->msg_flags&MSG_OOB)
		goto out_err;

	if (msg->msg_namelen) {
		/* A destination address is invalid on a stream socket:
		 * -EISCONN when already connected, -EOPNOTSUPP otherwise.
		 */
		err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
		goto out_err;
	} else {
		err = -ENOTCONN;
		other = unix_peer(sk);
		if (!other)
			goto out_err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN)
		goto pipe_err;

	while (sent < len) {
		size = len - sent;

		/* Keep two messages in the pipe so it schedules better */
		size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);

		/* allow fallback to order-0 allocations */
		size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);

		/* Bytes that go into page frags rather than the linear head. */
		data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));

		data_len = min_t(size_t, size, PAGE_ALIGN(data_len));

		skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
					   msg->msg_flags & MSG_DONTWAIT, &err,
					   get_order(UNIX_SKB_FRAGS_SZ));
		if (!skb)
			goto out_err;

		/* Only send the fds in the first buffer */
		err = unix_scm_to_skb(&scm, skb, !fds_sent);
		if (err < 0) {
			kfree_skb(skb);
			goto out_err;
		}
		fds_sent = true;

		skb_put(skb, size - data_len);
		skb->data_len = data_len;
		skb->len = size;
		err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
		if (err) {
			kfree_skb(skb);
			goto out_err;
		}

		unix_state_lock(other);

		/* The peer may have died or shut down reception since the
		 * previous chunk was queued.
		 */
		if (sock_flag(other, SOCK_DEAD) ||
		    (other->sk_shutdown & RCV_SHUTDOWN))
			goto pipe_err_free;

		maybe_add_creds(skb, sock, other);
		/* Queue lock also guards the receiver's scm accounting. */
		spin_lock(&other->sk_receive_queue.lock);
		scm_stat_add(other, skb);
		__skb_queue_tail(&other->sk_receive_queue, skb);
		spin_unlock(&other->sk_receive_queue.lock);
		unix_state_unlock(other);
		other->sk_data_ready(other);
		sent += size;
	}

	scm_destroy(&scm);

	return sent;

pipe_err_free:
	unix_state_unlock(other);
	kfree_skb(skb);
pipe_err:
	/* Raise SIGPIPE only when nothing at all was sent and the caller
	 * did not suppress it with MSG_NOSIGNAL.
	 */
	if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	err = -EPIPE;
out_err:
	scm_destroy(&scm);
	/* A partial write reports the byte count, not the error. */
	return sent ? : err;
}
1925
/* sendpage() for stream sockets: append a page fragment to the peer's
 * receive queue, gluing onto the tail skb when it was written by the
 * same sender, otherwise allocating a fresh skb.
 */
static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
				    int offset, size_t size, int flags)
{
	int err;
	bool send_sigpipe = false;
	bool init_scm = true;	/* scm not yet initialized / owned */
	struct scm_cookie scm;
	struct sock *other, *sk = socket->sk;
	struct sk_buff *skb, *newskb = NULL, *tail = NULL;

	/* No out-of-band data on stream sockets. */
	if (flags & MSG_OOB)
		return -EOPNOTSUPP;

	other = unix_peer(sk);
	if (!other || sk->sk_state != TCP_ESTABLISHED)
		return -ENOTCONN;

	/* Never entered from above: 'goto alloc_skb' jumps here from below
	 * after recording the current tail, drops both locks, allocates a
	 * fresh skb, then falls through to retake the locks and retry.
	 */
	if (false) {
alloc_skb:
		unix_state_unlock(other);
		mutex_unlock(&unix_sk(other)->iolock);
		newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
					      &err, 0);
		if (!newskb)
			goto err;
	}

	/* we must acquire iolock as we modify already present
	 * skbs in the sk_receive_queue and mess with skb->len
	 */
	err = mutex_lock_interruptible(&unix_sk(other)->iolock);
	if (err) {
		err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
		goto err;
	}

	if (sk->sk_shutdown & SEND_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_unlock;
	}

	unix_state_lock(other);

	if (sock_flag(other, SOCK_DEAD) ||
	    other->sk_shutdown & RCV_SHUTDOWN) {
		err = -EPIPE;
		send_sigpipe = true;
		goto err_state_unlock;
	}

	/* Set up credentials once; from here on scm must be destroyed. */
	if (init_scm) {
		err = maybe_init_creds(&scm, socket, other);
		if (err)
			goto err_state_unlock;
		init_scm = false;
	}

	/* Decide which skb receives the page: the existing tail (fast
	 * path), or a freshly allocated one when the tail is missing,
	 * belongs to a different sender, or has no frag slot left.
	 */
	skb = skb_peek_tail(&other->sk_receive_queue);
	if (tail && tail == skb) {
		/* Tail unchanged while we allocated: use the new skb. */
		skb = newskb;
	} else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
		if (newskb) {
			skb = newskb;
		} else {
			tail = skb;
			goto alloc_skb;
		}
	} else if (newskb) {
		/* this is fast path, we don't necessarily need to
		 * call to kfree_skb even though with newskb == NULL
		 * this - does no harm
		 */
		consume_skb(newskb);
		newskb = NULL;
	}

	if (skb_append_pagefrags(skb, page, offset, size)) {
		/* No room for another fragment: retry with a fresh skb. */
		tail = skb;
		goto alloc_skb;
	}

	/* Account the appended bytes on the skb and against the sender's
	 * write-memory allowance.
	 */
	skb->len += size;
	skb->data_len += size;
	skb->truesize += size;
	refcount_add(size, &sk->sk_wmem_alloc);

	if (newskb) {
		err = unix_scm_to_skb(&scm, skb, false);
		if (err)
			goto err_state_unlock;
		spin_lock(&other->sk_receive_queue.lock);
		__skb_queue_tail(&other->sk_receive_queue, newskb);
		spin_unlock(&other->sk_receive_queue.lock);
	}

	unix_state_unlock(other);
	mutex_unlock(&unix_sk(other)->iolock);

	other->sk_data_ready(other);
	scm_destroy(&scm);
	return size;

err_state_unlock:
	unix_state_unlock(other);
err_unlock:
	mutex_unlock(&unix_sk(other)->iolock);
err:
	kfree_skb(newskb);	/* kfree_skb(NULL) is a no-op */
	if (send_sigpipe && !(flags & MSG_NOSIGNAL))
		send_sig(SIGPIPE, current, 0);
	if (!init_scm)
		scm_destroy(&scm);
	return err;
}
2041
2042 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2043                                   size_t len)
2044 {
2045         int err;
2046         struct sock *sk = sock->sk;
2047
2048         err = sock_error(sk);
2049         if (err)
2050                 return err;
2051
2052         if (sk->sk_state != TCP_ESTABLISHED)
2053                 return -ENOTCONN;
2054
2055         if (msg->msg_namelen)
2056                 msg->msg_namelen = 0;
2057
2058         return unix_dgram_sendmsg(sock, msg, len);
2059 }
2060
2061 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2062                                   size_t size, int flags)
2063 {
2064         struct sock *sk = sock->sk;
2065
2066         if (sk->sk_state != TCP_ESTABLISHED)
2067                 return -ENOTCONN;
2068
2069         return unix_dgram_recvmsg(sock, msg, size, flags);
2070 }
2071
2072 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2073 {
2074         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2075
2076         if (addr) {
2077                 msg->msg_namelen = addr->len;
2078                 memcpy(msg->msg_name, addr->name, addr->len);
2079         }
2080 }
2081
/* Datagram/SEQPACKET receive: dequeue one datagram, copy it to the
 * caller and deliver the sender's credentials, security label and any
 * passed file descriptors via the scm machinery.
 */
static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
			      size_t size, int flags)
{
	struct scm_cookie scm;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	struct sk_buff *skb, *last;
	long timeo;
	int skip;
	int err;

	/* No out-of-band data on datagram sockets. */
	err = -EOPNOTSUPP;
	if (flags&MSG_OOB)
		goto out;

	timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);

	/* Try to dequeue under iolock; when the queue is empty, drop the
	 * lock and wait for more packets until the timeout expires.
	 */
	do {
		mutex_lock(&u->iolock);

		skip = sk_peek_offset(sk, flags);
		skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
					      scm_stat_del, &skip, &err, &last);
		if (skb)
			break;	/* got a datagram; iolock still held */

		mutex_unlock(&u->iolock);

		if (err != -EAGAIN)
			break;
	} while (timeo &&
		 !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
					      &err, &timeo, last));

	if (!skb) { /* implies iolock unlocked */
		unix_state_lock(sk);
		/* Signal EOF on disconnected non-blocking SEQPACKET socket. */
		if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
		    (sk->sk_shutdown & RCV_SHUTDOWN))
			err = 0;
		unix_state_unlock(sk);
		goto out;
	}

	/* A datagram left the queue: wake senders waiting for space. */
	if (wq_has_sleeper(&u->peer_wait))
		wake_up_interruptible_sync_poll(&u->peer_wait,
						EPOLLOUT | EPOLLWRNORM |
						EPOLLWRBAND);

	if (msg->msg_name)
		unix_copy_addr(msg, skb->sk);

	/* Truncate to the caller's buffer; flag a larger datagram. */
	if (size > skb->len - skip)
		size = skb->len - skip;
	else if (size < skb->len - skip)
		msg->msg_flags |= MSG_TRUNC;

	err = skb_copy_datagram_msg(skb, skip, msg, size);
	if (err)
		goto out_free;

	if (sock_flag(sk, SOCK_RCVTSTAMP))
		__sock_recv_timestamp(msg, sk, skb);

	memset(&scm, 0, sizeof(scm));

	/* Hand the sender's credentials and security data to the reader. */
	scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
	unix_set_secdata(&scm, skb);

	if (!(flags & MSG_PEEK)) {
		/* Real read: transfer passed fds and consume the skb. */
		if (UNIXCB(skb).fp)
			unix_detach_fds(&scm, skb);

		sk_peek_offset_bwd(sk, skb->len);
	} else {
		/* It is questionable: on PEEK we could:
		   - do not return fds - good, but too simple 8)
		   - return fds, and do not return them on read (old strategy,
		     apparently wrong)
		   - clone fds (I chose it for now, it is the most universal
		     solution)

		   POSIX 1003.1g does not actually define this clearly
		   at all. POSIX 1003.1g doesn't define a lot of things
		   clearly however!

		*/

		sk_peek_offset_fwd(sk, size);

		if (UNIXCB(skb).fp)
			scm.fp = scm_fp_dup(UNIXCB(skb).fp);
	}
	/* With MSG_TRUNC report the full datagram size, not bytes copied. */
	err = (flags & MSG_TRUNC) ? skb->len - skip : size;

	scm_recv(sock, msg, &scm, flags);

out_free:
	skb_free_datagram(sk, skb);
	mutex_unlock(&u->iolock);
out:
	return err;
}
2185
2186 /*
2187  *      Sleep until more data has arrived. But check for races..
2188  */
2189 static long unix_stream_data_wait(struct sock *sk, long timeo,
2190                                   struct sk_buff *last, unsigned int last_len,
2191                                   bool freezable)
2192 {
2193         struct sk_buff *tail;
2194         DEFINE_WAIT(wait);
2195
2196         unix_state_lock(sk);
2197
2198         for (;;) {
2199                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2200
2201                 tail = skb_peek_tail(&sk->sk_receive_queue);
2202                 if (tail != last ||
2203                     (tail && tail->len != last_len) ||
2204                     sk->sk_err ||
2205                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2206                     signal_pending(current) ||
2207                     !timeo)
2208                         break;
2209
2210                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2211                 unix_state_unlock(sk);
2212                 if (freezable)
2213                         timeo = freezable_schedule_timeout(timeo);
2214                 else
2215                         timeo = schedule_timeout(timeo);
2216                 unix_state_lock(sk);
2217
2218                 if (sock_flag(sk, SOCK_DEAD))
2219                         break;
2220
2221                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2222         }
2223
2224         finish_wait(sk_sleep(sk), &wait);
2225         unix_state_unlock(sk);
2226         return timeo;
2227 }
2228
2229 static unsigned int unix_skb_len(const struct sk_buff *skb)
2230 {
2231         return skb->len - UNIXCB(skb).consumed;
2232 }
2233
/* Shared state for the generic stream-receive loop; recvmsg() and
 * splice() supply different recv_actor callbacks over the same core.
 */
struct unix_stream_read_state {
	/* Hand up to 'chunk' bytes of skb payload (starting past the
	 * consumed part plus 'skip') to the destination; returns bytes
	 * delivered or a negative error.
	 */
	int (*recv_actor)(struct sk_buff *, int, int,
			  struct unix_stream_read_state *);
	struct socket *socket;
	struct msghdr *msg;		/* recvmsg() destination (NULL for splice) */
	struct pipe_inode_info *pipe;	/* splice() destination */
	size_t size;			/* total bytes requested */
	int flags;			/* MSG_* flags */
	unsigned int splice_flags;	/* SPLICE_F_* flags */
};
2244
/* Core receive loop shared by unix_stream_recvmsg() and the splice read
 * path.  Walks the receive queue handing unread payload to
 * state->recv_actor, and deals with credential/fd passing, MSG_PEEK
 * offsets and blocking waits.  Returns bytes delivered, or an error if
 * nothing was delivered.
 */
static int unix_stream_read_generic(struct unix_stream_read_state *state,
				    bool freezable)
{
	struct scm_cookie scm;
	struct socket *sock = state->socket;
	struct sock *sk = sock->sk;
	struct unix_sock *u = unix_sk(sk);
	int copied = 0;
	int flags = state->flags;
	int noblock = flags & MSG_DONTWAIT;
	bool check_creds = false;
	int target;
	int err = 0;
	long timeo;
	int skip;
	size_t size = state->size;
	unsigned int last_len;

	if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
		err = -EINVAL;
		goto out;
	}

	if (unlikely(flags & MSG_OOB)) {
		err = -EOPNOTSUPP;
		goto out;
	}

	/* Minimum byte count before returning (SO_RCVLOWAT/MSG_WAITALL). */
	target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
	timeo = sock_rcvtimeo(sk, noblock);

	memset(&scm, 0, sizeof(scm));

	/* Lock the socket to prevent queue disordering
	 * while sleeps in memcpy_tomsg
	 */
	mutex_lock(&u->iolock);

	skip = max(sk_peek_offset(sk, flags), 0);

	do {
		int chunk;
		bool drop_skb;
		struct sk_buff *skb, *last;

redo:
		unix_state_lock(sk);
		if (sock_flag(sk, SOCK_DEAD)) {
			err = -ECONNRESET;
			goto unlock;
		}
		last = skb = skb_peek(&sk->sk_receive_queue);
		last_len = last ? last->len : 0;
again:
		if (skb == NULL) {
			/* Queue empty: done if the target was met, else
			 * report error/shutdown or wait for more data.
			 */
			if (copied >= target)
				goto unlock;

			/*
			 *	POSIX 1003.1g mandates this order.
			 */

			err = sock_error(sk);
			if (err)
				goto unlock;
			if (sk->sk_shutdown & RCV_SHUTDOWN)
				goto unlock;

			unix_state_unlock(sk);
			if (!timeo) {
				err = -EAGAIN;
				break;
			}

			mutex_unlock(&u->iolock);

			timeo = unix_stream_data_wait(sk, timeo, last,
						      last_len, freezable);

			if (signal_pending(current)) {
				err = sock_intr_errno(timeo);
				scm_destroy(&scm);
				goto out;
			}

			mutex_lock(&u->iolock);
			goto redo;
unlock:
			unix_state_unlock(sk);
			break;
		}

		/* Skip skbs fully covered by the MSG_PEEK offset. */
		while (skip >= unix_skb_len(skb)) {
			skip -= unix_skb_len(skb);
			last = skb;
			last_len = skb->len;
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (!skb)
				goto again;
		}

		unix_state_unlock(sk);

		if (check_creds) {
			/* Never glue messages from different writers */
			if (!unix_skb_scm_eq(skb, &scm))
				break;
		} else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
			/* Copy credentials */
			scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
			unix_set_secdata(&scm, skb);
			check_creds = true;
		}

		/* Copy address just once */
		if (state->msg && state->msg->msg_name) {
			DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
					 state->msg->msg_name);
			unix_copy_addr(state->msg, skb->sk);
			sunaddr = NULL;
		}

		/* Hand at most 'size' unread bytes of this skb to the
		 * actor; hold an extra reference across the call.
		 */
		chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
		skb_get(skb);
		chunk = state->recv_actor(skb, skip, chunk, state);
		drop_skb = !unix_skb_len(skb);
		/* skb is only safe to use if !drop_skb */
		consume_skb(skb);
		if (chunk < 0) {
			if (copied == 0)
				copied = -EFAULT;
			break;
		}
		copied += chunk;
		size -= chunk;

		if (drop_skb) {
			/* the skb was touched by a concurrent reader;
			 * we should not expect anything from this skb
			 * anymore and assume it invalid - we can be
			 * sure it was dropped from the socket queue
			 *
			 * let's report a short read
			 */
			err = 0;
			break;
		}

		/* Mark read part of skb as used */
		if (!(flags & MSG_PEEK)) {
			UNIXCB(skb).consumed += chunk;

			sk_peek_offset_bwd(sk, chunk);

			/* Transfer any passed fds to the reader. */
			if (UNIXCB(skb).fp) {
				spin_lock(&sk->sk_receive_queue.lock);
				scm_stat_del(sk, skb);
				spin_unlock(&sk->sk_receive_queue.lock);
				unix_detach_fds(&scm, skb);
			}

			if (unix_skb_len(skb))
				break;

			/* skb fully consumed: unlink and release it. */
			skb_unlink(skb, &sk->sk_receive_queue);
			consume_skb(skb);

			/* Stop reading once fds were collected. */
			if (scm.fp)
				break;
		} else {
			/* It is questionable, see note in unix_dgram_recvmsg.
			 */
			if (UNIXCB(skb).fp)
				scm.fp = scm_fp_dup(UNIXCB(skb).fp);

			sk_peek_offset_fwd(sk, chunk);

			if (UNIXCB(skb).fp)
				break;

			skip = 0;
			last = skb;
			last_len = skb->len;
			unix_state_lock(sk);
			skb = skb_peek_next(skb, &sk->sk_receive_queue);
			if (skb)
				goto again;
			unix_state_unlock(sk);
			break;
		}
	} while (size);

	mutex_unlock(&u->iolock);
	if (state->msg)
		scm_recv(sock, state->msg, &scm, flags);
	else
		scm_destroy(&scm);
out:
	return copied ? : err;
}
2445
2446 static int unix_stream_read_actor(struct sk_buff *skb,
2447                                   int skip, int chunk,
2448                                   struct unix_stream_read_state *state)
2449 {
2450         int ret;
2451
2452         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2453                                     state->msg, chunk);
2454         return ret ?: chunk;
2455 }
2456
2457 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2458                                size_t size, int flags)
2459 {
2460         struct unix_stream_read_state state = {
2461                 .recv_actor = unix_stream_read_actor,
2462                 .socket = sock,
2463                 .msg = msg,
2464                 .size = size,
2465                 .flags = flags
2466         };
2467
2468         return unix_stream_read_generic(&state, true);
2469 }
2470
2471 static int unix_stream_splice_actor(struct sk_buff *skb,
2472                                     int skip, int chunk,
2473                                     struct unix_stream_read_state *state)
2474 {
2475         return skb_splice_bits(skb, state->socket->sk,
2476                                UNIXCB(skb).consumed + skip,
2477                                state->pipe, chunk, state->splice_flags);
2478 }
2479
2480 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2481                                        struct pipe_inode_info *pipe,
2482                                        size_t size, unsigned int flags)
2483 {
2484         struct unix_stream_read_state state = {
2485                 .recv_actor = unix_stream_splice_actor,
2486                 .socket = sock,
2487                 .pipe = pipe,
2488                 .size = size,
2489                 .splice_flags = flags,
2490         };
2491
2492         if (unlikely(*ppos))
2493                 return -ESPIPE;
2494
2495         if (sock->file->f_flags & O_NONBLOCK ||
2496             flags & SPLICE_F_NONBLOCK)
2497                 state.flags = MSG_DONTWAIT;
2498
2499         return unix_stream_read_generic(&state, false);
2500 }
2501
2502 static int unix_shutdown(struct socket *sock, int mode)
2503 {
2504         struct sock *sk = sock->sk;
2505         struct sock *other;
2506
2507         if (mode < SHUT_RD || mode > SHUT_RDWR)
2508                 return -EINVAL;
2509         /* This maps:
2510          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2511          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2512          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2513          */
2514         ++mode;
2515
2516         unix_state_lock(sk);
2517         sk->sk_shutdown |= mode;
2518         other = unix_peer(sk);
2519         if (other)
2520                 sock_hold(other);
2521         unix_state_unlock(sk);
2522         sk->sk_state_change(sk);
2523
2524         if (other &&
2525                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2526
2527                 int peer_mode = 0;
2528
2529                 if (mode&RCV_SHUTDOWN)
2530                         peer_mode |= SEND_SHUTDOWN;
2531                 if (mode&SEND_SHUTDOWN)
2532                         peer_mode |= RCV_SHUTDOWN;
2533                 unix_state_lock(other);
2534                 other->sk_shutdown |= peer_mode;
2535                 unix_state_unlock(other);
2536                 other->sk_state_change(other);
2537                 if (peer_mode == SHUTDOWN_MASK)
2538                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2539                 else if (peer_mode & RCV_SHUTDOWN)
2540                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2541         }
2542         if (other)
2543                 sock_put(other);
2544
2545         return 0;
2546 }
2547
2548 long unix_inq_len(struct sock *sk)
2549 {
2550         struct sk_buff *skb;
2551         long amount = 0;
2552
2553         if (sk->sk_state == TCP_LISTEN)
2554                 return -EINVAL;
2555
2556         spin_lock(&sk->sk_receive_queue.lock);
2557         if (sk->sk_type == SOCK_STREAM ||
2558             sk->sk_type == SOCK_SEQPACKET) {
2559                 skb_queue_walk(&sk->sk_receive_queue, skb)
2560                         amount += unix_skb_len(skb);
2561         } else {
2562                 skb = skb_peek(&sk->sk_receive_queue);
2563                 if (skb)
2564                         amount = skb->len;
2565         }
2566         spin_unlock(&sk->sk_receive_queue.lock);
2567
2568         return amount;
2569 }
2570 EXPORT_SYMBOL_GPL(unix_inq_len);
2571
/* SIOCOUTQ helper: write memory currently allocated to this socket. */
long unix_outq_len(struct sock *sk)
{
	return sk_wmem_alloc_get(sk);
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2577
2578 static int unix_open_file(struct sock *sk)
2579 {
2580         struct path path;
2581         struct file *f;
2582         int fd;
2583
2584         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2585                 return -EPERM;
2586
2587         if (!smp_load_acquire(&unix_sk(sk)->addr))
2588                 return -ENOENT;
2589
2590         path = unix_sk(sk)->path;
2591         if (!path.dentry)
2592                 return -ENOENT;
2593
2594         path_get(&path);
2595
2596         fd = get_unused_fd_flags(O_CLOEXEC);
2597         if (fd < 0)
2598                 goto out;
2599
2600         f = dentry_open(&path, O_PATH, current_cred());
2601         if (IS_ERR(f)) {
2602                 put_unused_fd(fd);
2603                 fd = PTR_ERR(f);
2604                 goto out;
2605         }
2606
2607         fd_install(fd, f);
2608 out:
2609         path_put(&path);
2610
2611         return fd;
2612 }
2613
2614 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2615 {
2616         struct sock *sk = sock->sk;
2617         long amount = 0;
2618         int err;
2619
2620         switch (cmd) {
2621         case SIOCOUTQ:
2622                 amount = unix_outq_len(sk);
2623                 err = put_user(amount, (int __user *)arg);
2624                 break;
2625         case SIOCINQ:
2626                 amount = unix_inq_len(sk);
2627                 if (amount < 0)
2628                         err = amount;
2629                 else
2630                         err = put_user(amount, (int __user *)arg);
2631                 break;
2632         case SIOCUNIXFILE:
2633                 err = unix_open_file(sk);
2634                 break;
2635         default:
2636                 err = -ENOIOCTLCMD;
2637                 break;
2638         }
2639         return err;
2640 }
2641
#ifdef CONFIG_COMPAT
/* 32-bit compat entry point: every unix ioctl takes a pointer argument,
 * so translating it with compat_ptr() and reusing the native handler
 * is sufficient.
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
	return unix_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
}
#endif
2648
/*
 * poll() for connection-oriented (SOCK_STREAM/SOCK_SEQPACKET) unix
 * sockets.  All state is read locklessly; sock_poll_wait() must be
 * called before the checks so a wakeup between check and sleep is not
 * lost.
 */
static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
{
	struct sock *sk = sock->sk;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err)
		mask |= EPOLLERR;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;
	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
	    sk->sk_state == TCP_CLOSE)
		mask |= EPOLLHUP;

	/*
	 * we set writable also when the other side has shut down the
	 * connection. This prevents stuck sockets.
	 */
	if (unix_writable(sk))
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;

	return mask;
}
2683
/*
 * poll() for SOCK_DGRAM (and SOCK_SEQPACKET) unix sockets.  Differs from
 * unix_poll() in that writability also depends on the connected peer's
 * receive queue: if the peer is full we register on its wakeup list
 * (unix_dgram_peer_wake_me()) instead of reporting writable.
 */
static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
				    poll_table *wait)
{
	struct sock *sk = sock->sk, *other;
	unsigned int writable;
	__poll_t mask;

	sock_poll_wait(file, sock, wait);
	mask = 0;

	/* exceptional events? */
	if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
		mask |= EPOLLERR |
			(sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);

	if (sk->sk_shutdown & RCV_SHUTDOWN)
		mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
	if (sk->sk_shutdown == SHUTDOWN_MASK)
		mask |= EPOLLHUP;

	/* readable? */
	if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
		mask |= EPOLLIN | EPOLLRDNORM;

	/* Connection-based need to check for termination and startup */
	if (sk->sk_type == SOCK_SEQPACKET) {
		if (sk->sk_state == TCP_CLOSE)
			mask |= EPOLLHUP;
		/* connection hasn't started yet? */
		if (sk->sk_state == TCP_SYN_SENT)
			return mask;
	}

	/* No write status requested, avoid expensive OUT tests. */
	if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
		return mask;

	writable = unix_writable(sk);
	if (writable) {
		unix_state_lock(sk);

		/* A connected peer with a full receive queue makes us
		 * unwritable; arm the peer-wake callback so we get woken
		 * when it drains.  Peers of a peer (unix_peer(other) != sk
		 * check) don't throttle us.
		 */
		other = unix_peer(sk);
		if (other && unix_peer(other) != sk &&
		    unix_recvq_full(other) &&
		    unix_dgram_peer_wake_me(sk, other))
			writable = 0;

		unix_state_unlock(sk);
	}

	if (writable)
		mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
	else
		sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);

	return mask;
}
2741
2742 #ifdef CONFIG_PROC_FS
2743
/* The seq_file position (*pos) packs both a hash-bucket index and a
 * 1-based offset within that bucket into one loff_t: bucket in the high
 * bits, offset in the low BUCKET_SPACE bits.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2749
/*
 * Return the socket at the (1-based) offset encoded in *pos within the
 * bucket encoded in *pos, counting only sockets belonging to this
 * seq_file's network namespace.  NULL if the bucket has fewer entries.
 * Caller holds unix_table_lock.
 */
static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
{
	unsigned long offset = get_offset(*pos);
	unsigned long bucket = get_bucket(*pos);
	struct sock *sk;
	unsigned long count = 0;

	for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
		if (sock_net(sk) != seq_file_net(seq))
			continue;
		if (++count == offset)
			break;
	}

	return sk;
}
2766
/*
 * Advance to the next in-namespace socket after sk, walking into later
 * hash buckets as needed and updating *pos accordingly.  A NULL sk (or
 * SEQ_START_TOKEN) means "resolve *pos from scratch".  Returns NULL when
 * the table is exhausted.  Caller holds unix_table_lock.
 *
 * Note: the "goto next_bucket" deliberately jumps into the do-while body
 * to skip the unix_from_bucket() lookup for the just-exhausted bucket.
 */
static struct sock *unix_next_socket(struct seq_file *seq,
				     struct sock *sk,
				     loff_t *pos)
{
	unsigned long bucket;

	while (sk > (struct sock *)SEQ_START_TOKEN) {
		sk = sk_next(sk);
		if (!sk)
			goto next_bucket;
		if (sock_net(sk) == seq_file_net(seq))
			return sk;
	}

	do {
		sk = unix_from_bucket(seq, pos);
		if (sk)
			return sk;

next_bucket:
		/* move to the first (offset 1) entry of the next bucket */
		bucket = get_bucket(*pos) + 1;
		*pos = set_bucket_offset(bucket, 1);
	} while (bucket < ARRAY_SIZE(unix_socket_table));

	return NULL;
}
2793
/*
 * seq_file ->start: take unix_table_lock for the whole traversal
 * (released in unix_seq_stop()).  Position 0 is the header row.
 */
static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(unix_table_lock)
{
	spin_lock(&unix_table_lock);

	if (!*pos)
		return SEQ_START_TOKEN;

	/* *pos may point past the table after a previous full walk */
	if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
		return NULL;

	return unix_next_socket(seq, NULL, pos);
}
2807
2808 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2809 {
2810         ++*pos;
2811         return unix_next_socket(seq, v, pos);
2812 }
2813
/* seq_file ->stop: drop the lock taken in unix_seq_start(). */
static void unix_seq_stop(struct seq_file *seq, void *v)
	__releases(unix_table_lock)
{
	spin_unlock(&unix_table_lock);
}
2819
/*
 * seq_file ->show: emit one /proc/net/unix row (or the header for
 * SEQ_START_TOKEN).  Format: kernel address, refcount, protocol (always
 * 0 for unix), flags, type, pseudo-state, inode, then the bound path
 * with a leading '@' and embedded NULs shown as '@' for abstract names.
 */
static int unix_seq_show(struct seq_file *seq, void *v)
{

	if (v == SEQ_START_TOKEN)
		seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
			 "Inode Path\n");
	else {
		struct sock *s = v;
		struct unix_sock *u = unix_sk(s);
		unix_state_lock(s);

		seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
			s,
			refcount_read(&s->sk_refcnt),
			0,
			s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
			s->sk_type,
			s->sk_socket ?
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
			(s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
			sock_i_ino(s));

		if (u->addr) {	/* ->addr is stable under unix_table_lock */
			int i, len;
			seq_putc(seq, ' ');

			i = 0;
			/* sun_path length: strip the sun_family prefix, and
			 * the trailing NUL for pathname (non-abstract) names
			 */
			len = u->addr->len - sizeof(short);
			if (!UNIX_ABSTRACT(s))
				len--;
			else {
				seq_putc(seq, '@');
				i++;
			}
			for ( ; i < len; i++)
				seq_putc(seq, u->addr->name->sun_path[i] ?:
					 '@');
		}
		unix_state_unlock(s);
		seq_putc(seq, '\n');
	}

	return 0;
}
2864
/* Iteration callbacks for /proc/net/unix. */
static const struct seq_operations unix_seq_ops = {
	.start  = unix_seq_start,
	.next   = unix_seq_next,
	.stop   = unix_seq_stop,
	.show   = unix_seq_show,
};
2871 #endif
2872
/* socket(2) hookup: dispatches PF_UNIX socket creation to unix_create(). */
static const struct net_proto_family unix_family_ops = {
	.family = PF_UNIX,
	.create = unix_create,
	.owner  = THIS_MODULE,
};
2878
2879
/*
 * Per-network-namespace init: default datagram backlog, sysctl table,
 * and the /proc/net/unix file.  On proc failure the sysctl registration
 * is unwound; errors are reported as -ENOMEM.
 */
static int __net_init unix_net_init(struct net *net)
{
	int error = -ENOMEM;

	net->unx.sysctl_max_dgram_qlen = 10;
	if (unix_sysctl_register(net))
		goto out;

#ifdef CONFIG_PROC_FS
	if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
			sizeof(struct seq_net_private))) {
		unix_sysctl_unregister(net);
		goto out;
	}
#endif
	error = 0;
out:
	return error;
}
2899
/* Per-network-namespace teardown, mirroring unix_net_init(). */
static void __net_exit unix_net_exit(struct net *net)
{
	unix_sysctl_unregister(net);
	remove_proc_entry("unix", net->proc_net);
}
2905
/* Registered in af_unix_init(); runs for every network namespace. */
static struct pernet_operations unix_net_ops = {
	.init = unix_net_init,
	.exit = unix_net_exit,
};
2910
2911 static int __init af_unix_init(void)
2912 {
2913         int rc = -1;
2914
2915         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2916
2917         rc = proto_register(&unix_proto, 1);
2918         if (rc != 0) {
2919                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2920                 goto out;
2921         }
2922
2923         sock_register(&unix_family_ops);
2924         register_pernet_subsys(&unix_net_ops);
2925 out:
2926         return rc;
2927 }
2928
/* Module exit: unregister everything set up in af_unix_init(). */
static void __exit af_unix_exit(void)
{
	sock_unregister(PF_UNIX);
	proto_unregister(&unix_proto);
	unregister_pernet_subsys(&unix_net_ops);
}
2935
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket.  But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);