OSDN Git Service

block: Fix partition support for host aware zoned block devices
[tomoyo/tomoyo-test1.git] / net / unix / af_unix.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * NET4:        Implementation of BSD Unix domain sockets.
4  *
5  * Authors:     Alan Cox, <alan@lxorguk.ukuu.org.uk>
6  *
7  * Fixes:
8  *              Linus Torvalds  :       Assorted bug cures.
9  *              Niibe Yutaka    :       async I/O support.
10  *              Carsten Paeth   :       PF_UNIX check, address fixes.
11  *              Alan Cox        :       Limit size of allocated blocks.
12  *              Alan Cox        :       Fixed the stupid socketpair bug.
13  *              Alan Cox        :       BSD compatibility fine tuning.
14  *              Alan Cox        :       Fixed a bug in connect when interrupted.
15  *              Alan Cox        :       Sorted out a proper draft version of
16  *                                      file descriptor passing hacked up from
17  *                                      Mike Shaver's work.
18  *              Marty Leisner   :       Fixes to fd passing
19  *              Nick Nevin      :       recvmsg bugfix.
20  *              Alan Cox        :       Started proper garbage collector
21  *              Heiko EiBfeldt  :       Missing verify_area check
22  *              Alan Cox        :       Started POSIXisms
23  *              Andreas Schwab  :       Replace inode by dentry for proper
24  *                                      reference counting
25  *              Kirk Petersen   :       Made this a module
26  *          Christoph Rohland   :       Elegant non-blocking accept/connect algorithm.
27  *                                      Lots of bug fixes.
28  *           Alexey Kuznetsov   :       Repaired (I hope) bugs introduced
29  *                                      by above two patches.
30  *           Andrea Arcangeli   :       If possible we block in connect(2)
31  *                                      if the max backlog of the listen socket
32  *                                      has been reached. This won't break
33  *                                      old apps and it will avoid a huge number
34  *                                      of socks hashed (this is for unix_gc()
35  *                                      performance reasons).
36  *                                      Security fix that limits the max
37  *                                      number of socks to 2*max_files and
38  *                                      the number of skb queueable in the
39  *                                      dgram receiver.
40  *              Artur Skawina   :       Hash function optimizations
41  *           Alexey Kuznetsov   :       Full scale SMP. Lot of bugs are introduced 8)
42  *            Malcolm Beattie   :       Set peercred for socketpair
43  *           Michal Ostrowski   :       Module initialization cleanup.
44  *           Arnaldo C. Melo    :       Remove MOD_{INC,DEC}_USE_COUNT,
45  *                                      the core infrastructure is doing that
46  *                                      for all net proto families now (2.5.69+)
47  *
48  * Known differences from reference BSD that was tested:
49  *
50  *      [TO FIX]
51  *      ECONNREFUSED is not returned from one end of a connected() socket to the
52  *              other the moment one end closes.
53  *      fstat() doesn't return st_dev=0, and give the blksize as high water mark
54  *              and a fake inode identifier (nor the BSD first socket fstat twice bug).
55  *      [NOT TO FIX]
56  *      accept() returns a path name even if the connecting socket has closed
57  *              in the meantime (BSD loses the path and gives up).
58  *      accept() returns 0 length path for an unbound connector. BSD returns 16
59  *              and a null first byte in the path (but not for gethost/peername - BSD bug ??)
60  *      socketpair(...SOCK_RAW..) doesn't panic the kernel.
61  *      BSD af_unix apparently has connect forgetting to block properly.
62  *              (need to check this with the POSIX spec in detail)
63  *
64  * Differences from 2.0.0-11-... (ANK)
65  *      Bug fixes and improvements.
66  *              - client shutdown killed server socket.
67  *              - removed all useless cli/sti pairs.
68  *
69  *      Semantic changes/extensions.
70  *              - generic control message passing.
71  *              - SCM_CREDENTIALS control message.
72  *              - "Abstract" (not FS based) socket bindings.
73  *                Abstract names are sequences of bytes (not zero terminated)
74  *                started by 0, so that this name space does not intersect
75  *                with BSD names.
76  */
77
78 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
79
80 #include <linux/module.h>
81 #include <linux/kernel.h>
82 #include <linux/signal.h>
83 #include <linux/sched/signal.h>
84 #include <linux/errno.h>
85 #include <linux/string.h>
86 #include <linux/stat.h>
87 #include <linux/dcache.h>
88 #include <linux/namei.h>
89 #include <linux/socket.h>
90 #include <linux/un.h>
91 #include <linux/fcntl.h>
92 #include <linux/termios.h>
93 #include <linux/sockios.h>
94 #include <linux/net.h>
95 #include <linux/in.h>
96 #include <linux/fs.h>
97 #include <linux/slab.h>
98 #include <linux/uaccess.h>
99 #include <linux/skbuff.h>
100 #include <linux/netdevice.h>
101 #include <net/net_namespace.h>
102 #include <net/sock.h>
103 #include <net/tcp_states.h>
104 #include <net/af_unix.h>
105 #include <linux/proc_fs.h>
106 #include <linux/seq_file.h>
107 #include <net/scm.h>
108 #include <linux/init.h>
109 #include <linux/poll.h>
110 #include <linux/rtnetlink.h>
111 #include <linux/mount.h>
112 #include <net/checksum.h>
113 #include <linux/security.h>
114 #include <linux/freezer.h>
115 #include <linux/file.h>
116
117 #include "scm.h"
118
119 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
120 EXPORT_SYMBOL_GPL(unix_socket_table);
121 DEFINE_SPINLOCK(unix_table_lock);
122 EXPORT_SYMBOL_GPL(unix_table_lock);
123 static atomic_long_t unix_nr_socks;
124
125
126 static struct hlist_head *unix_sockets_unbound(void *addr)
127 {
128         unsigned long hash = (unsigned long)addr;
129
130         hash ^= hash >> 16;
131         hash ^= hash >> 8;
132         hash %= UNIX_HASH_SIZE;
133         return &unix_socket_table[UNIX_HASH_SIZE + hash];
134 }
135
/*
 * True when the hash stored in sk's address is below UNIX_HASH_SIZE.
 * Dereferences unix_sk(sk)->addr unconditionally, so the socket must
 * already have an address.
 * NOTE(review): presumably filesystem-bound sockets store a hash value
 * >= UNIX_HASH_SIZE; that assignment is not visible here -- confirm.
 */
#define UNIX_ABSTRACT(sk) \
	(unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
137
138 #ifdef CONFIG_SECURITY_NETWORK
139 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
140 {
141         UNIXCB(skb).secid = scm->secid;
142 }
143
144 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
145 {
146         scm->secid = UNIXCB(skb).secid;
147 }
148
149 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
150 {
151         return (scm->secid == UNIXCB(skb).secid);
152 }
153 #else
154 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
155 { }
156
157 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
158 { }
159
160 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
161 {
162         return true;
163 }
164 #endif /* CONFIG_SECURITY_NETWORK */
165
166 /*
167  *  SMP locking strategy:
168  *    hash table is protected with spinlock unix_table_lock
169  *    each socket state is protected by separate spin lock.
170  */
171
172 static inline unsigned int unix_hash_fold(__wsum n)
173 {
174         unsigned int hash = (__force unsigned int)csum_fold(n);
175
176         hash ^= hash>>8;
177         return hash&(UNIX_HASH_SIZE-1);
178 }
179
/*
 * Accessor for the peer pointer of an AF_UNIX socket. It expands to an
 * lvalue, and unix_release_sock() assigns through it.
 */
#define unix_peer(sk)	(unix_sk(sk)->peer)
181
/* Return non-zero when @osk's peer pointer refers to @sk. */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return sk == unix_peer(osk);
}
186
187 static inline int unix_may_send(struct sock *sk, struct sock *osk)
188 {
189         return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
190 }
191
192 static inline int unix_recvq_full(const struct sock *sk)
193 {
194         return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
195 }
196
197 static inline int unix_recvq_full_lockless(const struct sock *sk)
198 {
199         return skb_queue_len_lockless(&sk->sk_receive_queue) >
200                 READ_ONCE(sk->sk_max_ack_backlog);
201 }
202
203 struct sock *unix_peer_get(struct sock *s)
204 {
205         struct sock *peer;
206
207         unix_state_lock(s);
208         peer = unix_peer(s);
209         if (peer)
210                 sock_hold(peer);
211         unix_state_unlock(s);
212         return peer;
213 }
214 EXPORT_SYMBOL_GPL(unix_peer_get);
215
216 static inline void unix_release_addr(struct unix_address *addr)
217 {
218         if (refcount_dec_and_test(&addr->refcnt))
219                 kfree(addr);
220 }
221
222 /*
223  *      Check unix socket name:
224  *              - should be not zero length.
225  *              - if started by not zero, should be NULL terminated (FS object)
226  *              - if started by zero, it is abstract name.
227  */
228
229 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
230 {
231         *hashp = 0;
232
233         if (len <= sizeof(short) || len > sizeof(*sunaddr))
234                 return -EINVAL;
235         if (!sunaddr || sunaddr->sun_family != AF_UNIX)
236                 return -EINVAL;
237         if (sunaddr->sun_path[0]) {
238                 /*
239                  * This may look like an off by one error but it is a bit more
240                  * subtle. 108 is the longest valid AF_UNIX path for a binding.
241                  * sun_path[108] doesn't as such exist.  However in kernel space
242                  * we are guaranteed that it is a valid memory location in our
243                  * kernel address buffer.
244                  */
245                 ((char *)sunaddr)[len] = 0;
246                 len = strlen(sunaddr->sun_path)+1+sizeof(short);
247                 return len;
248         }
249
250         *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
251         return len;
252 }
253
/*
 * Unlink @sk from its hash chain. Takes no lock itself;
 * unix_remove_socket() is the variant that wraps this in unix_table_lock.
 */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
258
/*
 * Link @sk onto the chain @list. Warns if @sk is still hashed somewhere.
 * Takes no lock itself; unix_insert_socket() is the locked variant.
 */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));

	sk_add_node(sk, list);
}
264
265 static inline void unix_remove_socket(struct sock *sk)
266 {
267         spin_lock(&unix_table_lock);
268         __unix_remove_socket(sk);
269         spin_unlock(&unix_table_lock);
270 }
271
272 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
273 {
274         spin_lock(&unix_table_lock);
275         __unix_insert_socket(list, sk);
276         spin_unlock(&unix_table_lock);
277 }
278
279 static struct sock *__unix_find_socket_byname(struct net *net,
280                                               struct sockaddr_un *sunname,
281                                               int len, int type, unsigned int hash)
282 {
283         struct sock *s;
284
285         sk_for_each(s, &unix_socket_table[hash ^ type]) {
286                 struct unix_sock *u = unix_sk(s);
287
288                 if (!net_eq(sock_net(s), net))
289                         continue;
290
291                 if (u->addr->len == len &&
292                     !memcmp(u->addr->name, sunname, len))
293                         return s;
294         }
295         return NULL;
296 }
297
298 static inline struct sock *unix_find_socket_byname(struct net *net,
299                                                    struct sockaddr_un *sunname,
300                                                    int len, int type,
301                                                    unsigned int hash)
302 {
303         struct sock *s;
304
305         spin_lock(&unix_table_lock);
306         s = __unix_find_socket_byname(net, sunname, len, type, hash);
307         if (s)
308                 sock_hold(s);
309         spin_unlock(&unix_table_lock);
310         return s;
311 }
312
313 static struct sock *unix_find_socket_byinode(struct inode *i)
314 {
315         struct sock *s;
316
317         spin_lock(&unix_table_lock);
318         sk_for_each(s,
319                     &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
320                 struct dentry *dentry = unix_sk(s)->path.dentry;
321
322                 if (dentry && d_backing_inode(dentry) == i) {
323                         sock_hold(s);
324                         goto found;
325                 }
326         }
327         s = NULL;
328 found:
329         spin_unlock(&unix_table_lock);
330         return s;
331 }
332
333 /* Support code for asymmetrically connected dgram sockets
334  *
335  * If a datagram socket is connected to a socket not itself connected
336  * to the first socket (eg, /dev/log), clients may only enqueue more
337  * messages if the present receive queue of the server socket is not
338  * "too large". This means there's a second writeability condition
339  * poll and sendmsg need to test. The dgram recv code will do a wake
340  * up on the peer_wait wait queue of a socket upon reception of a
341  * datagram which needs to be propagated to sleeping would-be writers
342  * since these might not have sent anything so far. This can't be
343  * accomplished via poll_wait because the lifetime of the server
344  * socket might be less than that of its clients if these break their
345  * association with it or if the server socket is closed while clients
346  * are still connected to it and there's no way to inform "a polling
347  * implementation" that it should let go of a certain wait queue
348  *
349  * In order to propagate a wake up, a wait_queue_entry_t of the client
350  * socket is enqueued on the peer_wait queue of the server socket
351  * whose wake function does a wake_up on the ordinary client socket
352  * wait queue. This connection is established whenever a write (or
353  * poll for write) hit the flow control condition and broken when the
354  * association to the server socket is dissolved or after a wake up
355  * was relayed.
356  */
357
358 static int unix_dgram_peer_wake_relay(wait_queue_entry_t *q, unsigned mode, int flags,
359                                       void *key)
360 {
361         struct unix_sock *u;
362         wait_queue_head_t *u_sleep;
363
364         u = container_of(q, struct unix_sock, peer_wake);
365
366         __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
367                             q);
368         u->peer_wake.private = NULL;
369
370         /* relaying can only happen while the wq still exists */
371         u_sleep = sk_sleep(&u->sk);
372         if (u_sleep)
373                 wake_up_interruptible_poll(u_sleep, key_to_poll(key));
374
375         return 0;
376 }
377
378 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
379 {
380         struct unix_sock *u, *u_other;
381         int rc;
382
383         u = unix_sk(sk);
384         u_other = unix_sk(other);
385         rc = 0;
386         spin_lock(&u_other->peer_wait.lock);
387
388         if (!u->peer_wake.private) {
389                 u->peer_wake.private = other;
390                 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
391
392                 rc = 1;
393         }
394
395         spin_unlock(&u_other->peer_wait.lock);
396         return rc;
397 }
398
399 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400                                             struct sock *other)
401 {
402         struct unix_sock *u, *u_other;
403
404         u = unix_sk(sk);
405         u_other = unix_sk(other);
406         spin_lock(&u_other->peer_wait.lock);
407
408         if (u->peer_wake.private == other) {
409                 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
410                 u->peer_wake.private = NULL;
411         }
412
413         spin_unlock(&u_other->peer_wait.lock);
414 }
415
416 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417                                                    struct sock *other)
418 {
419         unix_dgram_peer_wake_disconnect(sk, other);
420         wake_up_interruptible_poll(sk_sleep(sk),
421                                    EPOLLOUT |
422                                    EPOLLWRNORM |
423                                    EPOLLWRBAND);
424 }
425
426 /* preconditions:
427  *      - unix_peer(sk) == other
428  *      - association is stable
429  */
430 static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
431 {
432         int connected;
433
434         connected = unix_dgram_peer_wake_connect(sk, other);
435
436         /* If other is SOCK_DEAD, we want to make sure we signal
437          * POLLOUT, such that a subsequent write() can get a
438          * -ECONNREFUSED. Otherwise, if we haven't queued any skbs
439          * to other and its full, we will hang waiting for POLLOUT.
440          */
441         if (unix_recvq_full(other) && !sock_flag(other, SOCK_DEAD))
442                 return 1;
443
444         if (connected)
445                 unix_dgram_peer_wake_disconnect(sk, other);
446
447         return 0;
448 }
449
450 static int unix_writable(const struct sock *sk)
451 {
452         return sk->sk_state != TCP_LISTEN &&
453                (refcount_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
454 }
455
456 static void unix_write_space(struct sock *sk)
457 {
458         struct socket_wq *wq;
459
460         rcu_read_lock();
461         if (unix_writable(sk)) {
462                 wq = rcu_dereference(sk->sk_wq);
463                 if (skwq_has_sleeper(wq))
464                         wake_up_interruptible_sync_poll(&wq->wait,
465                                 EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND);
466                 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
467         }
468         rcu_read_unlock();
469 }
470
471 /* When dgram socket disconnects (or changes its peer), we clear its receive
472  * queue of packets arrived from previous peer. First, it allows to do
473  * flow control based only on wmem_alloc; second, sk connected to peer
474  * may receive messages only from that peer. */
475 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
476 {
477         if (!skb_queue_empty(&sk->sk_receive_queue)) {
478                 skb_queue_purge(&sk->sk_receive_queue);
479                 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
480
481                 /* If one link of bidirectional dgram pipe is disconnected,
482                  * we signal error. Messages are lost. Do not make this,
483                  * when peer was not connected to us.
484                  */
485                 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
486                         other->sk_err = ECONNRESET;
487                         other->sk_error_report(other);
488                 }
489         }
490 }
491
492 static void unix_sock_destructor(struct sock *sk)
493 {
494         struct unix_sock *u = unix_sk(sk);
495
496         skb_queue_purge(&sk->sk_receive_queue);
497
498         WARN_ON(refcount_read(&sk->sk_wmem_alloc));
499         WARN_ON(!sk_unhashed(sk));
500         WARN_ON(sk->sk_socket);
501         if (!sock_flag(sk, SOCK_DEAD)) {
502                 pr_info("Attempt to release alive unix socket: %p\n", sk);
503                 return;
504         }
505
506         if (u->addr)
507                 unix_release_addr(u->addr);
508
509         atomic_long_dec(&unix_nr_socks);
510         local_bh_disable();
511         sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
512         local_bh_enable();
513 #ifdef UNIX_REFCNT_DEBUG
514         pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
515                 atomic_long_read(&unix_nr_socks));
516 #endif
517 }
518
519 static void unix_release_sock(struct sock *sk, int embrion)
520 {
521         struct unix_sock *u = unix_sk(sk);
522         struct path path;
523         struct sock *skpair;
524         struct sk_buff *skb;
525         int state;
526
527         unix_remove_socket(sk);
528
529         /* Clear state */
530         unix_state_lock(sk);
531         sock_orphan(sk);
532         sk->sk_shutdown = SHUTDOWN_MASK;
533         path         = u->path;
534         u->path.dentry = NULL;
535         u->path.mnt = NULL;
536         state = sk->sk_state;
537         sk->sk_state = TCP_CLOSE;
538         unix_state_unlock(sk);
539
540         wake_up_interruptible_all(&u->peer_wait);
541
542         skpair = unix_peer(sk);
543
544         if (skpair != NULL) {
545                 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
546                         unix_state_lock(skpair);
547                         /* No more writes */
548                         skpair->sk_shutdown = SHUTDOWN_MASK;
549                         if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
550                                 skpair->sk_err = ECONNRESET;
551                         unix_state_unlock(skpair);
552                         skpair->sk_state_change(skpair);
553                         sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
554                 }
555
556                 unix_dgram_peer_wake_disconnect(sk, skpair);
557                 sock_put(skpair); /* It may now die */
558                 unix_peer(sk) = NULL;
559         }
560
561         /* Try to flush out this socket. Throw out buffers at least */
562
563         while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
564                 if (state == TCP_LISTEN)
565                         unix_release_sock(skb->sk, 1);
566                 /* passed fds are erased in the kfree_skb hook        */
567                 UNIXCB(skb).consumed = skb->len;
568                 kfree_skb(skb);
569         }
570
571         if (path.dentry)
572                 path_put(&path);
573
574         sock_put(sk);
575
576         /* ---- Socket is dead now and most probably destroyed ---- */
577
578         /*
579          * Fixme: BSD difference: In BSD all sockets connected to us get
580          *        ECONNRESET and we die on the spot. In Linux we behave
581          *        like files and pipes do and wait for the last
582          *        dereference.
583          *
584          * Can't we simply set sock->err?
585          *
586          *        What the above comment does talk about? --ANK(980817)
587          */
588
589         if (unix_tot_inflight)
590                 unix_gc();              /* Garbage collect fds */
591 }
592
593 static void init_peercred(struct sock *sk)
594 {
595         put_pid(sk->sk_peer_pid);
596         if (sk->sk_peer_cred)
597                 put_cred(sk->sk_peer_cred);
598         sk->sk_peer_pid  = get_pid(task_tgid(current));
599         sk->sk_peer_cred = get_current_cred();
600 }
601
602 static void copy_peercred(struct sock *sk, struct sock *peersk)
603 {
604         put_pid(sk->sk_peer_pid);
605         if (sk->sk_peer_cred)
606                 put_cred(sk->sk_peer_cred);
607         sk->sk_peer_pid  = get_pid(peersk->sk_peer_pid);
608         sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
609 }
610
611 static int unix_listen(struct socket *sock, int backlog)
612 {
613         int err;
614         struct sock *sk = sock->sk;
615         struct unix_sock *u = unix_sk(sk);
616         struct pid *old_pid = NULL;
617
618         err = -EOPNOTSUPP;
619         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
620                 goto out;       /* Only stream/seqpacket sockets accept */
621         err = -EINVAL;
622         if (!u->addr)
623                 goto out;       /* No listens on an unbound socket */
624         unix_state_lock(sk);
625         if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
626                 goto out_unlock;
627         if (backlog > sk->sk_max_ack_backlog)
628                 wake_up_interruptible_all(&u->peer_wait);
629         sk->sk_max_ack_backlog  = backlog;
630         sk->sk_state            = TCP_LISTEN;
631         /* set credentials so connect can copy them */
632         init_peercred(sk);
633         err = 0;
634
635 out_unlock:
636         unix_state_unlock(sk);
637         put_pid(old_pid);
638 out:
639         return err;
640 }
641
642 static int unix_release(struct socket *);
643 static int unix_bind(struct socket *, struct sockaddr *, int);
644 static int unix_stream_connect(struct socket *, struct sockaddr *,
645                                int addr_len, int flags);
646 static int unix_socketpair(struct socket *, struct socket *);
647 static int unix_accept(struct socket *, struct socket *, int, bool);
648 static int unix_getname(struct socket *, struct sockaddr *, int);
649 static __poll_t unix_poll(struct file *, struct socket *, poll_table *);
650 static __poll_t unix_dgram_poll(struct file *, struct socket *,
651                                     poll_table *);
652 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
653 #ifdef CONFIG_COMPAT
654 static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
655 #endif
656 static int unix_shutdown(struct socket *, int);
657 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
658 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
659 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
660                                     size_t size, int flags);
661 static ssize_t unix_stream_splice_read(struct socket *,  loff_t *ppos,
662                                        struct pipe_inode_info *, size_t size,
663                                        unsigned int flags);
664 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
665 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
666 static int unix_dgram_connect(struct socket *, struct sockaddr *,
667                               int, int);
668 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
669 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
670                                   int);
671
672 static int unix_set_peek_off(struct sock *sk, int val)
673 {
674         struct unix_sock *u = unix_sk(sk);
675
676         if (mutex_lock_interruptible(&u->iolock))
677                 return -EINTR;
678
679         sk->sk_peek_off = val;
680         mutex_unlock(&u->iolock);
681
682         return 0;
683 }
684
685 static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
686 {
687         struct sock *sk = sock->sk;
688         struct unix_sock *u;
689
690         if (sk) {
691                 u = unix_sk(sock->sk);
692                 seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds));
693         }
694 }
695
696 static const struct proto_ops unix_stream_ops = {
697         .family =       PF_UNIX,
698         .owner =        THIS_MODULE,
699         .release =      unix_release,
700         .bind =         unix_bind,
701         .connect =      unix_stream_connect,
702         .socketpair =   unix_socketpair,
703         .accept =       unix_accept,
704         .getname =      unix_getname,
705         .poll =         unix_poll,
706         .ioctl =        unix_ioctl,
707 #ifdef CONFIG_COMPAT
708         .compat_ioctl = unix_compat_ioctl,
709 #endif
710         .listen =       unix_listen,
711         .shutdown =     unix_shutdown,
712         .setsockopt =   sock_no_setsockopt,
713         .getsockopt =   sock_no_getsockopt,
714         .sendmsg =      unix_stream_sendmsg,
715         .recvmsg =      unix_stream_recvmsg,
716         .mmap =         sock_no_mmap,
717         .sendpage =     unix_stream_sendpage,
718         .splice_read =  unix_stream_splice_read,
719         .set_peek_off = unix_set_peek_off,
720         .show_fdinfo =  unix_show_fdinfo,
721 };
722
723 static const struct proto_ops unix_dgram_ops = {
724         .family =       PF_UNIX,
725         .owner =        THIS_MODULE,
726         .release =      unix_release,
727         .bind =         unix_bind,
728         .connect =      unix_dgram_connect,
729         .socketpair =   unix_socketpair,
730         .accept =       sock_no_accept,
731         .getname =      unix_getname,
732         .poll =         unix_dgram_poll,
733         .ioctl =        unix_ioctl,
734 #ifdef CONFIG_COMPAT
735         .compat_ioctl = unix_compat_ioctl,
736 #endif
737         .listen =       sock_no_listen,
738         .shutdown =     unix_shutdown,
739         .setsockopt =   sock_no_setsockopt,
740         .getsockopt =   sock_no_getsockopt,
741         .sendmsg =      unix_dgram_sendmsg,
742         .recvmsg =      unix_dgram_recvmsg,
743         .mmap =         sock_no_mmap,
744         .sendpage =     sock_no_sendpage,
745         .set_peek_off = unix_set_peek_off,
746         .show_fdinfo =  unix_show_fdinfo,
747 };
748
749 static const struct proto_ops unix_seqpacket_ops = {
750         .family =       PF_UNIX,
751         .owner =        THIS_MODULE,
752         .release =      unix_release,
753         .bind =         unix_bind,
754         .connect =      unix_stream_connect,
755         .socketpair =   unix_socketpair,
756         .accept =       unix_accept,
757         .getname =      unix_getname,
758         .poll =         unix_dgram_poll,
759         .ioctl =        unix_ioctl,
760 #ifdef CONFIG_COMPAT
761         .compat_ioctl = unix_compat_ioctl,
762 #endif
763         .listen =       unix_listen,
764         .shutdown =     unix_shutdown,
765         .setsockopt =   sock_no_setsockopt,
766         .getsockopt =   sock_no_getsockopt,
767         .sendmsg =      unix_seqpacket_sendmsg,
768         .recvmsg =      unix_seqpacket_recvmsg,
769         .mmap =         sock_no_mmap,
770         .sendpage =     sock_no_sendpage,
771         .set_peek_off = unix_set_peek_off,
772         .show_fdinfo =  unix_show_fdinfo,
773 };
774
775 static struct proto unix_proto = {
776         .name                   = "UNIX",
777         .owner                  = THIS_MODULE,
778         .obj_size               = sizeof(struct unix_sock),
779 };
780
781 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
782 {
783         struct sock *sk = NULL;
784         struct unix_sock *u;
785
786         atomic_long_inc(&unix_nr_socks);
787         if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
788                 goto out;
789
790         sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
791         if (!sk)
792                 goto out;
793
794         sock_init_data(sock, sk);
795
796         sk->sk_allocation       = GFP_KERNEL_ACCOUNT;
797         sk->sk_write_space      = unix_write_space;
798         sk->sk_max_ack_backlog  = net->unx.sysctl_max_dgram_qlen;
799         sk->sk_destruct         = unix_sock_destructor;
800         u         = unix_sk(sk);
801         u->path.dentry = NULL;
802         u->path.mnt = NULL;
803         spin_lock_init(&u->lock);
804         atomic_long_set(&u->inflight, 0);
805         INIT_LIST_HEAD(&u->link);
806         mutex_init(&u->iolock); /* single task reading lock */
807         mutex_init(&u->bindlock); /* single task binding lock */
808         init_waitqueue_head(&u->peer_wait);
809         init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
810         memset(&u->scm_stat, 0, sizeof(struct scm_stat));
811         unix_insert_socket(unix_sockets_unbound(sk), sk);
812 out:
813         if (sk == NULL)
814                 atomic_long_dec(&unix_nr_socks);
815         else {
816                 local_bh_disable();
817                 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
818                 local_bh_enable();
819         }
820         return sk;
821 }
822
823 static int unix_create(struct net *net, struct socket *sock, int protocol,
824                        int kern)
825 {
826         if (protocol && protocol != PF_UNIX)
827                 return -EPROTONOSUPPORT;
828
829         sock->state = SS_UNCONNECTED;
830
831         switch (sock->type) {
832         case SOCK_STREAM:
833                 sock->ops = &unix_stream_ops;
834                 break;
835                 /*
836                  *      Believe it or not BSD has AF_UNIX, SOCK_RAW though
837                  *      nothing uses it.
838                  */
839         case SOCK_RAW:
840                 sock->type = SOCK_DGRAM;
841                 /* fall through */
842         case SOCK_DGRAM:
843                 sock->ops = &unix_dgram_ops;
844                 break;
845         case SOCK_SEQPACKET:
846                 sock->ops = &unix_seqpacket_ops;
847                 break;
848         default:
849                 return -ESOCKTNOSUPPORT;
850         }
851
852         return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
853 }
854
855 static int unix_release(struct socket *sock)
856 {
857         struct sock *sk = sock->sk;
858
859         if (!sk)
860                 return 0;
861
862         unix_release_sock(sk, 0);
863         sock->sk = NULL;
864
865         return 0;
866 }
867
868 static int unix_autobind(struct socket *sock)
869 {
870         struct sock *sk = sock->sk;
871         struct net *net = sock_net(sk);
872         struct unix_sock *u = unix_sk(sk);
873         static u32 ordernum = 1;
874         struct unix_address *addr;
875         int err;
876         unsigned int retries = 0;
877
878         err = mutex_lock_interruptible(&u->bindlock);
879         if (err)
880                 return err;
881
882         err = 0;
883         if (u->addr)
884                 goto out;
885
886         err = -ENOMEM;
887         addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
888         if (!addr)
889                 goto out;
890
891         addr->name->sun_family = AF_UNIX;
892         refcount_set(&addr->refcnt, 1);
893
894 retry:
895         addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
896         addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
897
898         spin_lock(&unix_table_lock);
899         ordernum = (ordernum+1)&0xFFFFF;
900
901         if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
902                                       addr->hash)) {
903                 spin_unlock(&unix_table_lock);
904                 /*
905                  * __unix_find_socket_byname() may take long time if many names
906                  * are already in use.
907                  */
908                 cond_resched();
909                 /* Give up if all names seems to be in use. */
910                 if (retries++ == 0xFFFFF) {
911                         err = -ENOSPC;
912                         kfree(addr);
913                         goto out;
914                 }
915                 goto retry;
916         }
917         addr->hash ^= sk->sk_type;
918
919         __unix_remove_socket(sk);
920         smp_store_release(&u->addr, addr);
921         __unix_insert_socket(&unix_socket_table[addr->hash], sk);
922         spin_unlock(&unix_table_lock);
923         err = 0;
924
925 out:    mutex_unlock(&u->bindlock);
926         return err;
927 }
928
/*
 * Look up the socket addressed by @sunname.
 *
 * If sun_path[0] is non-zero the name is resolved as a filesystem path:
 * write permission on the inode is required, the inode must be a socket and
 * the socket found for it must have type @type.  Otherwise the name is looked
 * up with unix_find_socket_byname().
 *
 * Returns the socket found (the path-based mismatch branch drops it with
 * sock_put(), so the caller presumably owns a reference on success), or NULL
 * with *@error set: the kern_path()/inode_permission() error, -ECONNREFUSED
 * when nothing suitable is found, or -EPROTOTYPE on a type mismatch.
 */
static struct sock *unix_find_other(struct net *net,
                                    struct sockaddr_un *sunname, int len,
                                    int type, unsigned int hash, int *error)
{
        struct sock *found;
        struct inode *inode;
        struct path path;
        int err;

        if (!sunname->sun_path[0]) {
                found = unix_find_socket_byname(net, sunname, len, type, hash);
                if (!found) {
                        err = -ECONNREFUSED;
                        goto fail;
                }
                if (unix_sk(found)->path.dentry)
                        touch_atime(&unix_sk(found)->path);
                return found;
        }

        err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
        if (err)
                goto fail;

        inode = d_backing_inode(path.dentry);
        err = inode_permission(inode, MAY_WRITE);
        if (err)
                goto put_fail;

        err = -ECONNREFUSED;
        if (!S_ISSOCK(inode->i_mode))
                goto put_fail;

        found = unix_find_socket_byinode(inode);
        if (!found)
                goto put_fail;

        if (found->sk_type != type) {
                /* Wrong socket type: drop the path and the socket. */
                path_put(&path);
                sock_put(found);
                err = -EPROTOTYPE;
                goto fail;
        }

        touch_atime(&path);
        path_put(&path);
        return found;

put_fail:
        path_put(&path);
fail:
        *error = err;
        return NULL;
}
983
984 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
985 {
986         struct dentry *dentry;
987         struct path path;
988         int err = 0;
989         /*
990          * Get the parent directory, calculate the hash for last
991          * component.
992          */
993         dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
994         err = PTR_ERR(dentry);
995         if (IS_ERR(dentry))
996                 return err;
997
998         /*
999          * All right, let's create it.
1000          */
1001         err = security_path_mknod(&path, dentry, mode, 0);
1002         if (!err) {
1003                 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
1004                 if (!err) {
1005                         res->mnt = mntget(path.mnt);
1006                         res->dentry = dget(dentry);
1007                 }
1008         }
1009         done_path_create(&path, dentry);
1010         return err;
1011 }
1012
/*
 * Bind @sock to the address in @uaddr (@addr_len bytes).
 *
 * Three cases are handled:
 *  - addr_len == sizeof(short): only the family was given, so the work is
 *    delegated to unix_autobind();
 *  - sun_path[0] != 0: a filesystem node is created with unix_mknod() and
 *    the socket is hashed by the inode number of that node;
 *  - otherwise: the name is checked for duplicates with
 *    __unix_find_socket_byname() under unix_table_lock and hashed by name.
 *
 * Returns 0 on success or a negative errno: -EINVAL (address too short,
 * wrong family, or socket already has an address), -EADDRINUSE (node or
 * name already exists), -ENOMEM, or whatever unix_mkname(), unix_mknod(),
 * unix_autobind() or mutex_lock_interruptible() report.
 *
 * NOTE(review): unix_mknod() runs before bindlock is taken and before the
 * u->addr check.  On the later failure paths only path_put() is called on
 * the new path; nothing in this function unlinks the node again - confirm
 * whether a stale socket file can be left behind.
 */
1013 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
1014 {
1015         struct sock *sk = sock->sk;
1016         struct net *net = sock_net(sk);
1017         struct unix_sock *u = unix_sk(sk);
1018         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1019         char *sun_path = sunaddr->sun_path;
1020         int err;
1021         unsigned int hash;
1022         struct unix_address *addr;
1023         struct hlist_head *list;
             /* Zeroed so that path_put() at out_put is given an empty path when no node was created. */
1024         struct path path = { };
1025
1026         err = -EINVAL;
1027         if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1028             sunaddr->sun_family != AF_UNIX)
1029                 goto out;
1030
             /* Family only, no name: pick a name automatically. */
1031         if (addr_len == sizeof(short)) {
1032                 err = unix_autobind(sock);
1033                 goto out;
1034         }
1035
             /* On success unix_mkname() returns the address length to use and fills in hash. */
1036         err = unix_mkname(sunaddr, addr_len, &hash);
1037         if (err < 0)
1038                 goto out;
1039         addr_len = err;
1040
1041         if (sun_path[0]) {
                     /* Node mode: socket type bits plus the socket inode's mode filtered by the umask. */
1042                 umode_t mode = S_IFSOCK |
1043                        (SOCK_INODE(sock)->i_mode & ~current_umask());
1044                 err = unix_mknod(sun_path, mode, &path);
1045                 if (err) {
1046                         if (err == -EEXIST)
1047                                 err = -EADDRINUSE;
1048                         goto out;
1049                 }
1050         }
1051
1052         err = mutex_lock_interruptible(&u->bindlock);
1053         if (err)
1054                 goto out_put;
1055
             /* A socket that already has an address cannot be bound again. */
1056         err = -EINVAL;
1057         if (u->addr)
1058                 goto out_up;
1059
1060         err = -ENOMEM;
1061         addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1062         if (!addr)
1063                 goto out_up;
1064
1065         memcpy(addr->name, sunaddr, addr_len);
1066         addr->len = addr_len;
1067         addr->hash = hash ^ sk->sk_type;
1068         refcount_set(&addr->refcnt, 1);
1069
1070         if (sun_path[0]) {
                     /*
                      * Filesystem socket: addr->hash is set to UNIX_HASH_SIZE instead
                      * of a name hash, and the bucket is chosen from the inode number.
                      */
1071                 addr->hash = UNIX_HASH_SIZE;
1072                 hash = d_backing_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1073                 spin_lock(&unix_table_lock);
1074                 u->path = path;
1075                 list = &unix_socket_table[hash];
1076         } else {
                     /* Name-hashed socket: refuse a name that is already in the table. */
1077                 spin_lock(&unix_table_lock);
1078                 err = -EADDRINUSE;
1079                 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1080                                               sk->sk_type, hash)) {
1081                         unix_release_addr(addr);
1082                         goto out_unlock;
1083                 }
1084
1085                 list = &unix_socket_table[addr->hash];
1086         }
1087
             /* Move the socket from its current hash chain to the one for the new address. */
1088         err = 0;
1089         __unix_remove_socket(sk);
1090         smp_store_release(&u->addr, addr);
1091         __unix_insert_socket(list, sk);
1092
1093 out_unlock:
1094         spin_unlock(&unix_table_lock);
1095 out_up:
1096         mutex_unlock(&u->bindlock);
1097 out_put:
             /* On success the path references now belong to u->path; drop them only on failure. */
1098         if (err)
1099                 path_put(&path);
1100 out:
1101         return err;
1102 }
1103
/*
 * Take the state lock of @sk1 and, if it is a distinct non-NULL socket, of
 * @sk2 as well.  When both are locked, the socket with the lower address is
 * locked first and the other one is taken with the nested lock variant.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
        struct sock *first;
        struct sock *second;

        if (!sk2 || unlikely(sk1 == sk2)) {
                unix_state_lock(sk1);
                return;
        }

        if (sk1 < sk2) {
                first = sk1;
                second = sk2;
        } else {
                first = sk2;
                second = sk1;
        }

        unix_state_lock(first);
        unix_state_lock_nested(second);
}
1118
/*
 * Counterpart of unix_state_double_lock(): release the state lock of @sk1
 * and, if @sk2 is a distinct non-NULL socket, of @sk2 too.
 */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
        unix_state_unlock(sk1);

        if (sk2 && likely(sk1 != sk2))
                unix_state_unlock(sk2);
}
1128
1129 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1130                               int alen, int flags)
1131 {
1132         struct sock *sk = sock->sk;
1133         struct net *net = sock_net(sk);
1134         struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1135         struct sock *other;
1136         unsigned int hash;
1137         int err;
1138
1139         err = -EINVAL;
1140         if (alen < offsetofend(struct sockaddr, sa_family))
1141                 goto out;
1142
1143         if (addr->sa_family != AF_UNSPEC) {
1144                 err = unix_mkname(sunaddr, alen, &hash);
1145                 if (err < 0)
1146                         goto out;
1147                 alen = err;
1148
1149                 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1150                     !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1151                         goto out;
1152
1153 restart:
1154                 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1155                 if (!other)
1156                         goto out;
1157
1158                 unix_state_double_lock(sk, other);
1159
1160                 /* Apparently VFS overslept socket death. Retry. */
1161                 if (sock_flag(other, SOCK_DEAD)) {
1162                         unix_state_double_unlock(sk, other);
1163                         sock_put(other);
1164                         goto restart;
1165                 }
1166
1167                 err = -EPERM;
1168                 if (!unix_may_send(sk, other))
1169                         goto out_unlock;
1170
1171                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1172                 if (err)
1173                         goto out_unlock;
1174
1175         } else {
1176                 /*
1177                  *      1003.1g breaking connected state with AF_UNSPEC
1178                  */
1179                 other = NULL;
1180                 unix_state_double_lock(sk, other);
1181         }
1182
1183         /*
1184          * If it was connected, reconnect.
1185          */
1186         if (unix_peer(sk)) {
1187                 struct sock *old_peer = unix_peer(sk);
1188                 unix_peer(sk) = other;
1189                 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1190
1191                 unix_state_double_unlock(sk, other);
1192
1193                 if (other != old_peer)
1194                         unix_dgram_disconnected(sk, old_peer);
1195                 sock_put(old_peer);
1196         } else {
1197                 unix_peer(sk) = other;
1198                 unix_state_double_unlock(sk, other);
1199         }
1200         return 0;
1201
1202 out_unlock:
1203         unix_state_double_unlock(sk, other);
1204         sock_put(other);
1205 out:
1206         return err;
1207 }
1208
1209 static long unix_wait_for_peer(struct sock *other, long timeo)
1210 {
1211         struct unix_sock *u = unix_sk(other);
1212         int sched;
1213         DEFINE_WAIT(wait);
1214
1215         prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1216
1217         sched = !sock_flag(other, SOCK_DEAD) &&
1218                 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1219                 unix_recvq_full(other);
1220
1221         unix_state_unlock(other);
1222
1223         if (sched)
1224                 timeo = schedule_timeout(timeo);
1225
1226         finish_wait(&u->peer_wait, &wait);
1227         return timeo;
1228 }
1229
/*
 * connect() for connection-oriented AF_UNIX sockets.
 *
 * A new sock (newsk) and a one-byte skb are allocated up front.  The
 * listening socket is then looked up and locked; if its receive queue is
 * full the caller waits (or fails with -EAGAIN when the timeout is zero)
 * and the lookup is restarted.  Once both state locks are held, newsk is
 * set up as the peer of sk, inherits the listener's address and path, and
 * the skb is queued on the listener's receive queue, from which
 * unix_accept() later takes it.
 * NOTE(review): presumably sock_wmalloc() ties the skb to newsk, which is
 * how unix_accept() gets the new sock back via skb->sk - confirm.
 *
 * Returns 0 on success or a negative errno: the unix_mkname(),
 * unix_autobind() or unix_find_other() error, -ENOMEM, -ECONNREFUSED
 * (target not listening or shut down for receive), -EAGAIN,
 * sock_intr_errno() when interrupted by a signal, -EISCONN, -EINVAL, or
 * the error from security_unix_stream_connect().
 */
1230 static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
1231                                int addr_len, int flags)
1232 {
1233         struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
1234         struct sock *sk = sock->sk;
1235         struct net *net = sock_net(sk);
1236         struct unix_sock *u = unix_sk(sk), *newu, *otheru;
1237         struct sock *newsk = NULL;
1238         struct sock *other = NULL;
1239         struct sk_buff *skb = NULL;
1240         unsigned int hash;
1241         int st;
1242         int err;
1243         long timeo;
1244
1245         err = unix_mkname(sunaddr, addr_len, &hash);
1246         if (err < 0)
1247                 goto out;
1248         addr_len = err;
1249
             /* With SOCK_PASSCRED set, an unbound socket is autobound before connecting. */
1250         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
1251             (err = unix_autobind(sock)) != 0)
1252                 goto out;
1253
1254         timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);
1255
1256         /* First of all allocate resources.
1257            If we will make it after state is locked,
1258            we will have to recheck all again in any case.
1259          */
1260
1261         err = -ENOMEM;
1262
1263         /* create new sock for complete connection */
1264         newsk = unix_create1(sock_net(sk), NULL, 0);
1265         if (newsk == NULL)
1266                 goto out;
1267
1268         /* Allocate skb for sending to listening sock */
1269         skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
1270         if (skb == NULL)
1271                 goto out;
1272
1273 restart:
1274         /*  Find listening sock. */
1275         other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
1276         if (!other)
1277                 goto out;
1278
1279         /* Latch state of peer */
1280         unix_state_lock(other);
1281
1282         /* Apparently VFS overslept socket death. Retry. */
1283         if (sock_flag(other, SOCK_DEAD)) {
1284                 unix_state_unlock(other);
1285                 sock_put(other);
1286                 goto restart;
1287         }
1288
1289         err = -ECONNREFUSED;
1290         if (other->sk_state != TCP_LISTEN)
1291                 goto out_unlock;
1292         if (other->sk_shutdown & RCV_SHUTDOWN)
1293                 goto out_unlock;
1294
1295         if (unix_recvq_full(other)) {
1296                 err = -EAGAIN;
1297                 if (!timeo)
1298                         goto out_unlock;
1299
                     /* unix_wait_for_peer() drops other's state lock, hence "goto out" and not "out_unlock" below. */
1300                 timeo = unix_wait_for_peer(other, timeo);
1301
1302                 err = sock_intr_errno(timeo);
1303                 if (signal_pending(current))
1304                         goto out;
1305                 sock_put(other);
1306                 goto restart;
1307         }
1308
1309         /* Latch our state.
1310
1311            It is tricky place. We need to grab our state lock and cannot
1312            drop lock on peer. It is dangerous because deadlock is
1313            possible. Connect to self case and simultaneous
1314            attempt to connect are eliminated by checking socket
1315            state. other is TCP_LISTEN, if sk is TCP_LISTEN we
1316            check this before attempt to grab lock.
1317
1318            Well, and we have to recheck the state after socket locked.
1319          */
1320         st = sk->sk_state;
1321
1322         switch (st) {
1323         case TCP_CLOSE:
1324                 /* This is ok... continue with connect */
1325                 break;
1326         case TCP_ESTABLISHED:
1327                 /* Socket is already connected */
1328                 err = -EISCONN;
1329                 goto out_unlock;
1330         default:
1331                 err = -EINVAL;
1332                 goto out_unlock;
1333         }
1334
1335         unix_state_lock_nested(sk);
1336
             /* Our state changed between the unlocked read above and taking the lock: start over. */
1337         if (sk->sk_state != st) {
1338                 unix_state_unlock(sk);
1339                 unix_state_unlock(other);
1340                 sock_put(other);
1341                 goto restart;
1342         }
1343
1344         err = security_unix_stream_connect(sk, other, newsk);
1345         if (err) {
1346                 unix_state_unlock(sk);
1347                 goto out_unlock;
1348         }
1349
1350         /* The way is open! Fastly set all the necessary fields... */
1351
             /* newsk's peer pointer holds a reference on sk. */
1352         sock_hold(sk);
1353         unix_peer(newsk)        = sk;
1354         newsk->sk_state         = TCP_ESTABLISHED;
1355         newsk->sk_type          = sk->sk_type;
1356         init_peercred(newsk);
1357         newu = unix_sk(newsk);
1358         RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
1359         otheru = unix_sk(other);
1360
1361         /* copy address information from listening to new sock
1362          *
1363          * The contents of *(otheru->addr) and otheru->path
1364          * are seen fully set up here, since we have found
1365          * otheru in hash under unix_table_lock.  Insertion
1366          * into the hash chain we'd found it in had been done
1367          * in an earlier critical area protected by unix_table_lock,
1368          * the same one where we'd set *(otheru->addr) contents,
1369          * as well as otheru->path and otheru->addr itself.
1370          *
1371          * Using smp_store_release() here to set newu->addr
1372          * is enough to make those stores, as well as stores
1373          * to newu->path visible to anyone who gets newu->addr
1374          * by smp_load_acquire().  IOW, the same warranties
1375          * as for unix_sock instances bound in unix_bind() or
1376          * in unix_autobind().
1377          */
1378         if (otheru->path.dentry) {
1379                 path_get(&otheru->path);
1380                 newu->path = otheru->path;
1381         }
1382         refcount_inc(&otheru->addr->refcnt);
1383         smp_store_release(&newu->addr, otheru->addr);
1384
1385         /* Set credentials */
1386         copy_peercred(sk, other);
1387
1388         sock->state     = SS_CONNECTED;
1389         sk->sk_state    = TCP_ESTABLISHED;
             /* sk's peer pointer holds a reference on newsk. */
1390         sock_hold(newsk);
1391
1392         smp_mb__after_atomic(); /* sock_hold() does an atomic_inc() */
1393         unix_peer(sk)   = newsk;
1394
1395         unix_state_unlock(sk);
1396
1397         /* take ten and send info to listening sock */
1398         spin_lock(&other->sk_receive_queue.lock);
1399         __skb_queue_tail(&other->sk_receive_queue, skb);
1400         spin_unlock(&other->sk_receive_queue.lock);
1401         unix_state_unlock(other);
1402         other->sk_data_ready(other);
1403         sock_put(other);
1404         return 0;
1405
1406 out_unlock:
1407         if (other)
1408                 unix_state_unlock(other);
1409
1410 out:
             /* Common failure exit: free the pre-allocated skb and sock, drop the listener reference. */
1411         kfree_skb(skb);
1412         if (newsk)
1413                 unix_release_sock(newsk, 0);
1414         if (other)
1415                 sock_put(other);
1416         return err;
1417 }
1418
1419 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1420 {
1421         struct sock *ska = socka->sk, *skb = sockb->sk;
1422
1423         /* Join our sockets back to back */
1424         sock_hold(ska);
1425         sock_hold(skb);
1426         unix_peer(ska) = skb;
1427         unix_peer(skb) = ska;
1428         init_peercred(ska);
1429         init_peercred(skb);
1430
1431         if (ska->sk_type != SOCK_DGRAM) {
1432                 ska->sk_state = TCP_ESTABLISHED;
1433                 skb->sk_state = TCP_ESTABLISHED;
1434                 socka->state  = SS_CONNECTED;
1435                 sockb->state  = SS_CONNECTED;
1436         }
1437         return 0;
1438 }
1439
1440 static void unix_sock_inherit_flags(const struct socket *old,
1441                                     struct socket *new)
1442 {
1443         if (test_bit(SOCK_PASSCRED, &old->flags))
1444                 set_bit(SOCK_PASSCRED, &new->flags);
1445         if (test_bit(SOCK_PASSSEC, &old->flags))
1446                 set_bit(SOCK_PASSSEC, &new->flags);
1447 }
1448
1449 static int unix_accept(struct socket *sock, struct socket *newsock, int flags,
1450                        bool kern)
1451 {
1452         struct sock *sk = sock->sk;
1453         struct sock *tsk;
1454         struct sk_buff *skb;
1455         int err;
1456
1457         err = -EOPNOTSUPP;
1458         if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1459                 goto out;
1460
1461         err = -EINVAL;
1462         if (sk->sk_state != TCP_LISTEN)
1463                 goto out;
1464
1465         /* If socket state is TCP_LISTEN it cannot change (for now...),
1466          * so that no locks are necessary.
1467          */
1468
1469         skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1470         if (!skb) {
1471                 /* This means receive shutdown. */
1472                 if (err == 0)
1473                         err = -EINVAL;
1474                 goto out;
1475         }
1476
1477         tsk = skb->sk;
1478         skb_free_datagram(sk, skb);
1479         wake_up_interruptible(&unix_sk(sk)->peer_wait);
1480
1481         /* attach accepted sock to socket */
1482         unix_state_lock(tsk);
1483         newsock->state = SS_CONNECTED;
1484         unix_sock_inherit_flags(sock, newsock);
1485         sock_graft(tsk, newsock);
1486         unix_state_unlock(tsk);
1487         return 0;
1488
1489 out:
1490         return err;
1491 }
1492
1493
1494 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int peer)
1495 {
1496         struct sock *sk = sock->sk;
1497         struct unix_address *addr;
1498         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1499         int err = 0;
1500
1501         if (peer) {
1502                 sk = unix_peer_get(sk);
1503
1504                 err = -ENOTCONN;
1505                 if (!sk)
1506                         goto out;
1507                 err = 0;
1508         } else {
1509                 sock_hold(sk);
1510         }
1511
1512         addr = smp_load_acquire(&unix_sk(sk)->addr);
1513         if (!addr) {
1514                 sunaddr->sun_family = AF_UNIX;
1515                 sunaddr->sun_path[0] = 0;
1516                 err = sizeof(short);
1517         } else {
1518                 err = addr->len;
1519                 memcpy(sunaddr, addr->name, addr->len);
1520         }
1521         sock_put(sk);
1522 out:
1523         return err;
1524 }
1525
1526 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1527 {
1528         int err = 0;
1529
1530         UNIXCB(skb).pid  = get_pid(scm->pid);
1531         UNIXCB(skb).uid = scm->creds.uid;
1532         UNIXCB(skb).gid = scm->creds.gid;
1533         UNIXCB(skb).fp = NULL;
1534         unix_get_secdata(scm, skb);
1535         if (scm->fp && send_fds)
1536                 err = unix_attach_fds(scm, skb);
1537
1538         skb->destructor = unix_destruct_scm;
1539         return err;
1540 }
1541
1542 static bool unix_passcred_enabled(const struct socket *sock,
1543                                   const struct sock *other)
1544 {
1545         return test_bit(SOCK_PASSCRED, &sock->flags) ||
1546                !other->sk_socket ||
1547                test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1548 }
1549
1550 /*
1551  * Some apps rely on write() giving SCM_CREDENTIALS
1552  * We include credentials if source or destination socket
1553  * asserted SOCK_PASSCRED.
1554  */
1555 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1556                             const struct sock *other)
1557 {
1558         if (UNIXCB(skb).pid)
1559                 return;
1560         if (unix_passcred_enabled(sock, other)) {
1561                 UNIXCB(skb).pid  = get_pid(task_tgid(current));
1562                 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1563         }
1564 }
1565
1566 static int maybe_init_creds(struct scm_cookie *scm,
1567                             struct socket *socket,
1568                             const struct sock *other)
1569 {
1570         int err;
1571         struct msghdr msg = { .msg_controllen = 0 };
1572
1573         err = scm_send(socket, &msg, scm, false);
1574         if (err)
1575                 return err;
1576
1577         if (unix_passcred_enabled(socket, other)) {
1578                 scm->pid = get_pid(task_tgid(current));
1579                 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1580         }
1581         return err;
1582 }
1583
1584 static bool unix_skb_scm_eq(struct sk_buff *skb,
1585                             struct scm_cookie *scm)
1586 {
1587         const struct unix_skb_parms *u = &UNIXCB(skb);
1588
1589         return u->pid == scm->pid &&
1590                uid_eq(u->uid, scm->creds.uid) &&
1591                gid_eq(u->gid, scm->creds.gid) &&
1592                unix_secdata_eq(scm, skb);
1593 }
1594
1595 static void scm_stat_add(struct sock *sk, struct sk_buff *skb)
1596 {
1597         struct scm_fp_list *fp = UNIXCB(skb).fp;
1598         struct unix_sock *u = unix_sk(sk);
1599
1600         lockdep_assert_held(&sk->sk_receive_queue.lock);
1601
1602         if (unlikely(fp && fp->count))
1603                 u->scm_stat.nr_fds += fp->count;
1604 }
1605
1606 static void scm_stat_del(struct sock *sk, struct sk_buff *skb)
1607 {
1608         struct scm_fp_list *fp = UNIXCB(skb).fp;
1609         struct unix_sock *u = unix_sk(sk);
1610
1611         lockdep_assert_held(&sk->sk_receive_queue.lock);
1612
1613         if (unlikely(fp && fp->count))
1614                 u->scm_stat.nr_fds -= fp->count;
1615 }
1616
1617 /*
1618  *      Send AF_UNIX data.
1619  */
1620
1621 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1622                               size_t len)
1623 {
1624         struct sock *sk = sock->sk;
1625         struct net *net = sock_net(sk);
1626         struct unix_sock *u = unix_sk(sk);
1627         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1628         struct sock *other = NULL;
1629         int namelen = 0; /* fake GCC */
1630         int err;
1631         unsigned int hash;
1632         struct sk_buff *skb;
1633         long timeo;
1634         struct scm_cookie scm;
1635         int data_len = 0;
1636         int sk_locked;
1637
1638         wait_for_unix_gc();
1639         err = scm_send(sock, msg, &scm, false);
1640         if (err < 0)
1641                 return err;
1642
1643         err = -EOPNOTSUPP;
1644         if (msg->msg_flags&MSG_OOB)
1645                 goto out;
1646
1647         if (msg->msg_namelen) {
1648                 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1649                 if (err < 0)
1650                         goto out;
1651                 namelen = err;
1652         } else {
1653                 sunaddr = NULL;
1654                 err = -ENOTCONN;
1655                 other = unix_peer_get(sk);
1656                 if (!other)
1657                         goto out;
1658         }
1659
1660         if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1661             && (err = unix_autobind(sock)) != 0)
1662                 goto out;
1663
1664         err = -EMSGSIZE;
1665         if (len > sk->sk_sndbuf - 32)
1666                 goto out;
1667
1668         if (len > SKB_MAX_ALLOC) {
1669                 data_len = min_t(size_t,
1670                                  len - SKB_MAX_ALLOC,
1671                                  MAX_SKB_FRAGS * PAGE_SIZE);
1672                 data_len = PAGE_ALIGN(data_len);
1673
1674                 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1675         }
1676
1677         skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1678                                    msg->msg_flags & MSG_DONTWAIT, &err,
1679                                    PAGE_ALLOC_COSTLY_ORDER);
1680         if (skb == NULL)
1681                 goto out;
1682
1683         err = unix_scm_to_skb(&scm, skb, true);
1684         if (err < 0)
1685                 goto out_free;
1686
1687         skb_put(skb, len - data_len);
1688         skb->data_len = data_len;
1689         skb->len = len;
1690         err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1691         if (err)
1692                 goto out_free;
1693
1694         timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1695
1696 restart:
1697         if (!other) {
1698                 err = -ECONNRESET;
1699                 if (sunaddr == NULL)
1700                         goto out_free;
1701
1702                 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1703                                         hash, &err);
1704                 if (other == NULL)
1705                         goto out_free;
1706         }
1707
1708         if (sk_filter(other, skb) < 0) {
1709                 /* Toss the packet but do not return any error to the sender */
1710                 err = len;
1711                 goto out_free;
1712         }
1713
1714         sk_locked = 0;
1715         unix_state_lock(other);
1716 restart_locked:
1717         err = -EPERM;
1718         if (!unix_may_send(sk, other))
1719                 goto out_unlock;
1720
1721         if (unlikely(sock_flag(other, SOCK_DEAD))) {
1722                 /*
1723                  *      Check with 1003.1g - what should
1724                  *      datagram error
1725                  */
1726                 unix_state_unlock(other);
1727                 sock_put(other);
1728
1729                 if (!sk_locked)
1730                         unix_state_lock(sk);
1731
1732                 err = 0;
1733                 if (unix_peer(sk) == other) {
1734                         unix_peer(sk) = NULL;
1735                         unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1736
1737                         unix_state_unlock(sk);
1738
1739                         unix_dgram_disconnected(sk, other);
1740                         sock_put(other);
1741                         err = -ECONNREFUSED;
1742                 } else {
1743                         unix_state_unlock(sk);
1744                 }
1745
1746                 other = NULL;
1747                 if (err)
1748                         goto out_free;
1749                 goto restart;
1750         }
1751
1752         err = -EPIPE;
1753         if (other->sk_shutdown & RCV_SHUTDOWN)
1754                 goto out_unlock;
1755
1756         if (sk->sk_type != SOCK_SEQPACKET) {
1757                 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1758                 if (err)
1759                         goto out_unlock;
1760         }
1761
1762         /* other == sk && unix_peer(other) != sk if
1763          * - unix_peer(sk) == NULL, destination address bound to sk
1764          * - unix_peer(sk) == sk by time of get but disconnected before lock
1765          */
1766         if (other != sk &&
1767             unlikely(unix_peer(other) != sk &&
1768             unix_recvq_full_lockless(other))) {
1769                 if (timeo) {
1770                         timeo = unix_wait_for_peer(other, timeo);
1771
1772                         err = sock_intr_errno(timeo);
1773                         if (signal_pending(current))
1774                                 goto out_free;
1775
1776                         goto restart;
1777                 }
1778
1779                 if (!sk_locked) {
1780                         unix_state_unlock(other);
1781                         unix_state_double_lock(sk, other);
1782                 }
1783
1784                 if (unix_peer(sk) != other ||
1785                     unix_dgram_peer_wake_me(sk, other)) {
1786                         err = -EAGAIN;
1787                         sk_locked = 1;
1788                         goto out_unlock;
1789                 }
1790
1791                 if (!sk_locked) {
1792                         sk_locked = 1;
1793                         goto restart_locked;
1794                 }
1795         }
1796
1797         if (unlikely(sk_locked))
1798                 unix_state_unlock(sk);
1799
1800         if (sock_flag(other, SOCK_RCVTSTAMP))
1801                 __net_timestamp(skb);
1802         maybe_add_creds(skb, sock, other);
1803         spin_lock(&other->sk_receive_queue.lock);
1804         scm_stat_add(other, skb);
1805         __skb_queue_tail(&other->sk_receive_queue, skb);
1806         spin_unlock(&other->sk_receive_queue.lock);
1807         unix_state_unlock(other);
1808         other->sk_data_ready(other);
1809         sock_put(other);
1810         scm_destroy(&scm);
1811         return len;
1812
1813 out_unlock:
1814         if (sk_locked)
1815                 unix_state_unlock(sk);
1816         unix_state_unlock(other);
1817 out_free:
1818         kfree_skb(skb);
1819 out:
1820         if (other)
1821                 sock_put(other);
1822         scm_destroy(&scm);
1823         return err;
1824 }
1825
1826 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1827  * bytes, and a minimum of a full page.
1828  */
1829 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1830
     /*
      * unix_stream_sendmsg - send data on a connected stream socket.
      *
      * The message is cut into skbs; each one is copied from msg->msg_iter,
      * appended to the peer's sk_receive_queue and announced through the
      * peer's sk_data_ready() callback.
      *
      * Returns the number of bytes queued if that is non-zero, otherwise a
      * negative errno:
      *   -EOPNOTSUPP  MSG_OOB was requested, or an address was supplied on a
      *                socket that is not in TCP_ESTABLISHED state
      *   -EISCONN     an address was supplied on an established socket
      *   -ENOTCONN    the socket has no peer
      *   -EPIPE       this side is shut down for sending, or the peer is dead
      *                or shut down for receiving (SIGPIPE is raised as well
      *                when nothing was sent and MSG_NOSIGNAL is not set)
      * plus whatever scm_send(), sock_alloc_send_pskb(), unix_scm_to_skb() or
      * skb_copy_datagram_from_iter() report.
      */
1831 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1832                                size_t len)
1833 {
1834         struct sock *sk = sock->sk;
1835         struct sock *other = NULL;
1836         int err, size;
1837         struct sk_buff *skb;
1838         int sent = 0;
1839         struct scm_cookie scm;
1840         bool fds_sent = false;
1841         int data_len;
1842
1843         wait_for_unix_gc();
1844         err = scm_send(sock, msg, &scm, false);
1845         if (err < 0)
1846                 return err;
1847
1848         err = -EOPNOTSUPP;
1849         if (msg->msg_flags&MSG_OOB)
1850                 goto out_err;
1851
             /* An explicit destination is never accepted here; only the
              * error code depends on the connection state.
              */
1852         if (msg->msg_namelen) {
1853                 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1854                 goto out_err;
1855         } else {
1856                 err = -ENOTCONN;
1857                 other = unix_peer(sk);
1858                 if (!other)
1859                         goto out_err;
1860         }
1861
1862         if (sk->sk_shutdown & SEND_SHUTDOWN)
1863                 goto pipe_err;
1864
             /* One skb per iteration until the whole message is queued. */
1865         while (sent < len) {
1866                 size = len - sent;
1867
1868                 /* Keep two messages in the pipe so it schedules better */
1869                 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1870
1871                 /* allow fallback to order-0 allocations */
1872                 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1873
                     /* Whatever exceeds SKB_MAX_HEAD(0) is requested as
                      * data_len, rounded up to a page boundary but never
                      * more than the chunk itself.
                      */
1874                 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1875
1876                 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1877
1878                 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1879                                            msg->msg_flags & MSG_DONTWAIT, &err,
1880                                            get_order(UNIX_SKB_FRAGS_SZ));
1881                 if (!skb)
1882                         goto out_err;
1883
1884                 /* Only send the fds in the first buffer */
1885                 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1886                 if (err < 0) {
1887                         kfree_skb(skb);
1888                         goto out_err;
1889                 }
1890                 fds_sent = true;
1891
                     /* Reserve size - data_len bytes with skb_put() and
                      * account the remaining data_len bytes by hand before
                      * copying the payload in.
                      */
1892                 skb_put(skb, size - data_len);
1893                 skb->data_len = data_len;
1894                 skb->len = size;
1895                 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1896                 if (err) {
1897                         kfree_skb(skb);
1898                         goto out_err;
1899                 }
1900
                     /* The peer's liveness and shutdown state are checked
                      * under its state lock for every skb.
                      */
1901                 unix_state_lock(other);
1902
1903                 if (sock_flag(other, SOCK_DEAD) ||
1904                     (other->sk_shutdown & RCV_SHUTDOWN))
1905                         goto pipe_err_free;
1906
1907                 maybe_add_creds(skb, sock, other);
1908                 spin_lock(&other->sk_receive_queue.lock);
1909                 scm_stat_add(other, skb);
1910                 __skb_queue_tail(&other->sk_receive_queue, skb);
1911                 spin_unlock(&other->sk_receive_queue.lock);
1912                 unix_state_unlock(other);
1913                 other->sk_data_ready(other);
1914                 sent += size;
1915         }
1916
1917         scm_destroy(&scm);
1918
1919         return sent;
1920
             /* Reached with the peer's state lock held and an unqueued skb. */
1921 pipe_err_free:
1922         unix_state_unlock(other);
1923         kfree_skb(skb);
1924 pipe_err:
1925         if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1926                 send_sig(SIGPIPE, current, 0);
1927         err = -EPIPE;
1928 out_err:
1929         scm_destroy(&scm);
             /* A partial transfer is reported as success; the error code is
              * returned only when nothing was sent.
              */
1930         return sent ? : err;
1931 }
1932
1933 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1934                                     int offset, size_t size, int flags)
1935 {
1936         int err;
1937         bool send_sigpipe = false;
1938         bool init_scm = true;
1939         struct scm_cookie scm;
1940         struct sock *other, *sk = socket->sk;
1941         struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1942
1943         if (flags & MSG_OOB)
1944                 return -EOPNOTSUPP;
1945
1946         other = unix_peer(sk);
1947         if (!other || sk->sk_state != TCP_ESTABLISHED)
1948                 return -ENOTCONN;
1949
1950         if (false) {
1951 alloc_skb:
1952                 unix_state_unlock(other);
1953                 mutex_unlock(&unix_sk(other)->iolock);
1954                 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1955                                               &err, 0);
1956                 if (!newskb)
1957                         goto err;
1958         }
1959
1960         /* we must acquire iolock as we modify already present
1961          * skbs in the sk_receive_queue and mess with skb->len
1962          */
1963         err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1964         if (err) {
1965                 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1966                 goto err;
1967         }
1968
1969         if (sk->sk_shutdown & SEND_SHUTDOWN) {
1970                 err = -EPIPE;
1971                 send_sigpipe = true;
1972                 goto err_unlock;
1973         }
1974
1975         unix_state_lock(other);
1976
1977         if (sock_flag(other, SOCK_DEAD) ||
1978             other->sk_shutdown & RCV_SHUTDOWN) {
1979                 err = -EPIPE;
1980                 send_sigpipe = true;
1981                 goto err_state_unlock;
1982         }
1983
1984         if (init_scm) {
1985                 err = maybe_init_creds(&scm, socket, other);
1986                 if (err)
1987                         goto err_state_unlock;
1988                 init_scm = false;
1989         }
1990
1991         skb = skb_peek_tail(&other->sk_receive_queue);
1992         if (tail && tail == skb) {
1993                 skb = newskb;
1994         } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
1995                 if (newskb) {
1996                         skb = newskb;
1997                 } else {
1998                         tail = skb;
1999                         goto alloc_skb;
2000                 }
2001         } else if (newskb) {
2002                 /* this is fast path, we don't necessarily need to
2003                  * call to kfree_skb even though with newskb == NULL
2004                  * this - does no harm
2005                  */
2006                 consume_skb(newskb);
2007                 newskb = NULL;
2008         }
2009
2010         if (skb_append_pagefrags(skb, page, offset, size)) {
2011                 tail = skb;
2012                 goto alloc_skb;
2013         }
2014
2015         skb->len += size;
2016         skb->data_len += size;
2017         skb->truesize += size;
2018         refcount_add(size, &sk->sk_wmem_alloc);
2019
2020         if (newskb) {
2021                 err = unix_scm_to_skb(&scm, skb, false);
2022                 if (err)
2023                         goto err_state_unlock;
2024                 spin_lock(&other->sk_receive_queue.lock);
2025                 __skb_queue_tail(&other->sk_receive_queue, newskb);
2026                 spin_unlock(&other->sk_receive_queue.lock);
2027         }
2028
2029         unix_state_unlock(other);
2030         mutex_unlock(&unix_sk(other)->iolock);
2031
2032         other->sk_data_ready(other);
2033         scm_destroy(&scm);
2034         return size;
2035
2036 err_state_unlock:
2037         unix_state_unlock(other);
2038 err_unlock:
2039         mutex_unlock(&unix_sk(other)->iolock);
2040 err:
2041         kfree_skb(newskb);
2042         if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2043                 send_sig(SIGPIPE, current, 0);
2044         if (!init_scm)
2045                 scm_destroy(&scm);
2046         return err;
2047 }
2048
2049 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2050                                   size_t len)
2051 {
2052         int err;
2053         struct sock *sk = sock->sk;
2054
2055         err = sock_error(sk);
2056         if (err)
2057                 return err;
2058
2059         if (sk->sk_state != TCP_ESTABLISHED)
2060                 return -ENOTCONN;
2061
2062         if (msg->msg_namelen)
2063                 msg->msg_namelen = 0;
2064
2065         return unix_dgram_sendmsg(sock, msg, len);
2066 }
2067
2068 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2069                                   size_t size, int flags)
2070 {
2071         struct sock *sk = sock->sk;
2072
2073         if (sk->sk_state != TCP_ESTABLISHED)
2074                 return -ENOTCONN;
2075
2076         return unix_dgram_recvmsg(sock, msg, size, flags);
2077 }
2078
2079 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2080 {
2081         struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2082
2083         if (addr) {
2084                 msg->msg_namelen = addr->len;
2085                 memcpy(msg->msg_name, addr->name, addr->len);
2086         }
2087 }
2088
     /*
      * unix_dgram_recvmsg - receive one datagram from the socket's queue.
      *
      * Also used for SOCK_SEQPACKET via unix_seqpacket_recvmsg().
      *
      * Returns the number of bytes copied to @msg, or, when the caller passed
      * MSG_TRUNC in @flags, the length of the datagram past the peek offset
      * even if less was copied.  Returns a negative errno on failure
      * (-EOPNOTSUPP for MSG_OOB).  On a SOCK_SEQPACKET socket that is shut
      * down for receiving, an -EAGAIN result is turned into 0.
      */
2089 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2090                               size_t size, int flags)
2091 {
2092         struct scm_cookie scm;
2093         struct sock *sk = sock->sk;
2094         struct unix_sock *u = unix_sk(sk);
2095         struct sk_buff *skb, *last;
2096         long timeo;
2097         int skip;
2098         int err;
2099
2100         err = -EOPNOTSUPP;
2101         if (flags&MSG_OOB)
2102                 goto out;
2103
2104         timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT);
2105
             /* Try to dequeue under iolock; if nothing is there (-EAGAIN) and
              * a timeout remains, wait for more packets and try again.  When
              * the loop is left with an skb, iolock is still held.
              */
2106         do {
2107                 mutex_lock(&u->iolock);
2108
2109                 skip = sk_peek_offset(sk, flags);
2110                 skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags,
2111                                               scm_stat_del, &skip, &err, &last);
2112                 if (skb)
2113                         break;
2114
2115                 mutex_unlock(&u->iolock);
2116
2117                 if (err != -EAGAIN)
2118                         break;
2119         } while (timeo &&
2120                  !__skb_wait_for_more_packets(sk, &sk->sk_receive_queue,
2121                                               &err, &timeo, last));
2122
2123         if (!skb) { /* implies iolock unlocked */
2124                 unix_state_lock(sk);
2125                 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2126                 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2127                     (sk->sk_shutdown & RCV_SHUTDOWN))
2128                         err = 0;
2129                 unix_state_unlock(sk);
2130                 goto out;
2131         }
2132
             /* A datagram was obtained: wake any task sleeping on this
              * socket's peer_wait queue.
              */
2133         if (wq_has_sleeper(&u->peer_wait))
2134                 wake_up_interruptible_sync_poll(&u->peer_wait,
2135                                                 EPOLLOUT | EPOLLWRNORM |
2136                                                 EPOLLWRBAND);
2137
             /* skb->sk is the socket whose address is reported to the caller. */
2138         if (msg->msg_name)
2139                 unix_copy_addr(msg, skb->sk);
2140
             /* Clamp the copy to what the datagram holds past the peek
              * offset; flag truncation when the buffer is too small.
              */
2141         if (size > skb->len - skip)
2142                 size = skb->len - skip;
2143         else if (size < skb->len - skip)
2144                 msg->msg_flags |= MSG_TRUNC;
2145
2146         err = skb_copy_datagram_msg(skb, skip, msg, size);
2147         if (err)
2148                 goto out_free;
2149
2150         if (sock_flag(sk, SOCK_RCVTSTAMP))
2151                 __sock_recv_timestamp(msg, sk, skb);
2152
2153         memset(&scm, 0, sizeof(scm));
2154
2155         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2156         unix_set_secdata(&scm, skb);
2157
2158         if (!(flags & MSG_PEEK)) {
                     /* Real read: passed fds move from the skb to scm. */
2159                 if (UNIXCB(skb).fp)
2160                         unix_detach_fds(&scm, skb);
2161
2162                 sk_peek_offset_bwd(sk, skb->len);
2163         } else {
2164                 /* It is questionable: on PEEK we could:
2165                    - do not return fds - good, but too simple 8)
2166                    - return fds, and do not return them on read (old strategy,
2167                      apparently wrong)
2168                    - clone fds (I chose it for now, it is the most universal
2169                      solution)
2170
2171                    POSIX 1003.1g does not actually define this clearly
2172                    at all. POSIX 1003.1g doesn't define a lot of things
2173                    clearly however!
2174
2175                 */
2176
2177                 sk_peek_offset_fwd(sk, size);
2178
2179                 if (UNIXCB(skb).fp)
2180                         scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2181         }
             /* With MSG_TRUNC requested, report the datagram's length past
              * the peek offset rather than the number of bytes copied.
              */
2182         err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2183
2184         scm_recv(sock, msg, &scm, flags);
2185
             /* Reached with iolock held, on success and on copy failure. */
2186 out_free:
2187         skb_free_datagram(sk, skb);
2188         mutex_unlock(&u->iolock);
2189 out:
2190         return err;
2191 }
2192
2193 /*
2194  *      Sleep until more data has arrived. But check for races..
2195  */
2196 static long unix_stream_data_wait(struct sock *sk, long timeo,
2197                                   struct sk_buff *last, unsigned int last_len,
2198                                   bool freezable)
2199 {
2200         struct sk_buff *tail;
2201         DEFINE_WAIT(wait);
2202
2203         unix_state_lock(sk);
2204
2205         for (;;) {
2206                 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2207
2208                 tail = skb_peek_tail(&sk->sk_receive_queue);
2209                 if (tail != last ||
2210                     (tail && tail->len != last_len) ||
2211                     sk->sk_err ||
2212                     (sk->sk_shutdown & RCV_SHUTDOWN) ||
2213                     signal_pending(current) ||
2214                     !timeo)
2215                         break;
2216
2217                 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2218                 unix_state_unlock(sk);
2219                 if (freezable)
2220                         timeo = freezable_schedule_timeout(timeo);
2221                 else
2222                         timeo = schedule_timeout(timeo);
2223                 unix_state_lock(sk);
2224
2225                 if (sock_flag(sk, SOCK_DEAD))
2226                         break;
2227
2228                 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2229         }
2230
2231         finish_wait(sk_sleep(sk), &wait);
2232         unix_state_unlock(sk);
2233         return timeo;
2234 }
2235
2236 static unsigned int unix_skb_len(const struct sk_buff *skb)
2237 {
2238         return skb->len - UNIXCB(skb).consumed;
2239 }
2240
2241 struct unix_stream_read_state {
2242         int (*recv_actor)(struct sk_buff *, int, int,
2243                           struct unix_stream_read_state *);
2244         struct socket *socket;
2245         struct msghdr *msg;
2246         struct pipe_inode_info *pipe;
2247         size_t size;
2248         int flags;
2249         unsigned int splice_flags;
2250 };
2251
     /*
      * unix_stream_read_generic - common receive loop for stream sockets.
      *
      * Walks the socket's receive queue and hands each chunk to
      * state->recv_actor (copy to a msghdr for recvmsg, splice to a pipe for
      * splice_read).  The whole walk runs under u->iolock; the queue itself
      * is inspected under the socket state lock.
      *
      * @state:     request description, see struct unix_stream_read_state
      * @freezable: passed through to unix_stream_data_wait()
      *
      * Returns the number of bytes delivered if non-zero, otherwise err
      * (0 or a negative errno): -EINVAL when the socket is not in
      * TCP_ESTABLISHED state, -EOPNOTSUPP for MSG_OOB, -ECONNRESET when the
      * socket is dead, -EAGAIN when no data is available and the timeout is
      * zero, -EFAULT when the actor fails before anything was delivered.
      */
2252 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2253                                     bool freezable)
2254 {
2255         struct scm_cookie scm;
2256         struct socket *sock = state->socket;
2257         struct sock *sk = sock->sk;
2258         struct unix_sock *u = unix_sk(sk);
2259         int copied = 0;
2260         int flags = state->flags;
2261         int noblock = flags & MSG_DONTWAIT;
2262         bool check_creds = false;
2263         int target;
2264         int err = 0;
2265         long timeo;
2266         int skip;
2267         size_t size = state->size;
2268         unsigned int last_len;
2269
2270         if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2271                 err = -EINVAL;
2272                 goto out;
2273         }
2274
2275         if (unlikely(flags & MSG_OOB)) {
2276                 err = -EOPNOTSUPP;
2277                 goto out;
2278         }
2279
2280         target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2281         timeo = sock_rcvtimeo(sk, noblock);
2282
2283         memset(&scm, 0, sizeof(scm));
2284
2285         /* Lock the socket to prevent queue disordering
2286          * while sleeps in memcpy_tomsg
2287          */
2288         mutex_lock(&u->iolock);
2289
             /* A negative peek offset is treated as zero. */
2290         skip = max(sk_peek_offset(sk, flags), 0);
2291
2292         do {
2293                 int chunk;
2294                 bool drop_skb;
2295                 struct sk_buff *skb, *last;
2296
2297 redo:
2298                 unix_state_lock(sk);
2299                 if (sock_flag(sk, SOCK_DEAD)) {
2300                         err = -ECONNRESET;
2301                         goto unlock;
2302                 }
                     /* Snapshot of the queue head; handed to
                      * unix_stream_data_wait() if we have to sleep.
                      */
2303                 last = skb = skb_peek(&sk->sk_receive_queue);
2304                 last_len = last ? last->len : 0;
2305 again:
2306                 if (skb == NULL) {
                             /* Nothing (more) to read: stop if the target is
                              * met, else report error/shutdown or wait.
                              */
2307                         if (copied >= target)
2308                                 goto unlock;
2309
2310                         /*
2311                          *      POSIX 1003.1g mandates this order.
2312                          */
2313
2314                         err = sock_error(sk);
2315                         if (err)
2316                                 goto unlock;
2317                         if (sk->sk_shutdown & RCV_SHUTDOWN)
2318                                 goto unlock;
2319
2320                         unix_state_unlock(sk);
2321                         if (!timeo) {
2322                                 err = -EAGAIN;
2323                                 break;
2324                         }
2325
                             /* iolock is dropped for the duration of the wait. */
2326                         mutex_unlock(&u->iolock);
2327
2328                         timeo = unix_stream_data_wait(sk, timeo, last,
2329                                                       last_len, freezable);
2330
                             /* iolock is not held here, so this path cleans
                              * up scm itself and skips the common exit.
                              */
2331                         if (signal_pending(current)) {
2332                                 err = sock_intr_errno(timeo);
2333                                 scm_destroy(&scm);
2334                                 goto out;
2335                         }
2336
2337                         mutex_lock(&u->iolock);
2338                         goto redo;
2339 unlock:
2340                         unix_state_unlock(sk);
2341                         break;
2342                 }
2343
                     /* Step over skbs that lie entirely before the peek offset. */
2344                 while (skip >= unix_skb_len(skb)) {
2345                         skip -= unix_skb_len(skb);
2346                         last = skb;
2347                         last_len = skb->len;
2348                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2349                         if (!skb)
2350                                 goto again;
2351                 }
2352
2353                 unix_state_unlock(sk);
2354
2355                 if (check_creds) {
2356                         /* Never glue messages from different writers */
2357                         if (!unix_skb_scm_eq(skb, &scm))
2358                                 break;
2359                 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2360                         /* Copy credentials */
2361                         scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2362                         unix_set_secdata(&scm, skb);
2363                         check_creds = true;
2364                 }
2365
2366                 /* Copy address just once */
                     /* NOTE(review): sunaddr is a local of this inner block,
                      * so the "sunaddr = NULL" store below has no effect
                      * outside it; nothing here appears to stop the address
                      * being copied again on a later pass - confirm intent.
                      */
2367                 if (state->msg && state->msg->msg_name) {
2368                         DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2369                                          state->msg->msg_name);
2370                         unix_copy_addr(state->msg, skb->sk);
2371                         sunaddr = NULL;
2372                 }
2373
2374                 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
                     /* Hold an extra reference across the actor call; it is
                      * dropped again by consume_skb() right after.
                      */
2375                 skb_get(skb);
2376                 chunk = state->recv_actor(skb, skip, chunk, state);
2377                 drop_skb = !unix_skb_len(skb);
2378                 /* skb is only safe to use if !drop_skb */
2379                 consume_skb(skb);
                     /* Any actor failure is reported as -EFAULT, and only if
                      * nothing has been delivered so far.
                      */
2380                 if (chunk < 0) {
2381                         if (copied == 0)
2382                                 copied = -EFAULT;
2383                         break;
2384                 }
2385                 copied += chunk;
2386                 size -= chunk;
2387
2388                 if (drop_skb) {
2389                         /* the skb was touched by a concurrent reader;
2390                          * we should not expect anything from this skb
2391                          * anymore and assume it invalid - we can be
2392                          * sure it was dropped from the socket queue
2393                          *
2394                          * let's report a short read
2395                          */
2396                         err = 0;
2397                         break;
2398                 }
2399
2400                 /* Mark read part of skb as used */
2401                 if (!(flags & MSG_PEEK)) {
2402                         UNIXCB(skb).consumed += chunk;
2403
2404                         sk_peek_offset_bwd(sk, chunk);
2405
2406                         if (UNIXCB(skb).fp) {
2407                                 spin_lock(&sk->sk_receive_queue.lock);
2408                                 scm_stat_del(sk, skb);
2409                                 spin_unlock(&sk->sk_receive_queue.lock);
2410                                 unix_detach_fds(&scm, skb);
2411                         }
2412
                             /* Data left in this skb: the request is done. */
2413                         if (unix_skb_len(skb))
2414                                 break;
2415
                             /* Fully consumed: remove it from the queue. */
2416                         skb_unlink(skb, &sk->sk_receive_queue);
2417                         consume_skb(skb);
2418
                             /* Stop once fds have been collected. */
2419                         if (scm.fp)
2420                                 break;
2421                 } else {
2422                         /* It is questionable, see note in unix_dgram_recvmsg.
2423                          */
2424                         if (UNIXCB(skb).fp)
2425                                 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2426
2427                         sk_peek_offset_fwd(sk, chunk);
2428
2429                         if (UNIXCB(skb).fp)
2430                                 break;
2431
                             /* Peeking: move on to the next skb (state lock
                              * retaken for the queue walk) without removing
                              * anything.
                              */
2432                         skip = 0;
2433                         last = skb;
2434                         last_len = skb->len;
2435                         unix_state_lock(sk);
2436                         skb = skb_peek_next(skb, &sk->sk_receive_queue);
2437                         if (skb)
2438                                 goto again;
2439                         unix_state_unlock(sk);
2440                         break;
2441                 }
2442         } while (size);
2443
2444         mutex_unlock(&u->iolock);
             /* With a msghdr the collected scm data is delivered to it;
              * otherwise (splice) it is simply released.
              */
2445         if (state->msg)
2446                 scm_recv(sock, state->msg, &scm, flags);
2447         else
2448                 scm_destroy(&scm);
2449 out:
2450         return copied ? : err;
2451 }
2452
2453 static int unix_stream_read_actor(struct sk_buff *skb,
2454                                   int skip, int chunk,
2455                                   struct unix_stream_read_state *state)
2456 {
2457         int ret;
2458
2459         ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2460                                     state->msg, chunk);
2461         return ret ?: chunk;
2462 }
2463
2464 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2465                                size_t size, int flags)
2466 {
2467         struct unix_stream_read_state state = {
2468                 .recv_actor = unix_stream_read_actor,
2469                 .socket = sock,
2470                 .msg = msg,
2471                 .size = size,
2472                 .flags = flags
2473         };
2474
2475         return unix_stream_read_generic(&state, true);
2476 }
2477
2478 static int unix_stream_splice_actor(struct sk_buff *skb,
2479                                     int skip, int chunk,
2480                                     struct unix_stream_read_state *state)
2481 {
2482         return skb_splice_bits(skb, state->socket->sk,
2483                                UNIXCB(skb).consumed + skip,
2484                                state->pipe, chunk, state->splice_flags);
2485 }
2486
2487 static ssize_t unix_stream_splice_read(struct socket *sock,  loff_t *ppos,
2488                                        struct pipe_inode_info *pipe,
2489                                        size_t size, unsigned int flags)
2490 {
2491         struct unix_stream_read_state state = {
2492                 .recv_actor = unix_stream_splice_actor,
2493                 .socket = sock,
2494                 .pipe = pipe,
2495                 .size = size,
2496                 .splice_flags = flags,
2497         };
2498
2499         if (unlikely(*ppos))
2500                 return -ESPIPE;
2501
2502         if (sock->file->f_flags & O_NONBLOCK ||
2503             flags & SPLICE_F_NONBLOCK)
2504                 state.flags = MSG_DONTWAIT;
2505
2506         return unix_stream_read_generic(&state, false);
2507 }
2508
2509 static int unix_shutdown(struct socket *sock, int mode)
2510 {
2511         struct sock *sk = sock->sk;
2512         struct sock *other;
2513
2514         if (mode < SHUT_RD || mode > SHUT_RDWR)
2515                 return -EINVAL;
2516         /* This maps:
2517          * SHUT_RD   (0) -> RCV_SHUTDOWN  (1)
2518          * SHUT_WR   (1) -> SEND_SHUTDOWN (2)
2519          * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2520          */
2521         ++mode;
2522
2523         unix_state_lock(sk);
2524         sk->sk_shutdown |= mode;
2525         other = unix_peer(sk);
2526         if (other)
2527                 sock_hold(other);
2528         unix_state_unlock(sk);
2529         sk->sk_state_change(sk);
2530
2531         if (other &&
2532                 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2533
2534                 int peer_mode = 0;
2535
2536                 if (mode&RCV_SHUTDOWN)
2537                         peer_mode |= SEND_SHUTDOWN;
2538                 if (mode&SEND_SHUTDOWN)
2539                         peer_mode |= RCV_SHUTDOWN;
2540                 unix_state_lock(other);
2541                 other->sk_shutdown |= peer_mode;
2542                 unix_state_unlock(other);
2543                 other->sk_state_change(other);
2544                 if (peer_mode == SHUTDOWN_MASK)
2545                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2546                 else if (peer_mode & RCV_SHUTDOWN)
2547                         sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2548         }
2549         if (other)
2550                 sock_put(other);
2551
2552         return 0;
2553 }
2554
2555 long unix_inq_len(struct sock *sk)
2556 {
2557         struct sk_buff *skb;
2558         long amount = 0;
2559
2560         if (sk->sk_state == TCP_LISTEN)
2561                 return -EINVAL;
2562
2563         spin_lock(&sk->sk_receive_queue.lock);
2564         if (sk->sk_type == SOCK_STREAM ||
2565             sk->sk_type == SOCK_SEQPACKET) {
2566                 skb_queue_walk(&sk->sk_receive_queue, skb)
2567                         amount += unix_skb_len(skb);
2568         } else {
2569                 skb = skb_peek(&sk->sk_receive_queue);
2570                 if (skb)
2571                         amount = skb->len;
2572         }
2573         spin_unlock(&sk->sk_receive_queue.lock);
2574
2575         return amount;
2576 }
2577 EXPORT_SYMBOL_GPL(unix_inq_len);
2578
/* Outgoing queue size, as reported by sk_wmem_alloc_get(). */
long unix_outq_len(struct sock *sk)
{
        long queued = sk_wmem_alloc_get(sk);

        return queued;
}
EXPORT_SYMBOL_GPL(unix_outq_len);
2584
2585 static int unix_open_file(struct sock *sk)
2586 {
2587         struct path path;
2588         struct file *f;
2589         int fd;
2590
2591         if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))
2592                 return -EPERM;
2593
2594         if (!smp_load_acquire(&unix_sk(sk)->addr))
2595                 return -ENOENT;
2596
2597         path = unix_sk(sk)->path;
2598         if (!path.dentry)
2599                 return -ENOENT;
2600
2601         path_get(&path);
2602
2603         fd = get_unused_fd_flags(O_CLOEXEC);
2604         if (fd < 0)
2605                 goto out;
2606
2607         f = dentry_open(&path, O_PATH, current_cred());
2608         if (IS_ERR(f)) {
2609                 put_unused_fd(fd);
2610                 fd = PTR_ERR(f);
2611                 goto out;
2612         }
2613
2614         fd_install(fd, f);
2615 out:
2616         path_put(&path);
2617
2618         return fd;
2619 }
2620
2621 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2622 {
2623         struct sock *sk = sock->sk;
2624         long amount = 0;
2625         int err;
2626
2627         switch (cmd) {
2628         case SIOCOUTQ:
2629                 amount = unix_outq_len(sk);
2630                 err = put_user(amount, (int __user *)arg);
2631                 break;
2632         case SIOCINQ:
2633                 amount = unix_inq_len(sk);
2634                 if (amount < 0)
2635                         err = amount;
2636                 else
2637                         err = put_user(amount, (int __user *)arg);
2638                 break;
2639         case SIOCUNIXFILE:
2640                 err = unix_open_file(sk);
2641                 break;
2642         default:
2643                 err = -ENOIOCTLCMD;
2644                 break;
2645         }
2646         return err;
2647 }
2648
#ifdef CONFIG_COMPAT
/*
 * Compat ioctl entry point: converts @arg with compat_ptr() and hands the
 * request to unix_ioctl().
 */
static int unix_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
{
        void __user *uptr = compat_ptr(arg);

        return unix_ioctl(sock, cmd, (unsigned long)uptr);
}
#endif
2655
2656 static __poll_t unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2657 {
2658         struct sock *sk = sock->sk;
2659         __poll_t mask;
2660
2661         sock_poll_wait(file, sock, wait);
2662         mask = 0;
2663
2664         /* exceptional events? */
2665         if (sk->sk_err)
2666                 mask |= EPOLLERR;
2667         if (sk->sk_shutdown == SHUTDOWN_MASK)
2668                 mask |= EPOLLHUP;
2669         if (sk->sk_shutdown & RCV_SHUTDOWN)
2670                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2671
2672         /* readable? */
2673         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2674                 mask |= EPOLLIN | EPOLLRDNORM;
2675
2676         /* Connection-based need to check for termination and startup */
2677         if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2678             sk->sk_state == TCP_CLOSE)
2679                 mask |= EPOLLHUP;
2680
2681         /*
2682          * we set writable also when the other side has shut down the
2683          * connection. This prevents stuck sockets.
2684          */
2685         if (unix_writable(sk))
2686                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2687
2688         return mask;
2689 }
2690
2691 static __poll_t unix_dgram_poll(struct file *file, struct socket *sock,
2692                                     poll_table *wait)
2693 {
2694         struct sock *sk = sock->sk, *other;
2695         unsigned int writable;
2696         __poll_t mask;
2697
2698         sock_poll_wait(file, sock, wait);
2699         mask = 0;
2700
2701         /* exceptional events? */
2702         if (sk->sk_err || !skb_queue_empty_lockless(&sk->sk_error_queue))
2703                 mask |= EPOLLERR |
2704                         (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? EPOLLPRI : 0);
2705
2706         if (sk->sk_shutdown & RCV_SHUTDOWN)
2707                 mask |= EPOLLRDHUP | EPOLLIN | EPOLLRDNORM;
2708         if (sk->sk_shutdown == SHUTDOWN_MASK)
2709                 mask |= EPOLLHUP;
2710
2711         /* readable? */
2712         if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
2713                 mask |= EPOLLIN | EPOLLRDNORM;
2714
2715         /* Connection-based need to check for termination and startup */
2716         if (sk->sk_type == SOCK_SEQPACKET) {
2717                 if (sk->sk_state == TCP_CLOSE)
2718                         mask |= EPOLLHUP;
2719                 /* connection hasn't started yet? */
2720                 if (sk->sk_state == TCP_SYN_SENT)
2721                         return mask;
2722         }
2723
2724         /* No write status requested, avoid expensive OUT tests. */
2725         if (!(poll_requested_events(wait) & (EPOLLWRBAND|EPOLLWRNORM|EPOLLOUT)))
2726                 return mask;
2727
2728         writable = unix_writable(sk);
2729         if (writable) {
2730                 unix_state_lock(sk);
2731
2732                 other = unix_peer(sk);
2733                 if (other && unix_peer(other) != sk &&
2734                     unix_recvq_full(other) &&
2735                     unix_dgram_peer_wake_me(sk, other))
2736                         writable = 0;
2737
2738                 unix_state_unlock(sk);
2739         }
2740
2741         if (writable)
2742                 mask |= EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND;
2743         else
2744                 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2745
2746         return mask;
2747 }
2748
2749 #ifdef CONFIG_PROC_FS
2750
/*
 * The seq_file position packs two values into one integer: the hash
 * bucket index in the bits above BUCKET_SPACE and an offset in the low
 * BUCKET_SPACE bits.  set_bucket_offset() builds such a position;
 * get_bucket() and get_offset() take it apart again.
 */
#define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)

#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2756
2757 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2758 {
2759         unsigned long offset = get_offset(*pos);
2760         unsigned long bucket = get_bucket(*pos);
2761         struct sock *sk;
2762         unsigned long count = 0;
2763
2764         for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2765                 if (sock_net(sk) != seq_file_net(seq))
2766                         continue;
2767                 if (++count == offset)
2768                         break;
2769         }
2770
2771         return sk;
2772 }
2773
2774 static struct sock *unix_next_socket(struct seq_file *seq,
2775                                      struct sock *sk,
2776                                      loff_t *pos)
2777 {
2778         unsigned long bucket;
2779
2780         while (sk > (struct sock *)SEQ_START_TOKEN) {
2781                 sk = sk_next(sk);
2782                 if (!sk)
2783                         goto next_bucket;
2784                 if (sock_net(sk) == seq_file_net(seq))
2785                         return sk;
2786         }
2787
2788         do {
2789                 sk = unix_from_bucket(seq, pos);
2790                 if (sk)
2791                         return sk;
2792
2793 next_bucket:
2794                 bucket = get_bucket(*pos) + 1;
2795                 *pos = set_bucket_offset(bucket, 1);
2796         } while (bucket < ARRAY_SIZE(unix_socket_table));
2797
2798         return NULL;
2799 }
2800
2801 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2802         __acquires(unix_table_lock)
2803 {
2804         spin_lock(&unix_table_lock);
2805
2806         if (!*pos)
2807                 return SEQ_START_TOKEN;
2808
2809         if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2810                 return NULL;
2811
2812         return unix_next_socket(seq, NULL, pos);
2813 }
2814
2815 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2816 {
2817         ++*pos;
2818         return unix_next_socket(seq, v, pos);
2819 }
2820
/* seq_file ->stop: releases unix_table_lock, which unix_seq_start() takes. */
static void unix_seq_stop(struct seq_file *seq, void *v)
        __releases(unix_table_lock)
{
        spin_unlock(&unix_table_lock);
}
2826
2827 static int unix_seq_show(struct seq_file *seq, void *v)
2828 {
2829
2830         if (v == SEQ_START_TOKEN)
2831                 seq_puts(seq, "Num       RefCount Protocol Flags    Type St "
2832                          "Inode Path\n");
2833         else {
2834                 struct sock *s = v;
2835                 struct unix_sock *u = unix_sk(s);
2836                 unix_state_lock(s);
2837
2838                 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2839                         s,
2840                         refcount_read(&s->sk_refcnt),
2841                         0,
2842                         s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2843                         s->sk_type,
2844                         s->sk_socket ?
2845                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2846                         (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2847                         sock_i_ino(s));
2848
2849                 if (u->addr) {  // under unix_table_lock here
2850                         int i, len;
2851                         seq_putc(seq, ' ');
2852
2853                         i = 0;
2854                         len = u->addr->len - sizeof(short);
2855                         if (!UNIX_ABSTRACT(s))
2856                                 len--;
2857                         else {
2858                                 seq_putc(seq, '@');
2859                                 i++;
2860                         }
2861                         for ( ; i < len; i++)
2862                                 seq_putc(seq, u->addr->name->sun_path[i] ?:
2863                                          '@');
2864                 }
2865                 unix_state_unlock(s);
2866                 seq_putc(seq, '\n');
2867         }
2868
2869         return 0;
2870 }
2871
/*
 * seq_file iterator callbacks; passed to proc_create_net() for the
 * "unix" proc entry in unix_net_init().
 */
static const struct seq_operations unix_seq_ops = {
        .start  = unix_seq_start,
        .next   = unix_seq_next,
        .stop   = unix_seq_stop,
        .show   = unix_seq_show,
};
2878 #endif
2879
/*
 * Protocol family descriptor for PF_UNIX; handed to sock_register() in
 * af_unix_init().  Socket creation is routed to unix_create().
 */
static const struct net_proto_family unix_family_ops = {
        .family = PF_UNIX,
        .create = unix_create,
        .owner  = THIS_MODULE,
};
2885
2886
2887 static int __net_init unix_net_init(struct net *net)
2888 {
2889         int error = -ENOMEM;
2890
2891         net->unx.sysctl_max_dgram_qlen = 10;
2892         if (unix_sysctl_register(net))
2893                 goto out;
2894
2895 #ifdef CONFIG_PROC_FS
2896         if (!proc_create_net("unix", 0, net->proc_net, &unix_seq_ops,
2897                         sizeof(struct seq_net_private))) {
2898                 unix_sysctl_unregister(net);
2899                 goto out;
2900         }
2901 #endif
2902         error = 0;
2903 out:
2904         return error;
2905 }
2906
/*
 * Per-network-namespace teardown: unregister the sysctl table and remove
 * the "unix" proc entry set up by unix_net_init().
 */
static void __net_exit unix_net_exit(struct net *net)
{
        unix_sysctl_unregister(net);
        remove_proc_entry("unix", net->proc_net);
}
2912
/* Per-namespace init/exit hooks; registered in af_unix_init(). */
static struct pernet_operations unix_net_ops = {
        .init = unix_net_init,
        .exit = unix_net_exit,
};
2917
2918 static int __init af_unix_init(void)
2919 {
2920         int rc = -1;
2921
2922         BUILD_BUG_ON(sizeof(struct unix_skb_parms) > sizeof_field(struct sk_buff, cb));
2923
2924         rc = proto_register(&unix_proto, 1);
2925         if (rc != 0) {
2926                 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2927                 goto out;
2928         }
2929
2930         sock_register(&unix_family_ops);
2931         register_pernet_subsys(&unix_net_ops);
2932 out:
2933         return rc;
2934 }
2935
/*
 * Module exit: unregister the PF_UNIX socket family, the unix_proto
 * protocol and the per-namespace operations.
 */
static void __exit af_unix_exit(void)
{
        sock_unregister(PF_UNIX);
        proto_unregister(&unix_proto);
        unregister_pernet_subsys(&unix_net_ops);
}
2942
/*
 * Earlier than device_initcall() so that other drivers invoking
 * request_module() don't end up in a loop when modprobe tries
 * to use a UNIX socket. But later than subsys_initcall() because
 * we depend on stuff initialised there.
 */
fs_initcall(af_unix_init);
module_exit(af_unix_exit);

MODULE_LICENSE("GPL");
MODULE_ALIAS_NETPROTO(PF_UNIX);