2 * NET4: Implementation of BSD Unix domain sockets.
4 * Authors: Alan Cox, <alan@lxorguk.ukuu.org.uk>
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License
8 * as published by the Free Software Foundation; either version
9 * 2 of the License, or (at your option) any later version.
12 * Linus Torvalds : Assorted bug cures.
13 * Niibe Yutaka : async I/O support.
14 * Carsten Paeth : PF_UNIX check, address fixes.
15 * Alan Cox : Limit size of allocated blocks.
16 * Alan Cox : Fixed the stupid socketpair bug.
17 * Alan Cox : BSD compatibility fine tuning.
18 * Alan Cox : Fixed a bug in connect when interrupted.
19 * Alan Cox : Sorted out a proper draft version of
20 * file descriptor passing hacked up from
22 * Marty Leisner : Fixes to fd passing
23 * Nick Nevin : recvmsg bugfix.
24 * Alan Cox : Started proper garbage collector
25 * Heiko EiBfeldt : Missing verify_area check
26 * Alan Cox : Started POSIXisms
27 * Andreas Schwab : Replace inode by dentry for proper
29 * Kirk Petersen : Made this a module
30 * Christoph Rohland : Elegant non-blocking accept/connect algorithm.
32 * Alexey Kuznetsov : Repaired (I hope) bugs introduced
33 * by above two patches.
34 * Andrea Arcangeli : If possible we block in connect(2)
35 * if the max backlog of the listen socket
36 * has been reached. This won't break
37 * old apps and it will avoid huge amount
38 * of socks hashed (this for unix_gc()
39 * performances reasons).
40 * Security fix that limits the max
41 * number of socks to 2*max_files and
42 * the number of skb queueable in the
44 * Artur Skawina : Hash function optimizations
45 * Alexey Kuznetsov : Full scale SMP. Lot of bugs are introduced 8)
46 * Malcolm Beattie : Set peercred for socketpair
47 * Michal Ostrowski : Module initialization cleanup.
48 * Arnaldo C. Melo : Remove MOD_{INC,DEC}_USE_COUNT,
49 * the core infrastructure is doing that
50 * for all net proto families now (2.5.69+)
53 * Known differences from reference BSD that was tested:
56 * ECONNREFUSED is not returned from one end of a connected() socket to the
57 * other the moment one end closes.
58 * fstat() doesn't return st_dev=0, and give the blksize as high water mark
59 * and a fake inode identifier (nor the BSD first socket fstat twice bug).
61 * accept() returns a path name even if the connecting socket has closed
62 * in the meantime (BSD loses the path and gives up).
63 * accept() returns 0 length path for an unbound connector. BSD returns 16
64 * and a null first byte in the path (but not for gethost/peername - BSD bug ??)
65 * socketpair(...SOCK_RAW..) doesn't panic the kernel.
66 * BSD af_unix apparently has connect forgetting to block properly.
67 * (need to check this with the POSIX spec in detail)
69 * Differences from 2.0.0-11-... (ANK)
70 * Bug fixes and improvements.
71 * - client shutdown killed server socket.
72 * - removed all useless cli/sti pairs.
74 * Semantic changes/extensions.
75 * - generic control message passing.
76 * - SCM_CREDENTIALS control message.
77 * - "Abstract" (not FS based) socket bindings.
78 * Abstract names are sequences of bytes (not zero terminated)
79 * started by 0, so that this name space does not intersect
83 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
85 #include <linux/module.h>
86 #include <linux/kernel.h>
87 #include <linux/signal.h>
88 #include <linux/sched.h>
89 #include <linux/errno.h>
90 #include <linux/string.h>
91 #include <linux/stat.h>
92 #include <linux/dcache.h>
93 #include <linux/namei.h>
94 #include <linux/socket.h>
96 #include <linux/fcntl.h>
97 #include <linux/termios.h>
98 #include <linux/sockios.h>
99 #include <linux/net.h>
100 #include <linux/in.h>
101 #include <linux/fs.h>
102 #include <linux/slab.h>
103 #include <asm/uaccess.h>
104 #include <linux/skbuff.h>
105 #include <linux/netdevice.h>
106 #include <net/net_namespace.h>
107 #include <net/sock.h>
108 #include <net/tcp_states.h>
109 #include <net/af_unix.h>
110 #include <linux/proc_fs.h>
111 #include <linux/seq_file.h>
113 #include <linux/init.h>
114 #include <linux/poll.h>
115 #include <linux/rtnetlink.h>
116 #include <linux/mount.h>
117 #include <net/checksum.h>
118 #include <linux/security.h>
119 #include <linux/freezer.h>
121 struct hlist_head unix_socket_table[2 * UNIX_HASH_SIZE];
122 EXPORT_SYMBOL_GPL(unix_socket_table);
123 DEFINE_SPINLOCK(unix_table_lock);
124 EXPORT_SYMBOL_GPL(unix_table_lock);
125 static atomic_long_t unix_nr_socks;
128 static struct hlist_head *unix_sockets_unbound(void *addr)
130 unsigned long hash = (unsigned long)addr;
134 hash %= UNIX_HASH_SIZE;
135 return &unix_socket_table[UNIX_HASH_SIZE + hash];
138 #define UNIX_ABSTRACT(sk) (unix_sk(sk)->addr->hash < UNIX_HASH_SIZE)
140 #ifdef CONFIG_SECURITY_NETWORK
141 static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
143 UNIXCB(skb).secid = scm->secid;
146 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
148 scm->secid = UNIXCB(skb).secid;
151 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
153 return (scm->secid == UNIXCB(skb).secid);
156 static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb)
159 static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb)
162 static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb)
166 #endif /* CONFIG_SECURITY_NETWORK */
/*
 *  SMP locking strategy:
 *    hash table is protected with spinlock unix_table_lock
 *    each socket state is protected by separate spin lock.
 */
174 static inline unsigned int unix_hash_fold(__wsum n)
176 unsigned int hash = (__force unsigned int)csum_fold(n);
179 return hash&(UNIX_HASH_SIZE-1);
182 #define unix_peer(sk) (unix_sk(sk)->peer)
/* Is osk's peer pointer aimed back at sk? */
static inline int unix_our_peer(struct sock *sk, struct sock *osk)
{
	return unix_peer(osk) == sk;
}
189 static inline int unix_may_send(struct sock *sk, struct sock *osk)
191 return unix_peer(osk) == NULL || unix_our_peer(sk, osk);
194 static inline int unix_recvq_full(struct sock const *sk)
196 return skb_queue_len(&sk->sk_receive_queue) > sk->sk_max_ack_backlog;
/* Return the peer of s with an extra reference, or NULL.  The state lock
 * makes the peer read and the sock_hold() atomic w.r.t. disconnect.
 */
struct sock *unix_peer_get(struct sock *s)
{
	struct sock *peer;

	unix_state_lock(s);
	peer = unix_peer(s);
	if (peer)
		sock_hold(peer);
	unix_state_unlock(s);
	return peer;
}
212 static inline void unix_release_addr(struct unix_address *addr)
214 if (atomic_dec_and_test(&addr->refcnt))
219 * Check unix socket name:
220 * - should be not zero length.
221 * - if started by not zero, should be NULL terminated (FS object)
222 * - if started by zero, it is abstract name.
225 static int unix_mkname(struct sockaddr_un *sunaddr, int len, unsigned int *hashp)
229 if (len <= sizeof(short) || len > sizeof(*sunaddr))
231 if (!sunaddr || sunaddr->sun_family != AF_UNIX)
233 if (sunaddr->sun_path[0]) {
235 * This may look like an off by one error but it is a bit more
236 * subtle. 108 is the longest valid AF_UNIX path for a binding.
237 * sun_path[108] doesn't as such exist. However in kernel space
238 * we are guaranteed that it is a valid memory location in our
239 * kernel address buffer.
241 ((char *)sunaddr)[len] = 0;
242 len = strlen(sunaddr->sun_path)+1+sizeof(short);
246 *hashp = unix_hash_fold(csum_partial(sunaddr, len, 0));
/* Unhash sk from the global table; caller holds unix_table_lock. */
static void __unix_remove_socket(struct sock *sk)
{
	sk_del_node_init(sk);
}
/* Hash sk into the given bucket; caller holds unix_table_lock. */
static void __unix_insert_socket(struct hlist_head *list, struct sock *sk)
{
	WARN_ON(!sk_unhashed(sk));
	sk_add_node(sk, list);
}
261 static inline void unix_remove_socket(struct sock *sk)
263 spin_lock(&unix_table_lock);
264 __unix_remove_socket(sk);
265 spin_unlock(&unix_table_lock);
268 static inline void unix_insert_socket(struct hlist_head *list, struct sock *sk)
270 spin_lock(&unix_table_lock);
271 __unix_insert_socket(list, sk);
272 spin_unlock(&unix_table_lock);
275 static struct sock *__unix_find_socket_byname(struct net *net,
276 struct sockaddr_un *sunname,
277 int len, int type, unsigned int hash)
281 sk_for_each(s, &unix_socket_table[hash ^ type]) {
282 struct unix_sock *u = unix_sk(s);
284 if (!net_eq(sock_net(s), net))
287 if (u->addr->len == len &&
288 !memcmp(u->addr->name, sunname, len))
296 static inline struct sock *unix_find_socket_byname(struct net *net,
297 struct sockaddr_un *sunname,
303 spin_lock(&unix_table_lock);
304 s = __unix_find_socket_byname(net, sunname, len, type, hash);
307 spin_unlock(&unix_table_lock);
311 static struct sock *unix_find_socket_byinode(struct inode *i)
315 spin_lock(&unix_table_lock);
317 &unix_socket_table[i->i_ino & (UNIX_HASH_SIZE - 1)]) {
318 struct dentry *dentry = unix_sk(s)->path.dentry;
320 if (dentry && d_real_inode(dentry) == i) {
327 spin_unlock(&unix_table_lock);
331 /* Support code for asymmetrically connected dgram sockets
333 * If a datagram socket is connected to a socket not itself connected
334 * to the first socket (eg, /dev/log), clients may only enqueue more
335 * messages if the present receive queue of the server socket is not
336 * "too large". This means there's a second writeability condition
337 * poll and sendmsg need to test. The dgram recv code will do a wake
338 * up on the peer_wait wait queue of a socket upon reception of a
339 * datagram which needs to be propagated to sleeping would-be writers
340 * since these might not have sent anything so far. This can't be
341 * accomplished via poll_wait because the lifetime of the server
342 * socket might be less than that of its clients if these break their
343 * association with it or if the server socket is closed while clients
344 * are still connected to it and there's no way to inform "a polling
345 * implementation" that it should let go of a certain wait queue
347 * In order to propagate a wake up, a wait_queue_t of the client
348 * socket is enqueued on the peer_wait queue of the server socket
349 * whose wake function does a wake_up on the ordinary client socket
350 * wait queue. This connection is established whenever a write (or
351 * poll for write) hit the flow control condition and broken when the
352 * association to the server socket is dissolved or after a wake up
356 static int unix_dgram_peer_wake_relay(wait_queue_t *q, unsigned mode, int flags,
360 wait_queue_head_t *u_sleep;
362 u = container_of(q, struct unix_sock, peer_wake);
364 __remove_wait_queue(&unix_sk(u->peer_wake.private)->peer_wait,
366 u->peer_wake.private = NULL;
368 /* relaying can only happen while the wq still exists */
369 u_sleep = sk_sleep(&u->sk);
371 wake_up_interruptible_poll(u_sleep, key);
376 static int unix_dgram_peer_wake_connect(struct sock *sk, struct sock *other)
378 struct unix_sock *u, *u_other;
382 u_other = unix_sk(other);
384 spin_lock(&u_other->peer_wait.lock);
386 if (!u->peer_wake.private) {
387 u->peer_wake.private = other;
388 __add_wait_queue(&u_other->peer_wait, &u->peer_wake);
393 spin_unlock(&u_other->peer_wait.lock);
397 static void unix_dgram_peer_wake_disconnect(struct sock *sk,
400 struct unix_sock *u, *u_other;
403 u_other = unix_sk(other);
404 spin_lock(&u_other->peer_wait.lock);
406 if (u->peer_wake.private == other) {
407 __remove_wait_queue(&u_other->peer_wait, &u->peer_wake);
408 u->peer_wake.private = NULL;
411 spin_unlock(&u_other->peer_wait.lock);
414 static void unix_dgram_peer_wake_disconnect_wakeup(struct sock *sk,
417 unix_dgram_peer_wake_disconnect(sk, other);
418 wake_up_interruptible_poll(sk_sleep(sk),
425 * - unix_peer(sk) == other
426 * - association is stable
/* preconditions:
 *	- unix_peer(sk) == other
 *	- association is stable
 *
 * Returns 1 (caller must wait) when other's queue is still full after we
 * registered for a wakeup; otherwise unregisters and returns 0.
 */
static int unix_dgram_peer_wake_me(struct sock *sk, struct sock *other)
{
	int connected;

	connected = unix_dgram_peer_wake_connect(sk, other);

	if (unix_recvq_full(other))
		return 1;

	if (connected)
		unix_dgram_peer_wake_disconnect(sk, other);

	return 0;
}
443 static int unix_writable(const struct sock *sk)
445 return sk->sk_state != TCP_LISTEN &&
446 (atomic_read(&sk->sk_wmem_alloc) << 2) <= sk->sk_sndbuf;
449 static void unix_write_space(struct sock *sk)
451 struct socket_wq *wq;
454 if (unix_writable(sk)) {
455 wq = rcu_dereference(sk->sk_wq);
456 if (wq_has_sleeper(wq))
457 wake_up_interruptible_sync_poll(&wq->wait,
458 POLLOUT | POLLWRNORM | POLLWRBAND);
459 sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT);
464 /* When dgram socket disconnects (or changes its peer), we clear its receive
465 * queue of packets arrived from previous peer. First, it allows to do
466 * flow control based only on wmem_alloc; second, sk connected to peer
467 * may receive messages only from that peer. */
468 static void unix_dgram_disconnected(struct sock *sk, struct sock *other)
470 if (!skb_queue_empty(&sk->sk_receive_queue)) {
471 skb_queue_purge(&sk->sk_receive_queue);
472 wake_up_interruptible_all(&unix_sk(sk)->peer_wait);
474 /* If one link of bidirectional dgram pipe is disconnected,
475 * we signal error. Messages are lost. Do not make this,
476 * when peer was not connected to us.
478 if (!sock_flag(other, SOCK_DEAD) && unix_peer(other) == sk) {
479 other->sk_err = ECONNRESET;
480 other->sk_error_report(other);
485 static void unix_sock_destructor(struct sock *sk)
487 struct unix_sock *u = unix_sk(sk);
489 skb_queue_purge(&sk->sk_receive_queue);
491 WARN_ON(atomic_read(&sk->sk_wmem_alloc));
492 WARN_ON(!sk_unhashed(sk));
493 WARN_ON(sk->sk_socket);
494 if (!sock_flag(sk, SOCK_DEAD)) {
495 pr_info("Attempt to release alive unix socket: %p\n", sk);
500 unix_release_addr(u->addr);
502 atomic_long_dec(&unix_nr_socks);
504 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
506 #ifdef UNIX_REFCNT_DEBUG
507 pr_debug("UNIX %p is destroyed, %ld are still alive.\n", sk,
508 atomic_long_read(&unix_nr_socks));
512 static void unix_release_sock(struct sock *sk, int embrion)
514 struct unix_sock *u = unix_sk(sk);
520 unix_remove_socket(sk);
525 sk->sk_shutdown = SHUTDOWN_MASK;
527 u->path.dentry = NULL;
529 state = sk->sk_state;
530 sk->sk_state = TCP_CLOSE;
531 unix_state_unlock(sk);
533 wake_up_interruptible_all(&u->peer_wait);
535 skpair = unix_peer(sk);
537 if (skpair != NULL) {
538 if (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) {
539 unix_state_lock(skpair);
541 skpair->sk_shutdown = SHUTDOWN_MASK;
542 if (!skb_queue_empty(&sk->sk_receive_queue) || embrion)
543 skpair->sk_err = ECONNRESET;
544 unix_state_unlock(skpair);
545 skpair->sk_state_change(skpair);
546 sk_wake_async(skpair, SOCK_WAKE_WAITD, POLL_HUP);
549 unix_dgram_peer_wake_disconnect(sk, skpair);
550 sock_put(skpair); /* It may now die */
551 unix_peer(sk) = NULL;
554 /* Try to flush out this socket. Throw out buffers at least */
556 while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) {
557 if (state == TCP_LISTEN)
558 unix_release_sock(skb->sk, 1);
559 /* passed fds are erased in the kfree_skb hook */
560 UNIXCB(skb).consumed = skb->len;
569 /* ---- Socket is dead now and most probably destroyed ---- */
572 * Fixme: BSD difference: In BSD all sockets connected to us get
573 * ECONNRESET and we die on the spot. In Linux we behave
574 * like files and pipes do and wait for the last
577 * Can't we simply set sock->err?
579 * What the above comment does talk about? --ANK(980817)
582 if (unix_tot_inflight)
583 unix_gc(); /* Garbage collect fds */
586 static void init_peercred(struct sock *sk)
588 put_pid(sk->sk_peer_pid);
589 if (sk->sk_peer_cred)
590 put_cred(sk->sk_peer_cred);
591 sk->sk_peer_pid = get_pid(task_tgid(current));
592 sk->sk_peer_cred = get_current_cred();
595 static void copy_peercred(struct sock *sk, struct sock *peersk)
597 put_pid(sk->sk_peer_pid);
598 if (sk->sk_peer_cred)
599 put_cred(sk->sk_peer_cred);
600 sk->sk_peer_pid = get_pid(peersk->sk_peer_pid);
601 sk->sk_peer_cred = get_cred(peersk->sk_peer_cred);
604 static int unix_listen(struct socket *sock, int backlog)
607 struct sock *sk = sock->sk;
608 struct unix_sock *u = unix_sk(sk);
609 struct pid *old_pid = NULL;
612 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
613 goto out; /* Only stream/seqpacket sockets accept */
616 goto out; /* No listens on an unbound socket */
618 if (sk->sk_state != TCP_CLOSE && sk->sk_state != TCP_LISTEN)
620 if (backlog > sk->sk_max_ack_backlog)
621 wake_up_interruptible_all(&u->peer_wait);
622 sk->sk_max_ack_backlog = backlog;
623 sk->sk_state = TCP_LISTEN;
624 /* set credentials so connect can copy them */
629 unix_state_unlock(sk);
635 static int unix_release(struct socket *);
636 static int unix_bind(struct socket *, struct sockaddr *, int);
637 static int unix_stream_connect(struct socket *, struct sockaddr *,
638 int addr_len, int flags);
639 static int unix_socketpair(struct socket *, struct socket *);
640 static int unix_accept(struct socket *, struct socket *, int);
641 static int unix_getname(struct socket *, struct sockaddr *, int *, int);
642 static unsigned int unix_poll(struct file *, struct socket *, poll_table *);
643 static unsigned int unix_dgram_poll(struct file *, struct socket *,
645 static int unix_ioctl(struct socket *, unsigned int, unsigned long);
646 static int unix_shutdown(struct socket *, int);
647 static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t);
648 static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int);
649 static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset,
650 size_t size, int flags);
651 static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos,
652 struct pipe_inode_info *, size_t size,
654 static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t);
655 static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int);
656 static int unix_dgram_connect(struct socket *, struct sockaddr *,
658 static int unix_seqpacket_sendmsg(struct socket *, struct msghdr *, size_t);
659 static int unix_seqpacket_recvmsg(struct socket *, struct msghdr *, size_t,
662 static int unix_set_peek_off(struct sock *sk, int val)
664 struct unix_sock *u = unix_sk(sk);
666 if (mutex_lock_interruptible(&u->iolock))
669 sk->sk_peek_off = val;
670 mutex_unlock(&u->iolock);
676 static const struct proto_ops unix_stream_ops = {
678 .owner = THIS_MODULE,
679 .release = unix_release,
681 .connect = unix_stream_connect,
682 .socketpair = unix_socketpair,
683 .accept = unix_accept,
684 .getname = unix_getname,
687 .listen = unix_listen,
688 .shutdown = unix_shutdown,
689 .setsockopt = sock_no_setsockopt,
690 .getsockopt = sock_no_getsockopt,
691 .sendmsg = unix_stream_sendmsg,
692 .recvmsg = unix_stream_recvmsg,
693 .mmap = sock_no_mmap,
694 .sendpage = unix_stream_sendpage,
695 .splice_read = unix_stream_splice_read,
696 .set_peek_off = unix_set_peek_off,
699 static const struct proto_ops unix_dgram_ops = {
701 .owner = THIS_MODULE,
702 .release = unix_release,
704 .connect = unix_dgram_connect,
705 .socketpair = unix_socketpair,
706 .accept = sock_no_accept,
707 .getname = unix_getname,
708 .poll = unix_dgram_poll,
710 .listen = sock_no_listen,
711 .shutdown = unix_shutdown,
712 .setsockopt = sock_no_setsockopt,
713 .getsockopt = sock_no_getsockopt,
714 .sendmsg = unix_dgram_sendmsg,
715 .recvmsg = unix_dgram_recvmsg,
716 .mmap = sock_no_mmap,
717 .sendpage = sock_no_sendpage,
718 .set_peek_off = unix_set_peek_off,
721 static const struct proto_ops unix_seqpacket_ops = {
723 .owner = THIS_MODULE,
724 .release = unix_release,
726 .connect = unix_stream_connect,
727 .socketpair = unix_socketpair,
728 .accept = unix_accept,
729 .getname = unix_getname,
730 .poll = unix_dgram_poll,
732 .listen = unix_listen,
733 .shutdown = unix_shutdown,
734 .setsockopt = sock_no_setsockopt,
735 .getsockopt = sock_no_getsockopt,
736 .sendmsg = unix_seqpacket_sendmsg,
737 .recvmsg = unix_seqpacket_recvmsg,
738 .mmap = sock_no_mmap,
739 .sendpage = sock_no_sendpage,
740 .set_peek_off = unix_set_peek_off,
743 static struct proto unix_proto = {
745 .owner = THIS_MODULE,
746 .obj_size = sizeof(struct unix_sock),
750 * AF_UNIX sockets do not interact with hardware, hence they
751 * dont trigger interrupts - so it's safe for them to have
752 * bh-unsafe locking for their sk_receive_queue.lock. Split off
753 * this special lock-class by reinitializing the spinlock key:
755 static struct lock_class_key af_unix_sk_receive_queue_lock_key;
757 static struct sock *unix_create1(struct net *net, struct socket *sock, int kern)
759 struct sock *sk = NULL;
762 atomic_long_inc(&unix_nr_socks);
763 if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files())
766 sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern);
770 sock_init_data(sock, sk);
771 lockdep_set_class(&sk->sk_receive_queue.lock,
772 &af_unix_sk_receive_queue_lock_key);
774 sk->sk_write_space = unix_write_space;
775 sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen;
776 sk->sk_destruct = unix_sock_destructor;
778 u->path.dentry = NULL;
780 spin_lock_init(&u->lock);
781 atomic_long_set(&u->inflight, 0);
782 INIT_LIST_HEAD(&u->link);
783 mutex_init(&u->iolock); /* single task reading lock */
784 mutex_init(&u->bindlock); /* single task binding lock */
785 init_waitqueue_head(&u->peer_wait);
786 init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay);
787 unix_insert_socket(unix_sockets_unbound(sk), sk);
790 atomic_long_dec(&unix_nr_socks);
793 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
799 static int unix_create(struct net *net, struct socket *sock, int protocol,
802 if (protocol && protocol != PF_UNIX)
803 return -EPROTONOSUPPORT;
805 sock->state = SS_UNCONNECTED;
807 switch (sock->type) {
809 sock->ops = &unix_stream_ops;
812 * Believe it or not BSD has AF_UNIX, SOCK_RAW though
816 sock->type = SOCK_DGRAM;
818 sock->ops = &unix_dgram_ops;
821 sock->ops = &unix_seqpacket_ops;
824 return -ESOCKTNOSUPPORT;
827 return unix_create1(net, sock, kern) ? 0 : -ENOMEM;
830 static int unix_release(struct socket *sock)
832 struct sock *sk = sock->sk;
837 unix_release_sock(sk, 0);
843 static int unix_autobind(struct socket *sock)
845 struct sock *sk = sock->sk;
846 struct net *net = sock_net(sk);
847 struct unix_sock *u = unix_sk(sk);
848 static u32 ordernum = 1;
849 struct unix_address *addr;
851 unsigned int retries = 0;
853 err = mutex_lock_interruptible(&u->bindlock);
862 addr = kzalloc(sizeof(*addr) + sizeof(short) + 16, GFP_KERNEL);
866 addr->name->sun_family = AF_UNIX;
867 atomic_set(&addr->refcnt, 1);
870 addr->len = sprintf(addr->name->sun_path+1, "%05x", ordernum) + 1 + sizeof(short);
871 addr->hash = unix_hash_fold(csum_partial(addr->name, addr->len, 0));
873 spin_lock(&unix_table_lock);
874 ordernum = (ordernum+1)&0xFFFFF;
876 if (__unix_find_socket_byname(net, addr->name, addr->len, sock->type,
878 spin_unlock(&unix_table_lock);
880 * __unix_find_socket_byname() may take long time if many names
881 * are already in use.
884 /* Give up if all names seems to be in use. */
885 if (retries++ == 0xFFFFF) {
892 addr->hash ^= sk->sk_type;
894 __unix_remove_socket(sk);
895 smp_store_release(&u->addr, addr);
896 __unix_insert_socket(&unix_socket_table[addr->hash], sk);
897 spin_unlock(&unix_table_lock);
900 out: mutex_unlock(&u->bindlock);
/* Resolve a sockaddr_un to a target socket: filesystem lookup for
 * pathnames, hash lookup for abstract names.  Returns a held socket or
 * NULL with *error set.
 */
static struct sock *unix_find_other(struct net *net,
				    struct sockaddr_un *sunname, int len,
				    int type, unsigned int hash, int *error)
{
	struct sock *u;
	struct path path;
	int err = 0;

	if (sunname->sun_path[0]) {
		struct inode *inode;
		err = kern_path(sunname->sun_path, LOOKUP_FOLLOW, &path);
		if (err)
			goto fail;
		inode = d_real_inode(path.dentry);
		err = inode_permission(inode, MAY_WRITE);
		if (err)
			goto put_fail;

		err = -ECONNREFUSED;
		if (!S_ISSOCK(inode->i_mode))
			goto put_fail;
		u = unix_find_socket_byinode(inode);
		if (!u)
			goto put_fail;

		if (u->sk_type == type)
			touch_atime(&path);

		path_put(&path);

		err = -EPROTOTYPE;
		if (u->sk_type != type) {
			sock_put(u);
			goto fail;
		}
	} else {
		err = -ECONNREFUSED;
		u = unix_find_socket_byname(net, sunname, len, type, hash);
		if (u) {
			struct dentry *dentry;
			dentry = unix_sk(u)->path.dentry;
			if (dentry)
				touch_atime(&unix_sk(u)->path);
		} else
			goto fail;
	}
	return u;

put_fail:
	path_put(&path);
fail:
	*error = err;
	return NULL;
}
959 static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
961 struct dentry *dentry;
965 * Get the parent directory, calculate the hash for last
968 dentry = kern_path_create(AT_FDCWD, sun_path, &path, 0);
969 err = PTR_ERR(dentry);
974 * All right, let's create it.
976 err = security_path_mknod(&path, dentry, mode, 0);
978 err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
980 res->mnt = mntget(path.mnt);
981 res->dentry = dget(dentry);
984 done_path_create(&path, dentry);
988 static int unix_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
990 struct sock *sk = sock->sk;
991 struct net *net = sock_net(sk);
992 struct unix_sock *u = unix_sk(sk);
993 struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
994 char *sun_path = sunaddr->sun_path;
997 struct unix_address *addr;
998 struct hlist_head *list;
999 struct path path = { NULL, NULL };
1002 if (addr_len < offsetofend(struct sockaddr_un, sun_family) ||
1003 sunaddr->sun_family != AF_UNIX)
1006 if (addr_len == sizeof(short)) {
1007 err = unix_autobind(sock);
1011 err = unix_mkname(sunaddr, addr_len, &hash);
1017 umode_t mode = S_IFSOCK |
1018 (SOCK_INODE(sock)->i_mode & ~current_umask());
1019 err = unix_mknod(sun_path, mode, &path);
1027 err = mutex_lock_interruptible(&u->bindlock);
1036 addr = kmalloc(sizeof(*addr)+addr_len, GFP_KERNEL);
1040 memcpy(addr->name, sunaddr, addr_len);
1041 addr->len = addr_len;
1042 addr->hash = hash ^ sk->sk_type;
1043 atomic_set(&addr->refcnt, 1);
1046 addr->hash = UNIX_HASH_SIZE;
1047 hash = d_real_inode(path.dentry)->i_ino & (UNIX_HASH_SIZE - 1);
1048 spin_lock(&unix_table_lock);
1050 list = &unix_socket_table[hash];
1052 spin_lock(&unix_table_lock);
1054 if (__unix_find_socket_byname(net, sunaddr, addr_len,
1055 sk->sk_type, hash)) {
1056 unix_release_addr(addr);
1060 list = &unix_socket_table[addr->hash];
1064 __unix_remove_socket(sk);
1065 smp_store_release(&u->addr, addr);
1066 __unix_insert_socket(list, sk);
1069 spin_unlock(&unix_table_lock);
1071 mutex_unlock(&u->bindlock);
/* Lock two sockets in address order to avoid ABBA deadlock; handles the
 * sk1 == sk2 and sk2 == NULL cases by taking only sk1's lock.
 */
static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_lock(sk1);
		return;
	}
	if (sk1 < sk2) {
		unix_state_lock(sk1);
		unix_state_lock_nested(sk2);
	} else {
		unix_state_lock(sk2);
		unix_state_lock_nested(sk1);
	}
}
/* Release the locks taken by unix_state_double_lock(). */
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
{
	if (unlikely(sk1 == sk2) || !sk2) {
		unix_state_unlock(sk1);
		return;
	}
	unix_state_unlock(sk1);
	unix_state_unlock(sk2);
}
1104 static int unix_dgram_connect(struct socket *sock, struct sockaddr *addr,
1105 int alen, int flags)
1107 struct sock *sk = sock->sk;
1108 struct net *net = sock_net(sk);
1109 struct sockaddr_un *sunaddr = (struct sockaddr_un *)addr;
1115 if (alen < offsetofend(struct sockaddr, sa_family))
1118 if (addr->sa_family != AF_UNSPEC) {
1119 err = unix_mkname(sunaddr, alen, &hash);
1124 if (test_bit(SOCK_PASSCRED, &sock->flags) &&
1125 !unix_sk(sk)->addr && (err = unix_autobind(sock)) != 0)
1129 other = unix_find_other(net, sunaddr, alen, sock->type, hash, &err);
1133 unix_state_double_lock(sk, other);
1135 /* Apparently VFS overslept socket death. Retry. */
1136 if (sock_flag(other, SOCK_DEAD)) {
1137 unix_state_double_unlock(sk, other);
1143 if (!unix_may_send(sk, other))
1146 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1152 * 1003.1g breaking connected state with AF_UNSPEC
1155 unix_state_double_lock(sk, other);
1159 * If it was connected, reconnect.
1161 if (unix_peer(sk)) {
1162 struct sock *old_peer = unix_peer(sk);
1163 unix_peer(sk) = other;
1164 unix_dgram_peer_wake_disconnect_wakeup(sk, old_peer);
1166 unix_state_double_unlock(sk, other);
1168 if (other != old_peer)
1169 unix_dgram_disconnected(sk, old_peer);
1172 unix_peer(sk) = other;
1173 unix_state_double_unlock(sk, other);
1178 unix_state_double_unlock(sk, other);
1184 static long unix_wait_for_peer(struct sock *other, long timeo)
1186 struct unix_sock *u = unix_sk(other);
1190 prepare_to_wait_exclusive(&u->peer_wait, &wait, TASK_INTERRUPTIBLE);
1192 sched = !sock_flag(other, SOCK_DEAD) &&
1193 !(other->sk_shutdown & RCV_SHUTDOWN) &&
1194 unix_recvq_full(other);
1196 unix_state_unlock(other);
1199 timeo = schedule_timeout(timeo);
1201 finish_wait(&u->peer_wait, &wait);
/* connect(2) for stream/seqpacket: allocate the embryo socket and the
 * notification skb up front, then latch the listener's state and hand
 * the embryo over on its receive queue.
 */
static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr,
			       int addr_len, int flags)
{
	struct sockaddr_un *sunaddr = (struct sockaddr_un *)uaddr;
	struct sock *sk = sock->sk;
	struct net *net = sock_net(sk);
	struct unix_sock *u = unix_sk(sk), *newu, *otheru;
	struct sock *newsk = NULL;
	struct sock *other = NULL;
	struct sk_buff *skb = NULL;
	unsigned int hash;
	int st;
	int err;
	long timeo;

	err = unix_mkname(sunaddr, addr_len, &hash);
	if (err < 0)
		goto out;
	addr_len = err;

	if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr &&
	    (err = unix_autobind(sock)) != 0)
		goto out;

	timeo = sock_sndtimeo(sk, flags & O_NONBLOCK);

	/* First of all allocate resources.
	   If we will make it after state is locked,
	   we will have to recheck all again in any case.
	 */

	err = -ENOMEM;

	/* create new sock for complete connection */
	newsk = unix_create1(sock_net(sk), NULL, 0);
	if (newsk == NULL)
		goto out;

	/* Allocate skb for sending to listening sock */
	skb = sock_wmalloc(newsk, 1, 0, GFP_KERNEL);
	if (skb == NULL)
		goto out;

restart:
	/*  Find listening sock. */
	other = unix_find_other(net, sunaddr, addr_len, sk->sk_type, hash, &err);
	if (!other)
		goto out;

	/* Latch state of peer */
	unix_state_lock(other);

	/* Apparently VFS overslept socket death. Retry. */
	if (sock_flag(other, SOCK_DEAD)) {
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = -ECONNREFUSED;
	if (other->sk_state != TCP_LISTEN)
		goto out_unlock;
	if (other->sk_shutdown & RCV_SHUTDOWN)
		goto out_unlock;

	if (unix_recvq_full(other)) {
		err = -EAGAIN;
		if (!timeo)
			goto out_unlock;

		timeo = unix_wait_for_peer(other, timeo);

		err = sock_intr_errno(timeo);
		if (signal_pending(current))
			goto out;
		sock_put(other);
		goto restart;
	}

	/* Latch our state.

	   It is tricky place. We need to grab our state lock and cannot
	   drop lock on peer. It is dangerous because deadlock is
	   possible. Connect to self case and simultaneous
	   attempt to connect are eliminated by checking socket
	   state. other is TCP_LISTEN, if sk is TCP_LISTEN we
	   check this before attempt to grab lock.

	   Well, and we have to recheck the state after socket locked.
	 */
	st = sk->sk_state;

	switch (st) {
	case TCP_CLOSE:
		/* This is ok... continue with connect */
		break;
	case TCP_ESTABLISHED:
		/* Socket is already connected */
		err = -EISCONN;
		goto out_unlock;
	default:
		err = -EINVAL;
		goto out_unlock;
	}

	unix_state_lock_nested(sk);

	if (sk->sk_state != st) {
		unix_state_unlock(sk);
		unix_state_unlock(other);
		sock_put(other);
		goto restart;
	}

	err = security_unix_stream_connect(sk, other, newsk);
	if (err) {
		unix_state_unlock(sk);
		goto out_unlock;
	}

	/* The way is open! Fastly set all the necessary fields... */

	sock_hold(sk);
	unix_peer(newsk)	= sk;
	newsk->sk_state		= TCP_ESTABLISHED;
	newsk->sk_type		= sk->sk_type;
	init_peercred(newsk);
	newu = unix_sk(newsk);
	RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq);
	otheru = unix_sk(other);

	/* copy address information from listening to new sock
	 *
	 * The contents of *(otheru->addr) and otheru->path
	 * are seen fully set up here, since we have found
	 * otheru in hash under unix_table_lock.  Insertion
	 * into the hash chain we'd found it in had been done
	 * in an earlier critical area protected by unix_table_lock,
	 * the same one where we'd set *(otheru->addr) contents,
	 * as well as otheru->path and otheru->addr itself.
	 *
	 * Using smp_store_release() here to set newu->addr
	 * is enough to make those stores, as well as stores
	 * to newu->path visible to anyone who gets newu->addr
	 * by smp_load_acquire().  IOW, the same warranties
	 * as for unix_sock instances bound in unix_bind() or
	 * in unix_autobind().
	 */
	if (otheru->path.dentry) {
		path_get(&otheru->path);
		newu->path = otheru->path;
	}
	atomic_inc(&otheru->addr->refcnt);
	smp_store_release(&newu->addr, otheru->addr);

	/* Set credentials */
	copy_peercred(sk, other);

	sock->state	= SS_CONNECTED;
	sk->sk_state	= TCP_ESTABLISHED;
	sock_hold(newsk);

	smp_mb__after_atomic();	/* sock_hold() does an atomic_inc() */
	unix_peer(sk)	= newsk;

	unix_state_unlock(sk);

	/* take ten and and send info to listening sock */
	spin_lock(&other->sk_receive_queue.lock);
	__skb_queue_tail(&other->sk_receive_queue, skb);
	spin_unlock(&other->sk_receive_queue.lock);
	unix_state_unlock(other);
	other->sk_data_ready(other);
	sock_put(other);
	return 0;

out_unlock:
	if (other)
		unix_state_unlock(other);

out:
	kfree_skb(skb);
	if (newsk)
		unix_release_sock(newsk, 0);
	if (other)
		sock_put(other);
	return err;
}
1394 static int unix_socketpair(struct socket *socka, struct socket *sockb)
1396 struct sock *ska = socka->sk, *skb = sockb->sk;
1398 /* Join our sockets back to back */
1401 unix_peer(ska) = skb;
1402 unix_peer(skb) = ska;
1406 if (ska->sk_type != SOCK_DGRAM) {
1407 ska->sk_state = TCP_ESTABLISHED;
1408 skb->sk_state = TCP_ESTABLISHED;
1409 socka->state = SS_CONNECTED;
1410 sockb->state = SS_CONNECTED;
1415 static void unix_sock_inherit_flags(const struct socket *old,
1418 if (test_bit(SOCK_PASSCRED, &old->flags))
1419 set_bit(SOCK_PASSCRED, &new->flags);
1420 if (test_bit(SOCK_PASSSEC, &old->flags))
1421 set_bit(SOCK_PASSSEC, &new->flags);
1424 static int unix_accept(struct socket *sock, struct socket *newsock, int flags)
1426 struct sock *sk = sock->sk;
1428 struct sk_buff *skb;
1432 if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET)
1436 if (sk->sk_state != TCP_LISTEN)
1439 /* If socket state is TCP_LISTEN it cannot change (for now...),
1440 * so that no locks are necessary.
1443 skb = skb_recv_datagram(sk, 0, flags&O_NONBLOCK, &err);
1445 /* This means receive shutdown. */
1452 skb_free_datagram(sk, skb);
1453 wake_up_interruptible(&unix_sk(sk)->peer_wait);
1455 /* attach accepted sock to socket */
1456 unix_state_lock(tsk);
1457 newsock->state = SS_CONNECTED;
1458 unix_sock_inherit_flags(sock, newsock);
1459 sock_graft(tsk, newsock);
1460 unix_state_unlock(tsk);
1468 static int unix_getname(struct socket *sock, struct sockaddr *uaddr, int *uaddr_len, int peer)
1470 struct sock *sk = sock->sk;
1471 struct unix_address *addr;
1472 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, uaddr);
1476 sk = unix_peer_get(sk);
1486 addr = smp_load_acquire(&unix_sk(sk)->addr);
1488 sunaddr->sun_family = AF_UNIX;
1489 sunaddr->sun_path[0] = 0;
1490 *uaddr_len = sizeof(short);
1492 *uaddr_len = addr->len;
1493 memcpy(sunaddr, addr->name, *uaddr_len);
1500 static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1504 scm->fp = UNIXCB(skb).fp;
1505 UNIXCB(skb).fp = NULL;
1507 for (i = scm->fp->count-1; i >= 0; i--)
1508 unix_notinflight(scm->fp->user, scm->fp->fp[i]);
1511 static void unix_destruct_scm(struct sk_buff *skb)
1513 struct scm_cookie scm;
1514 memset(&scm, 0, sizeof(scm));
1515 scm.pid = UNIXCB(skb).pid;
1517 unix_detach_fds(&scm, skb);
1519 /* Alas, it calls VFS */
1520 /* So fscking what? fput() had been SMP-safe since the last Summer */
1526 * The "user->unix_inflight" variable is protected by the garbage
1527 * collection lock, and we just read it locklessly here. If you go
1528 * over the limit, there might be a tiny race in actually noticing
1529 * it across threads. Tough.
1531 static inline bool too_many_unix_fds(struct task_struct *p)
1533 struct user_struct *user = current_user();
1535 if (unlikely(user->unix_inflight > task_rlimit(p, RLIMIT_NOFILE)))
1536 return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN);
1540 #define MAX_RECURSION_LEVEL 4
1542 static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
1545 unsigned char max_level = 0;
1547 if (too_many_unix_fds(current))
1548 return -ETOOMANYREFS;
1550 for (i = scm->fp->count - 1; i >= 0; i--) {
1551 struct sock *sk = unix_get_socket(scm->fp->fp[i]);
1554 max_level = max(max_level,
1555 unix_sk(sk)->recursion_level);
1557 if (unlikely(max_level > MAX_RECURSION_LEVEL))
1558 return -ETOOMANYREFS;
1561 * Need to duplicate file references for the sake of garbage
1562 * collection. Otherwise a socket in the fps might become a
1563 * candidate for GC while the skb is not yet queued.
1565 UNIXCB(skb).fp = scm_fp_dup(scm->fp);
1566 if (!UNIXCB(skb).fp)
1569 for (i = scm->fp->count - 1; i >= 0; i--)
1570 unix_inflight(scm->fp->user, scm->fp->fp[i]);
1574 static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds)
1578 UNIXCB(skb).pid = get_pid(scm->pid);
1579 UNIXCB(skb).uid = scm->creds.uid;
1580 UNIXCB(skb).gid = scm->creds.gid;
1581 UNIXCB(skb).fp = NULL;
1582 unix_get_secdata(scm, skb);
1583 if (scm->fp && send_fds)
1584 err = unix_attach_fds(scm, skb);
1586 skb->destructor = unix_destruct_scm;
1590 static bool unix_passcred_enabled(const struct socket *sock,
1591 const struct sock *other)
1593 return test_bit(SOCK_PASSCRED, &sock->flags) ||
1594 !other->sk_socket ||
1595 test_bit(SOCK_PASSCRED, &other->sk_socket->flags);
1599 * Some apps rely on write() giving SCM_CREDENTIALS
1600 * We include credentials if source or destination socket
1601 * asserted SOCK_PASSCRED.
1603 static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock,
1604 const struct sock *other)
1606 if (UNIXCB(skb).pid)
1608 if (unix_passcred_enabled(sock, other)) {
1609 UNIXCB(skb).pid = get_pid(task_tgid(current));
1610 current_uid_gid(&UNIXCB(skb).uid, &UNIXCB(skb).gid);
1614 static int maybe_init_creds(struct scm_cookie *scm,
1615 struct socket *socket,
1616 const struct sock *other)
1619 struct msghdr msg = { .msg_controllen = 0 };
1621 err = scm_send(socket, &msg, scm, false);
1625 if (unix_passcred_enabled(socket, other)) {
1626 scm->pid = get_pid(task_tgid(current));
1627 current_uid_gid(&scm->creds.uid, &scm->creds.gid);
1632 static bool unix_skb_scm_eq(struct sk_buff *skb,
1633 struct scm_cookie *scm)
1635 const struct unix_skb_parms *u = &UNIXCB(skb);
1637 return u->pid == scm->pid &&
1638 uid_eq(u->uid, scm->creds.uid) &&
1639 gid_eq(u->gid, scm->creds.gid) &&
1640 unix_secdata_eq(scm, skb);
1644 * Send AF_UNIX data.
1647 static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg,
1650 struct sock *sk = sock->sk;
1651 struct net *net = sock_net(sk);
1652 struct unix_sock *u = unix_sk(sk);
1653 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name);
1654 struct sock *other = NULL;
1655 int namelen = 0; /* fake GCC */
1658 struct sk_buff *skb;
1660 struct scm_cookie scm;
1666 err = scm_send(sock, msg, &scm, false);
1671 if (msg->msg_flags&MSG_OOB)
1674 if (msg->msg_namelen) {
1675 err = unix_mkname(sunaddr, msg->msg_namelen, &hash);
1682 other = unix_peer_get(sk);
1687 if (test_bit(SOCK_PASSCRED, &sock->flags) && !u->addr
1688 && (err = unix_autobind(sock)) != 0)
1692 if (len > sk->sk_sndbuf - 32)
1695 if (len > SKB_MAX_ALLOC) {
1696 data_len = min_t(size_t,
1697 len - SKB_MAX_ALLOC,
1698 MAX_SKB_FRAGS * PAGE_SIZE);
1699 data_len = PAGE_ALIGN(data_len);
1701 BUILD_BUG_ON(SKB_MAX_ALLOC < PAGE_SIZE);
1704 skb = sock_alloc_send_pskb(sk, len - data_len, data_len,
1705 msg->msg_flags & MSG_DONTWAIT, &err,
1706 PAGE_ALLOC_COSTLY_ORDER);
1710 err = unix_scm_to_skb(&scm, skb, true);
1713 max_level = err + 1;
1715 skb_put(skb, len - data_len);
1716 skb->data_len = data_len;
1718 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, len);
1722 timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
1727 if (sunaddr == NULL)
1730 other = unix_find_other(net, sunaddr, namelen, sk->sk_type,
1736 if (sk_filter(other, skb) < 0) {
1737 /* Toss the packet but do not return any error to the sender */
1743 unix_state_lock(other);
1746 if (!unix_may_send(sk, other))
1749 if (unlikely(sock_flag(other, SOCK_DEAD))) {
1751 * Check with 1003.1g - what should
1754 unix_state_unlock(other);
1758 unix_state_lock(sk);
1761 if (unix_peer(sk) == other) {
1762 unix_peer(sk) = NULL;
1763 unix_dgram_peer_wake_disconnect_wakeup(sk, other);
1765 unix_state_unlock(sk);
1767 unix_dgram_disconnected(sk, other);
1769 err = -ECONNREFUSED;
1771 unix_state_unlock(sk);
1781 if (other->sk_shutdown & RCV_SHUTDOWN)
1784 if (sk->sk_type != SOCK_SEQPACKET) {
1785 err = security_unix_may_send(sk->sk_socket, other->sk_socket);
1790 /* other == sk && unix_peer(other) != sk if
1791 * - unix_peer(sk) == NULL, destination address bound to sk
1792 * - unix_peer(sk) == sk by time of get but disconnected before lock
1795 unlikely(unix_peer(other) != sk && unix_recvq_full(other))) {
1797 timeo = unix_wait_for_peer(other, timeo);
1799 err = sock_intr_errno(timeo);
1800 if (signal_pending(current))
1807 unix_state_unlock(other);
1808 unix_state_double_lock(sk, other);
1811 if (unix_peer(sk) != other ||
1812 unix_dgram_peer_wake_me(sk, other)) {
1820 goto restart_locked;
1824 if (unlikely(sk_locked))
1825 unix_state_unlock(sk);
1827 if (sock_flag(other, SOCK_RCVTSTAMP))
1828 __net_timestamp(skb);
1829 maybe_add_creds(skb, sock, other);
1830 skb_queue_tail(&other->sk_receive_queue, skb);
1831 if (max_level > unix_sk(other)->recursion_level)
1832 unix_sk(other)->recursion_level = max_level;
1833 unix_state_unlock(other);
1834 other->sk_data_ready(other);
1841 unix_state_unlock(sk);
1842 unix_state_unlock(other);
1852 /* We use paged skbs for stream sockets, and limit occupancy to 32768
1853 * bytes, and a minimum of a full page.
1855 #define UNIX_SKB_FRAGS_SZ (PAGE_SIZE << get_order(32768))
1857 static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg,
1860 struct sock *sk = sock->sk;
1861 struct sock *other = NULL;
1863 struct sk_buff *skb;
1865 struct scm_cookie scm;
1866 bool fds_sent = false;
1871 err = scm_send(sock, msg, &scm, false);
1876 if (msg->msg_flags&MSG_OOB)
1879 if (msg->msg_namelen) {
1880 err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP;
1884 other = unix_peer(sk);
1889 if (sk->sk_shutdown & SEND_SHUTDOWN)
1892 while (sent < len) {
1895 /* Keep two messages in the pipe so it schedules better */
1896 size = min_t(int, size, (sk->sk_sndbuf >> 1) - 64);
1898 /* allow fallback to order-0 allocations */
1899 size = min_t(int, size, SKB_MAX_HEAD(0) + UNIX_SKB_FRAGS_SZ);
1901 data_len = max_t(int, 0, size - SKB_MAX_HEAD(0));
1903 data_len = min_t(size_t, size, PAGE_ALIGN(data_len));
1905 skb = sock_alloc_send_pskb(sk, size - data_len, data_len,
1906 msg->msg_flags & MSG_DONTWAIT, &err,
1907 get_order(UNIX_SKB_FRAGS_SZ));
1911 /* Only send the fds in the first buffer */
1912 err = unix_scm_to_skb(&scm, skb, !fds_sent);
1917 max_level = err + 1;
1920 skb_put(skb, size - data_len);
1921 skb->data_len = data_len;
1923 err = skb_copy_datagram_from_iter(skb, 0, &msg->msg_iter, size);
1929 unix_state_lock(other);
1931 if (sock_flag(other, SOCK_DEAD) ||
1932 (other->sk_shutdown & RCV_SHUTDOWN))
1935 maybe_add_creds(skb, sock, other);
1936 skb_queue_tail(&other->sk_receive_queue, skb);
1937 if (max_level > unix_sk(other)->recursion_level)
1938 unix_sk(other)->recursion_level = max_level;
1939 unix_state_unlock(other);
1940 other->sk_data_ready(other);
1949 unix_state_unlock(other);
1952 if (sent == 0 && !(msg->msg_flags&MSG_NOSIGNAL))
1953 send_sig(SIGPIPE, current, 0);
1957 return sent ? : err;
1960 static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page,
1961 int offset, size_t size, int flags)
1964 bool send_sigpipe = false;
1965 bool init_scm = true;
1966 struct scm_cookie scm;
1967 struct sock *other, *sk = socket->sk;
1968 struct sk_buff *skb, *newskb = NULL, *tail = NULL;
1970 if (flags & MSG_OOB)
1973 other = unix_peer(sk);
1974 if (!other || sk->sk_state != TCP_ESTABLISHED)
1979 unix_state_unlock(other);
1980 mutex_unlock(&unix_sk(other)->iolock);
1981 newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT,
1987 /* we must acquire iolock as we modify already present
1988 * skbs in the sk_receive_queue and mess with skb->len
1990 err = mutex_lock_interruptible(&unix_sk(other)->iolock);
1992 err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS;
1996 if (sk->sk_shutdown & SEND_SHUTDOWN) {
1998 send_sigpipe = true;
2002 unix_state_lock(other);
2004 if (sock_flag(other, SOCK_DEAD) ||
2005 other->sk_shutdown & RCV_SHUTDOWN) {
2007 send_sigpipe = true;
2008 goto err_state_unlock;
2012 err = maybe_init_creds(&scm, socket, other);
2014 goto err_state_unlock;
2018 skb = skb_peek_tail(&other->sk_receive_queue);
2019 if (tail && tail == skb) {
2021 } else if (!skb || !unix_skb_scm_eq(skb, &scm)) {
2028 } else if (newskb) {
2029 /* this is fast path, we don't necessarily need to
2030 * call to kfree_skb even though with newskb == NULL
2031 * this - does no harm
2033 consume_skb(newskb);
2037 if (skb_append_pagefrags(skb, page, offset, size)) {
2043 skb->data_len += size;
2044 skb->truesize += size;
2045 atomic_add(size, &sk->sk_wmem_alloc);
2048 err = unix_scm_to_skb(&scm, skb, false);
2050 goto err_state_unlock;
2051 spin_lock(&other->sk_receive_queue.lock);
2052 __skb_queue_tail(&other->sk_receive_queue, newskb);
2053 spin_unlock(&other->sk_receive_queue.lock);
2056 unix_state_unlock(other);
2057 mutex_unlock(&unix_sk(other)->iolock);
2059 other->sk_data_ready(other);
2064 unix_state_unlock(other);
2066 mutex_unlock(&unix_sk(other)->iolock);
2069 if (send_sigpipe && !(flags & MSG_NOSIGNAL))
2070 send_sig(SIGPIPE, current, 0);
2076 static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg,
2080 struct sock *sk = sock->sk;
2082 err = sock_error(sk);
2086 if (sk->sk_state != TCP_ESTABLISHED)
2089 if (msg->msg_namelen)
2090 msg->msg_namelen = 0;
2092 return unix_dgram_sendmsg(sock, msg, len);
2095 static int unix_seqpacket_recvmsg(struct socket *sock, struct msghdr *msg,
2096 size_t size, int flags)
2098 struct sock *sk = sock->sk;
2100 if (sk->sk_state != TCP_ESTABLISHED)
2103 return unix_dgram_recvmsg(sock, msg, size, flags);
2106 static void unix_copy_addr(struct msghdr *msg, struct sock *sk)
2108 struct unix_address *addr = smp_load_acquire(&unix_sk(sk)->addr);
2111 msg->msg_namelen = addr->len;
2112 memcpy(msg->msg_name, addr->name, addr->len);
2116 static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg,
2117 size_t size, int flags)
2119 struct scm_cookie scm;
2120 struct sock *sk = sock->sk;
2121 struct unix_sock *u = unix_sk(sk);
2122 int noblock = flags & MSG_DONTWAIT;
2123 struct sk_buff *skb;
2131 err = mutex_lock_interruptible(&u->iolock);
2132 if (unlikely(err)) {
2133 /* recvmsg() in non blocking mode is supposed to return -EAGAIN
2134 * sk_rcvtimeo is not honored by mutex_lock_interruptible()
2136 err = noblock ? -EAGAIN : -ERESTARTSYS;
2140 skip = sk_peek_offset(sk, flags);
2142 skb = __skb_recv_datagram(sk, flags, &peeked, &skip, &err);
2144 unix_state_lock(sk);
2145 /* Signal EOF on disconnected non-blocking SEQPACKET socket. */
2146 if (sk->sk_type == SOCK_SEQPACKET && err == -EAGAIN &&
2147 (sk->sk_shutdown & RCV_SHUTDOWN))
2149 unix_state_unlock(sk);
2153 wake_up_interruptible_sync_poll(&u->peer_wait,
2154 POLLOUT | POLLWRNORM | POLLWRBAND);
2157 unix_copy_addr(msg, skb->sk);
2159 if (size > skb->len - skip)
2160 size = skb->len - skip;
2161 else if (size < skb->len - skip)
2162 msg->msg_flags |= MSG_TRUNC;
2164 err = skb_copy_datagram_msg(skb, skip, msg, size);
2168 if (sock_flag(sk, SOCK_RCVTSTAMP))
2169 __sock_recv_timestamp(msg, sk, skb);
2171 memset(&scm, 0, sizeof(scm));
2173 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2174 unix_set_secdata(&scm, skb);
2176 if (!(flags & MSG_PEEK)) {
2178 unix_detach_fds(&scm, skb);
2180 sk_peek_offset_bwd(sk, skb->len);
2182 /* It is questionable: on PEEK we could:
2183 - do not return fds - good, but too simple 8)
2184 - return fds, and do not return them on read (old strategy,
2186 - clone fds (I chose it for now, it is the most universal
2189 POSIX 1003.1g does not actually define this clearly
2190 at all. POSIX 1003.1g doesn't define a lot of things
2195 sk_peek_offset_fwd(sk, size);
2198 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2200 err = (flags & MSG_TRUNC) ? skb->len - skip : size;
2202 scm_recv(sock, msg, &scm, flags);
2205 skb_free_datagram(sk, skb);
2207 mutex_unlock(&u->iolock);
2213 * Sleep until more data has arrived. But check for races..
2215 static long unix_stream_data_wait(struct sock *sk, long timeo,
2216 struct sk_buff *last, unsigned int last_len,
2219 struct sk_buff *tail;
2222 unix_state_lock(sk);
2225 prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE);
2227 tail = skb_peek_tail(&sk->sk_receive_queue);
2229 (tail && tail->len != last_len) ||
2231 (sk->sk_shutdown & RCV_SHUTDOWN) ||
2232 signal_pending(current) ||
2236 sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2237 unix_state_unlock(sk);
2239 timeo = freezable_schedule_timeout(timeo);
2241 timeo = schedule_timeout(timeo);
2242 unix_state_lock(sk);
2244 if (sock_flag(sk, SOCK_DEAD))
2247 sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
2250 finish_wait(sk_sleep(sk), &wait);
2251 unix_state_unlock(sk);
2255 static unsigned int unix_skb_len(const struct sk_buff *skb)
2257 return skb->len - UNIXCB(skb).consumed;
2260 struct unix_stream_read_state {
2261 int (*recv_actor)(struct sk_buff *, int, int,
2262 struct unix_stream_read_state *);
2263 struct socket *socket;
2265 struct pipe_inode_info *pipe;
2268 unsigned int splice_flags;
2271 static int unix_stream_read_generic(struct unix_stream_read_state *state,
2274 struct scm_cookie scm;
2275 struct socket *sock = state->socket;
2276 struct sock *sk = sock->sk;
2277 struct unix_sock *u = unix_sk(sk);
2279 int flags = state->flags;
2280 int noblock = flags & MSG_DONTWAIT;
2281 bool check_creds = false;
2286 size_t size = state->size;
2287 unsigned int last_len;
2289 if (unlikely(sk->sk_state != TCP_ESTABLISHED)) {
2294 if (unlikely(flags & MSG_OOB)) {
2299 target = sock_rcvlowat(sk, flags & MSG_WAITALL, size);
2300 timeo = sock_rcvtimeo(sk, noblock);
2302 memset(&scm, 0, sizeof(scm));
2304 /* Lock the socket to prevent queue disordering
2305 * while sleeps in memcpy_tomsg
2307 mutex_lock(&u->iolock);
2309 if (flags & MSG_PEEK)
2310 skip = sk_peek_offset(sk, flags);
2317 struct sk_buff *skb, *last;
2319 unix_state_lock(sk);
2320 if (sock_flag(sk, SOCK_DEAD)) {
2324 last = skb = skb_peek(&sk->sk_receive_queue);
2325 last_len = last ? last->len : 0;
2328 unix_sk(sk)->recursion_level = 0;
2329 if (copied >= target)
2333 * POSIX 1003.1g mandates this order.
2336 err = sock_error(sk);
2339 if (sk->sk_shutdown & RCV_SHUTDOWN)
2342 unix_state_unlock(sk);
2348 mutex_unlock(&u->iolock);
2350 timeo = unix_stream_data_wait(sk, timeo, last,
2351 last_len, freezable);
2353 if (signal_pending(current)) {
2354 err = sock_intr_errno(timeo);
2359 mutex_lock(&u->iolock);
2362 unix_state_unlock(sk);
2366 while (skip >= unix_skb_len(skb)) {
2367 skip -= unix_skb_len(skb);
2369 last_len = skb->len;
2370 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2375 unix_state_unlock(sk);
2378 /* Never glue messages from different writers */
2379 if (!unix_skb_scm_eq(skb, &scm))
2381 } else if (test_bit(SOCK_PASSCRED, &sock->flags)) {
2382 /* Copy credentials */
2383 scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid);
2384 unix_set_secdata(&scm, skb);
2388 /* Copy address just once */
2389 if (state->msg && state->msg->msg_name) {
2390 DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr,
2391 state->msg->msg_name);
2392 unix_copy_addr(state->msg, skb->sk);
2396 chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size);
2398 chunk = state->recv_actor(skb, skip, chunk, state);
2399 drop_skb = !unix_skb_len(skb);
2400 /* skb is only safe to use if !drop_skb */
2411 /* the skb was touched by a concurrent reader;
2412 * we should not expect anything from this skb
2413 * anymore and assume it invalid - we can be
2414 * sure it was dropped from the socket queue
2416 * let's report a short read
2422 /* Mark read part of skb as used */
2423 if (!(flags & MSG_PEEK)) {
2424 UNIXCB(skb).consumed += chunk;
2426 sk_peek_offset_bwd(sk, chunk);
2429 unix_detach_fds(&scm, skb);
2431 if (unix_skb_len(skb))
2434 skb_unlink(skb, &sk->sk_receive_queue);
2440 /* It is questionable, see note in unix_dgram_recvmsg.
2443 scm.fp = scm_fp_dup(UNIXCB(skb).fp);
2445 sk_peek_offset_fwd(sk, chunk);
2452 last_len = skb->len;
2453 unix_state_lock(sk);
2454 skb = skb_peek_next(skb, &sk->sk_receive_queue);
2457 unix_state_unlock(sk);
2462 mutex_unlock(&u->iolock);
2464 scm_recv(sock, state->msg, &scm, flags);
2468 return copied ? : err;
2471 static int unix_stream_read_actor(struct sk_buff *skb,
2472 int skip, int chunk,
2473 struct unix_stream_read_state *state)
2477 ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip,
2479 return ret ?: chunk;
2482 static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg,
2483 size_t size, int flags)
2485 struct unix_stream_read_state state = {
2486 .recv_actor = unix_stream_read_actor,
2493 return unix_stream_read_generic(&state, true);
2496 static ssize_t skb_unix_socket_splice(struct sock *sk,
2497 struct pipe_inode_info *pipe,
2498 struct splice_pipe_desc *spd)
2501 struct unix_sock *u = unix_sk(sk);
2503 mutex_unlock(&u->iolock);
2504 ret = splice_to_pipe(pipe, spd);
2505 mutex_lock(&u->iolock);
2510 static int unix_stream_splice_actor(struct sk_buff *skb,
2511 int skip, int chunk,
2512 struct unix_stream_read_state *state)
2514 return skb_splice_bits(skb, state->socket->sk,
2515 UNIXCB(skb).consumed + skip,
2516 state->pipe, chunk, state->splice_flags,
2517 skb_unix_socket_splice);
2520 static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos,
2521 struct pipe_inode_info *pipe,
2522 size_t size, unsigned int flags)
2524 struct unix_stream_read_state state = {
2525 .recv_actor = unix_stream_splice_actor,
2529 .splice_flags = flags,
2532 if (unlikely(*ppos))
2535 if (sock->file->f_flags & O_NONBLOCK ||
2536 flags & SPLICE_F_NONBLOCK)
2537 state.flags = MSG_DONTWAIT;
2539 return unix_stream_read_generic(&state, false);
2542 static int unix_shutdown(struct socket *sock, int mode)
2544 struct sock *sk = sock->sk;
2547 if (mode < SHUT_RD || mode > SHUT_RDWR)
2550 * SHUT_RD (0) -> RCV_SHUTDOWN (1)
2551 * SHUT_WR (1) -> SEND_SHUTDOWN (2)
2552 * SHUT_RDWR (2) -> SHUTDOWN_MASK (3)
2556 unix_state_lock(sk);
2557 sk->sk_shutdown |= mode;
2558 other = unix_peer(sk);
2561 unix_state_unlock(sk);
2562 sk->sk_state_change(sk);
2565 (sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET)) {
2569 if (mode&RCV_SHUTDOWN)
2570 peer_mode |= SEND_SHUTDOWN;
2571 if (mode&SEND_SHUTDOWN)
2572 peer_mode |= RCV_SHUTDOWN;
2573 unix_state_lock(other);
2574 other->sk_shutdown |= peer_mode;
2575 unix_state_unlock(other);
2576 other->sk_state_change(other);
2577 if (peer_mode == SHUTDOWN_MASK)
2578 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_HUP);
2579 else if (peer_mode & RCV_SHUTDOWN)
2580 sk_wake_async(other, SOCK_WAKE_WAITD, POLL_IN);
2588 long unix_inq_len(struct sock *sk)
2590 struct sk_buff *skb;
2593 if (sk->sk_state == TCP_LISTEN)
2596 spin_lock(&sk->sk_receive_queue.lock);
2597 if (sk->sk_type == SOCK_STREAM ||
2598 sk->sk_type == SOCK_SEQPACKET) {
2599 skb_queue_walk(&sk->sk_receive_queue, skb)
2600 amount += unix_skb_len(skb);
2602 skb = skb_peek(&sk->sk_receive_queue);
2606 spin_unlock(&sk->sk_receive_queue.lock);
2610 EXPORT_SYMBOL_GPL(unix_inq_len);
2612 long unix_outq_len(struct sock *sk)
2614 return sk_wmem_alloc_get(sk);
2616 EXPORT_SYMBOL_GPL(unix_outq_len);
2618 static int unix_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
2620 struct sock *sk = sock->sk;
2626 amount = unix_outq_len(sk);
2627 err = put_user(amount, (int __user *)arg);
2630 amount = unix_inq_len(sk);
2634 err = put_user(amount, (int __user *)arg);
2643 static unsigned int unix_poll(struct file *file, struct socket *sock, poll_table *wait)
2645 struct sock *sk = sock->sk;
2648 sock_poll_wait(file, sk_sleep(sk), wait);
2651 /* exceptional events? */
2654 if (sk->sk_shutdown == SHUTDOWN_MASK)
2656 if (sk->sk_shutdown & RCV_SHUTDOWN)
2657 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2660 if (!skb_queue_empty(&sk->sk_receive_queue))
2661 mask |= POLLIN | POLLRDNORM;
2663 /* Connection-based need to check for termination and startup */
2664 if ((sk->sk_type == SOCK_STREAM || sk->sk_type == SOCK_SEQPACKET) &&
2665 sk->sk_state == TCP_CLOSE)
2669 * we set writable also when the other side has shut down the
2670 * connection. This prevents stuck sockets.
2672 if (unix_writable(sk))
2673 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2678 static unsigned int unix_dgram_poll(struct file *file, struct socket *sock,
2681 struct sock *sk = sock->sk, *other;
2682 unsigned int mask, writable;
2684 sock_poll_wait(file, sk_sleep(sk), wait);
2687 /* exceptional events? */
2688 if (sk->sk_err || !skb_queue_empty(&sk->sk_error_queue))
2690 (sock_flag(sk, SOCK_SELECT_ERR_QUEUE) ? POLLPRI : 0);
2692 if (sk->sk_shutdown & RCV_SHUTDOWN)
2693 mask |= POLLRDHUP | POLLIN | POLLRDNORM;
2694 if (sk->sk_shutdown == SHUTDOWN_MASK)
2698 if (!skb_queue_empty(&sk->sk_receive_queue))
2699 mask |= POLLIN | POLLRDNORM;
2701 /* Connection-based need to check for termination and startup */
2702 if (sk->sk_type == SOCK_SEQPACKET) {
2703 if (sk->sk_state == TCP_CLOSE)
2705 /* connection hasn't started yet? */
2706 if (sk->sk_state == TCP_SYN_SENT)
2710 /* No write status requested, avoid expensive OUT tests. */
2711 if (!(poll_requested_events(wait) & (POLLWRBAND|POLLWRNORM|POLLOUT)))
2714 writable = unix_writable(sk);
2716 unix_state_lock(sk);
2718 other = unix_peer(sk);
2719 if (other && unix_peer(other) != sk &&
2720 unix_recvq_full(other) &&
2721 unix_dgram_peer_wake_me(sk, other))
2724 unix_state_unlock(sk);
2728 mask |= POLLOUT | POLLWRNORM | POLLWRBAND;
2730 sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk);
2735 #ifdef CONFIG_PROC_FS
2737 #define BUCKET_SPACE (BITS_PER_LONG - (UNIX_HASH_BITS + 1) - 1)
2739 #define get_bucket(x) ((x) >> BUCKET_SPACE)
2740 #define get_offset(x) ((x) & ((1L << BUCKET_SPACE) - 1))
2741 #define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
2743 static struct sock *unix_from_bucket(struct seq_file *seq, loff_t *pos)
2745 unsigned long offset = get_offset(*pos);
2746 unsigned long bucket = get_bucket(*pos);
2748 unsigned long count = 0;
2750 for (sk = sk_head(&unix_socket_table[bucket]); sk; sk = sk_next(sk)) {
2751 if (sock_net(sk) != seq_file_net(seq))
2753 if (++count == offset)
2760 static struct sock *unix_next_socket(struct seq_file *seq,
2764 unsigned long bucket;
2766 while (sk > (struct sock *)SEQ_START_TOKEN) {
2770 if (sock_net(sk) == seq_file_net(seq))
2775 sk = unix_from_bucket(seq, pos);
2780 bucket = get_bucket(*pos) + 1;
2781 *pos = set_bucket_offset(bucket, 1);
2782 } while (bucket < ARRAY_SIZE(unix_socket_table));
2787 static void *unix_seq_start(struct seq_file *seq, loff_t *pos)
2788 __acquires(unix_table_lock)
2790 spin_lock(&unix_table_lock);
2793 return SEQ_START_TOKEN;
2795 if (get_bucket(*pos) >= ARRAY_SIZE(unix_socket_table))
2798 return unix_next_socket(seq, NULL, pos);
2801 static void *unix_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2804 return unix_next_socket(seq, v, pos);
2807 static void unix_seq_stop(struct seq_file *seq, void *v)
2808 __releases(unix_table_lock)
2810 spin_unlock(&unix_table_lock);
2813 static int unix_seq_show(struct seq_file *seq, void *v)
2816 if (v == SEQ_START_TOKEN)
2817 seq_puts(seq, "Num RefCount Protocol Flags Type St "
2821 struct unix_sock *u = unix_sk(s);
2824 seq_printf(seq, "%pK: %08X %08X %08X %04X %02X %5lu",
2826 atomic_read(&s->sk_refcnt),
2828 s->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
2831 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
2832 (s->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
2835 if (u->addr) { // under unix_table_lock here
2840 len = u->addr->len - sizeof(short);
2841 if (!UNIX_ABSTRACT(s))
2847 for ( ; i < len; i++)
2848 seq_putc(seq, u->addr->name->sun_path[i]);
2850 unix_state_unlock(s);
2851 seq_putc(seq, '\n');
2857 static const struct seq_operations unix_seq_ops = {
2858 .start = unix_seq_start,
2859 .next = unix_seq_next,
2860 .stop = unix_seq_stop,
2861 .show = unix_seq_show,
2864 static int unix_seq_open(struct inode *inode, struct file *file)
2866 return seq_open_net(inode, file, &unix_seq_ops,
2867 sizeof(struct seq_net_private));
2870 static const struct file_operations unix_seq_fops = {
2871 .owner = THIS_MODULE,
2872 .open = unix_seq_open,
2874 .llseek = seq_lseek,
2875 .release = seq_release_net,
2880 static const struct net_proto_family unix_family_ops = {
2882 .create = unix_create,
2883 .owner = THIS_MODULE,
2887 static int __net_init unix_net_init(struct net *net)
2889 int error = -ENOMEM;
2891 net->unx.sysctl_max_dgram_qlen = 10;
2892 if (unix_sysctl_register(net))
2895 #ifdef CONFIG_PROC_FS
2896 if (!proc_create("unix", 0, net->proc_net, &unix_seq_fops)) {
2897 unix_sysctl_unregister(net);
2906 static void __net_exit unix_net_exit(struct net *net)
2908 unix_sysctl_unregister(net);
2909 remove_proc_entry("unix", net->proc_net);
2912 static struct pernet_operations unix_net_ops = {
2913 .init = unix_net_init,
2914 .exit = unix_net_exit,
2917 static int __init af_unix_init(void)
2921 BUILD_BUG_ON(sizeof(struct unix_skb_parms) > FIELD_SIZEOF(struct sk_buff, cb));
2923 rc = proto_register(&unix_proto, 1);
2925 pr_crit("%s: Cannot create unix_sock SLAB cache!\n", __func__);
2929 sock_register(&unix_family_ops);
2930 register_pernet_subsys(&unix_net_ops);
2935 static void __exit af_unix_exit(void)
2937 sock_unregister(PF_UNIX);
2938 proto_unregister(&unix_proto);
2939 unregister_pernet_subsys(&unix_net_ops);
2942 /* Earlier than device_initcall() so that other drivers invoking
2943 request_module() don't end up in a loop when modprobe tries
2944 to use a UNIX socket. But later than subsys_initcall() because
2945 we depend on stuff initialised there */
2946 fs_initcall(af_unix_init);
2947 module_exit(af_unix_exit);
2949 MODULE_LICENSE("GPL");
2950 MODULE_ALIAS_NETPROTO(PF_UNIX);