2 * QEMU I/O channels sockets driver
4 * Copyright (c) 2015 Red Hat, Inc.
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
20 #include "qemu/osdep.h"
21 #include "qapi/error.h"
22 #include "qapi/qapi-visit-sockets.h"
23 #include "qemu/module.h"
24 #include "io/channel-socket.h"
25 #include "io/channel-util.h"
26 #include "io/channel-watch.h"
28 #include "qapi/clone-visitor.h"
30 #include <linux/errqueue.h>
31 #include <sys/socket.h>
33 #if (defined(MSG_ZEROCOPY) && defined(SO_ZEROCOPY))
34 #define QEMU_MSG_ZEROCOPY
38 #define SOCKET_MAX_FDS 16
/*
 * Return the channel's local address.  The visible statement converts
 * the sockaddr cached in ioc->localAddr through
 * socket_sockaddr_to_address() and returns that result directly.
 * NOTE(review): the remaining call arguments and the return type are
 * not visible in this listing.
 */
41 qio_channel_socket_get_local_address(QIOChannelSocket *ioc,
44 return socket_sockaddr_to_address(&ioc->localAddr,
/*
 * Return the channel's remote (peer) address.  The visible statement
 * converts the sockaddr cached in ioc->remoteAddr through
 * socket_sockaddr_to_address() and returns that result directly.
 * NOTE(review): the remaining call arguments and the return type are
 * not visible in this listing.
 */
50 qio_channel_socket_get_remote_address(QIOChannelSocket *ioc,
53 return socket_sockaddr_to_address(&ioc->remoteAddr,
/*
 * Create a new socket channel object.  The zero-copy counters start at
 * zero and the SHUTDOWN feature is advertised on the new channel.
 */
59 qio_channel_socket_new(void)
61 QIOChannelSocket *sioc;
/* Instantiate the object through its registered type name. */
64 sioc = QIO_CHANNEL_SOCKET(object_new(TYPE_QIO_CHANNEL_SOCKET));
/* No zero-copy writes have been queued or acknowledged yet. */
66 sioc->zero_copy_queued = 0;
67 sioc->zero_copy_sent = 0;
/* Same object viewed through its parent QIOChannel type. */
69 ioc = QIO_CHANNEL(sioc);
70 qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_SHUTDOWN);
/*
 * NOTE(review): CreateEvent() is a Win32 API, so this line is presumably
 * inside a platform guard that is not visible here -- confirm.
 */
73 ioc->event = CreateEvent(NULL, FALSE, FALSE, NULL);
76 trace_qio_channel_socket_new(sioc);
/*
 * Attach an existing socket file descriptor to @sioc and cache its
 * remote and local addresses.  The visible error paths report through
 * @errp; on failure sioc->fd is reset to -1 so the caller keeps
 * responsibility for closing @fd.
 */
83 qio_channel_socket_set_fd(QIOChannelSocket *sioc,
/* Error reported when the channel already has a socket (test not visible). */
88 error_setg(errp, "Socket is already open");
/* getpeername()/getsockname() need the buffer capacity as input length. */
93 sioc->remoteAddrLen = sizeof(sioc->remoteAddr);
94 sioc->localAddrLen = sizeof(sioc->localAddr);
/* Cache the peer address. */
97 if (getpeername(fd, (struct sockaddr *)&sioc->remoteAddr,
98 &sioc->remoteAddrLen) < 0) {
/* ENOTCONN is handled by storing an all-zero remote address. */
99 if (errno == ENOTCONN) {
100 memset(&sioc->remoteAddr, 0, sizeof(sioc->remoteAddr));
101 sioc->remoteAddrLen = sizeof(sioc->remoteAddr);
103 error_setg_errno(errp, errno,
104 "Unable to query remote socket address");
/* Cache the local address. */
109 if (getsockname(fd, (struct sockaddr *)&sioc->localAddr,
110 &sioc->localAddrLen) < 0) {
111 error_setg_errno(errp, errno,
112 "Unable to query local socket address");
/* AF_UNIX sockets additionally advertise file-descriptor passing. */
117 if (sioc->localAddr.ss_family == AF_UNIX) {
118 QIOChannel *ioc = QIO_CHANNEL(sioc);
119 qio_channel_set_feature(ioc, QIO_CHANNEL_FEATURE_FD_PASS);
126 sioc->fd = -1; /* Let the caller close FD on failure */
/*
 * Create a new socket channel and attach the already-open descriptor
 * @fd to it with qio_channel_socket_set_fd().
 */
131 qio_channel_socket_new_fd(int fd,
134 QIOChannelSocket *ioc;
136 ioc = qio_channel_socket_new();
/* If attaching fails, drop the reference taken on the new channel. */
137 if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
138 object_unref(OBJECT(ioc));
142 trace_qio_channel_socket_new_fd(ioc, fd);
/*
 * Synchronously connect @ioc to @addr: socket_connect() produces the
 * descriptor, which is then attached with qio_channel_socket_set_fd().
 * Afterwards optional channel features are enabled.
 */
148 int qio_channel_socket_connect_sync(QIOChannelSocket *ioc,
154 trace_qio_channel_socket_connect_sync(ioc, addr);
155 fd = socket_connect(addr, errp);
157 trace_qio_channel_socket_connect_fail(ioc);
161 trace_qio_channel_socket_connect_complete(ioc, fd);
162 if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
/*
 * Only when the build has MSG_ZEROCOPY/SO_ZEROCOPY: try to switch the
 * socket into zero-copy mode and advertise the write feature.
 * NOTE(review): the test on 'ret' is not visible here; the comment below
 * suggests the feature is set only when setsockopt() succeeds -- confirm.
 */
167 #ifdef QEMU_MSG_ZEROCOPY
169 ret = setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &v, sizeof(v));
171 /* Zero copy available on host */
172 qio_channel_set_feature(QIO_CHANNEL(ioc),
173 QIO_CHANNEL_FEATURE_WRITE_ZERO_COPY);
/* Advertise MSG_PEEK-style reads on the connected channel. */
177 qio_channel_set_feature(QIO_CHANNEL(ioc),
178 QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
/*
 * Task worker for the asynchronous connect: the task's source object is
 * the channel and @opaque carries the SocketAddress to connect to.  The
 * synchronous connect is run and its error (if any) is stored on the
 * task.
 */
184 static void qio_channel_socket_connect_worker(QIOTask *task,
187 QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task));
188 SocketAddress *addr = opaque;
191 qio_channel_socket_connect_sync(ioc, addr, &err);
193 qio_task_set_error(task, err);
/*
 * Start an asynchronous connect.  A QIOTask is created around @ioc with
 * the caller's @callback/@opaque/@destroy, the address is cloned, and
 * qio_channel_socket_connect_worker() is run in a thread.
 */
197 void qio_channel_socket_connect_async(QIOChannelSocket *ioc,
199 QIOTaskFunc callback,
201 GDestroyNotify destroy,
202 GMainContext *context)
204 QIOTask *task = qio_task_new(
205 OBJECT(ioc), callback, opaque, destroy);
206 SocketAddress *addrCopy;
/* The worker gets its own copy of the address. */
208 addrCopy = QAPI_CLONE(SocketAddress, addr);
210 /* socket_connect() does a non-blocking connect(), but it
211 * still blocks in DNS lookups, so we must use a thread */
212 trace_qio_channel_socket_connect_async(ioc, addr);
/* qapi_free_SocketAddress is passed as the destroy notifier for the worker data. */
213 qio_task_run_in_thread(task,
214 qio_channel_socket_connect_worker,
216 (GDestroyNotify)qapi_free_SocketAddress,
/*
 * Synchronously create a listening socket for @addr with @num passed to
 * socket_listen(), attach it to @ioc and mark the channel with the
 * LISTEN feature.
 */
221 int qio_channel_socket_listen_sync(QIOChannelSocket *ioc,
228 trace_qio_channel_socket_listen_sync(ioc, addr, num);
229 fd = socket_listen(addr, num, errp);
231 trace_qio_channel_socket_listen_fail(ioc);
235 trace_qio_channel_socket_listen_complete(ioc, fd);
236 if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
/* The LISTEN feature is what close/finalize test before calling socket_listen_cleanup(). */
240 qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_LISTEN);
/*
 * Arguments handed to the listen worker thread.
 * NOTE(review): an 'addr' member is used by the worker and free
 * functions but its declaration is not visible in this listing.
 */
246 struct QIOChannelListenWorkerData {
248 int num; /* amount of expected connections */
/*
 * Destroy notifier for the listen worker data: releases the cloned
 * SocketAddress held in data->addr.
 */
251 static void qio_channel_listen_worker_free(gpointer opaque)
253 struct QIOChannelListenWorkerData *data = opaque;
255 qapi_free_SocketAddress(data->addr);
/*
 * Task worker for the asynchronous listen: runs the synchronous listen
 * on the task's source channel with the address and connection count
 * from @opaque, then stores any error on the task.
 */
259 static void qio_channel_socket_listen_worker(QIOTask *task,
262 QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task));
263 struct QIOChannelListenWorkerData *data = opaque;
266 qio_channel_socket_listen_sync(ioc, data->addr, data->num, &err);
268 qio_task_set_error(task, err);
/*
 * Start an asynchronous listen.  A QIOTask is created around @ioc, the
 * worker data is allocated zero-initialised with a clone of @addr, and
 * qio_channel_socket_listen_worker() is run in a thread.
 */
272 void qio_channel_socket_listen_async(QIOChannelSocket *ioc,
275 QIOTaskFunc callback,
277 GDestroyNotify destroy,
278 GMainContext *context)
280 QIOTask *task = qio_task_new(
281 OBJECT(ioc), callback, opaque, destroy);
282 struct QIOChannelListenWorkerData *data;
/* Released later through qio_channel_listen_worker_free(). */
284 data = g_new0(struct QIOChannelListenWorkerData, 1);
285 data->addr = QAPI_CLONE(SocketAddress, addr);
288 /* socket_listen() blocks in DNS lookups, so we must use a thread */
289 trace_qio_channel_socket_listen_async(ioc, addr, num);
290 qio_task_run_in_thread(task,
291 qio_channel_socket_listen_worker,
293 qio_channel_listen_worker_free,
/*
 * Synchronously create a datagram socket between @localAddr and
 * @remoteAddr and attach it to @ioc.
 */
298 int qio_channel_socket_dgram_sync(QIOChannelSocket *ioc,
299 SocketAddress *localAddr,
300 SocketAddress *remoteAddr,
305 trace_qio_channel_socket_dgram_sync(ioc, localAddr, remoteAddr);
/* Note the argument order: socket_dgram() takes the remote address first. */
306 fd = socket_dgram(remoteAddr, localAddr, errp);
308 trace_qio_channel_socket_dgram_fail(ioc);
312 trace_qio_channel_socket_dgram_complete(ioc, fd);
313 if (qio_channel_socket_set_fd(ioc, fd, errp) < 0) {
/* Arguments handed to the datagram worker thread: cloned local and remote addresses. */
322 struct QIOChannelSocketDGramWorkerData {
323 SocketAddress *localAddr;
324 SocketAddress *remoteAddr;
/*
 * Destroy notifier for the datagram worker data: releases both cloned
 * SocketAddress objects.
 */
328 static void qio_channel_socket_dgram_worker_free(gpointer opaque)
330 struct QIOChannelSocketDGramWorkerData *data = opaque;
331 qapi_free_SocketAddress(data->localAddr);
332 qapi_free_SocketAddress(data->remoteAddr);
/*
 * Task worker for the asynchronous datagram setup: runs the synchronous
 * variant on the task's source channel with the addresses from @opaque,
 * then stores any error on the task.
 */
336 static void qio_channel_socket_dgram_worker(QIOTask *task,
339 QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(qio_task_get_source(task));
340 struct QIOChannelSocketDGramWorkerData *data = opaque;
343 /* socket_dgram() blocks in DNS lookups, so we must use a thread */
344 qio_channel_socket_dgram_sync(ioc, data->localAddr,
345 data->remoteAddr, &err);
347 qio_task_set_error(task, err);
/*
 * Start an asynchronous datagram socket setup.  A QIOTask is created
 * around @ioc, both addresses are cloned into freshly allocated worker
 * data, and qio_channel_socket_dgram_worker() is run in a thread.
 */
351 void qio_channel_socket_dgram_async(QIOChannelSocket *ioc,
352 SocketAddress *localAddr,
353 SocketAddress *remoteAddr,
354 QIOTaskFunc callback,
356 GDestroyNotify destroy,
357 GMainContext *context)
359 QIOTask *task = qio_task_new(
360 OBJECT(ioc), callback, opaque, destroy);
/* Released later through qio_channel_socket_dgram_worker_free(). */
361 struct QIOChannelSocketDGramWorkerData *data = g_new0(
362 struct QIOChannelSocketDGramWorkerData, 1);
364 data->localAddr = QAPI_CLONE(SocketAddress, localAddr);
365 data->remoteAddr = QAPI_CLONE(SocketAddress, remoteAddr);
367 trace_qio_channel_socket_dgram_async(ioc, localAddr, remoteAddr);
368 qio_task_run_in_thread(task,
369 qio_channel_socket_dgram_worker,
371 qio_channel_socket_dgram_worker_free,
/*
 * Accept one connection on the listening channel @ioc and wrap it in a
 * newly created client channel.  The peer address is filled in by
 * qemu_accept() and the local address by getsockname().
 */
377 qio_channel_socket_accept(QIOChannelSocket *ioc,
380 QIOChannelSocket *cioc;
382 cioc = qio_channel_socket_new();
/*
 * NOTE(review): the sizes are taken from the listener's fields (ioc)
 * but stored on the client (cioc).  Both are the same struct member
 * type so the value is identical; sizeof(cioc->...) would read more
 * consistently.
 */
383 cioc->remoteAddrLen = sizeof(ioc->remoteAddr);
384 cioc->localAddrLen = sizeof(ioc->localAddr);
387 trace_qio_channel_socket_accept(ioc);
388 cioc->fd = qemu_accept(ioc->fd, (struct sockaddr *)&cioc->remoteAddr,
389 &cioc->remoteAddrLen);
/* EINTR is special-cased before the error is reported (the action taken is not visible here). */
391 if (errno == EINTR) {
394 error_setg_errno(errp, errno, "Unable to accept connection");
395 trace_qio_channel_socket_accept_fail(ioc);
/* Cache the local address of the accepted socket. */
399 if (getsockname(cioc->fd, (struct sockaddr *)&cioc->localAddr,
400 &cioc->localAddrLen) < 0) {
401 error_setg_errno(errp, errno,
402 "Unable to query local socket address");
/* AF_UNIX client sockets additionally advertise file-descriptor passing. */
407 if (cioc->localAddr.ss_family == AF_UNIX) {
408 QIOChannel *ioc_local = QIO_CHANNEL(cioc);
409 qio_channel_set_feature(ioc_local, QIO_CHANNEL_FEATURE_FD_PASS);
413 qio_channel_set_feature(QIO_CHANNEL(cioc),
414 QIO_CHANNEL_FEATURE_READ_MSG_PEEK);
416 trace_qio_channel_socket_accept_complete(ioc, cioc, cioc->fd);
/* Drops the reference on the new client channel (presumably the failure path -- labels are not visible here). */
420 object_unref(OBJECT(cioc));
/*
 * Instance initialiser referenced from qio_channel_socket_info.
 * NOTE(review): only the cast to QIOChannelSocket is visible in this
 * listing; the field initialisation it performs is not shown.
 */
424 static void qio_channel_socket_init(Object *obj)
426 QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
/*
 * Instance finaliser referenced from qio_channel_socket_info.  For
 * channels carrying the LISTEN feature it runs socket_listen_cleanup()
 * and reports any resulting error with error_report_err().
 */
430 static void qio_channel_socket_finalize(Object *obj)
432 QIOChannelSocket *ioc = QIO_CHANNEL_SOCKET(obj);
435 QIOChannel *ioc_local = QIO_CHANNEL(ioc);
436 if (qio_channel_has_feature(ioc_local, QIO_CHANNEL_FEATURE_LISTEN)) {
439 socket_listen_cleanup(ioc->fd, &err);
/* There is no caller to propagate to from a finaliser, so the error is only reported. */
441 error_report_err(err);
/* NOTE(review): presumably inside a platform guard not visible here -- confirm. */
446 qemu_socket_unselect(ioc->fd, NULL);
/*
 * Collect file descriptors received as SCM_RIGHTS ancillary data in
 * @msg.  Each matching control message's descriptors are appended to
 * the array at *@fds (grown with g_renew) starting at index *@nfds.
 */
455 static void qio_channel_socket_copy_fds(struct msghdr *msg,
456 int **fds, size_t *nfds)
458 struct cmsghdr *cmsg;
/* Walk every control message attached to the received msghdr. */
463 for (cmsg = CMSG_FIRSTHDR(msg); cmsg; cmsg = CMSG_NXTHDR(msg, cmsg)) {
/* Filter: too short for one int, or not a SOL_SOCKET/SCM_RIGHTS message. */
467 if (cmsg->cmsg_len < CMSG_LEN(sizeof(int)) ||
468 cmsg->cmsg_level != SOL_SOCKET ||
469 cmsg->cmsg_type != SCM_RIGHTS) {
/* Payload size in bytes: total length minus the cmsg header. */
473 fd_size = cmsg->cmsg_len - CMSG_LEN(0);
479 gotfds = fd_size / sizeof(int);
/* Grow the caller's array and append the new descriptors after the existing ones. */
480 *fds = g_renew(int, *fds, *nfds + gotfds);
481 memcpy(*fds + *nfds, CMSG_DATA(cmsg), fd_size);
/* Post-process each newly received descriptor. */
483 for (i = 0; i < gotfds; i++) {
484 int fd = (*fds)[*nfds + i];
489 /* O_NONBLOCK is preserved across SCM_RIGHTS so reset it */
490 qemu_socket_set_block(fd);
/* Without MSG_CMSG_CLOEXEC, close-on-exec is set by hand on each descriptor. */
492 #ifndef MSG_CMSG_CLOEXEC
493 qemu_set_cloexec(fd);
/*
 * Scatter read from the socket using recvmsg(), with a control buffer
 * sized for up to SOCKET_MAX_FDS passed descriptors.  EAGAIN is mapped
 * to QIO_CHANNEL_ERR_BLOCK; received descriptors are extracted with
 * qio_channel_socket_copy_fds().
 * NOTE(review): a second function with this name appears later in the
 * file; presumably the two are selected by a platform #ifdef that is
 * not visible in this listing.
 */
501 static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
502 const struct iovec *iov,
509 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
511 struct msghdr msg = { NULL, };
/* Room for one SCM_RIGHTS message carrying SOCKET_MAX_FDS ints. */
512 char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
515 memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
/* msghdr wants a non-const iovec pointer, hence the cast. */
517 msg.msg_iov = (struct iovec *)iov;
518 msg.msg_iovlen = niov;
520 msg.msg_control = control;
521 msg.msg_controllen = sizeof(control);
/* Where available, ask for received descriptors to be close-on-exec. */
522 #ifdef MSG_CMSG_CLOEXEC
523 sflags |= MSG_CMSG_CLOEXEC;
528 if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
533 ret = recvmsg(sioc->fd, &msg, sflags);
/* A would-block condition is reported with the channel layer's dedicated code. */
535 if (errno == EAGAIN) {
536 return QIO_CHANNEL_ERR_BLOCK;
538 if (errno == EINTR) {
542 error_setg_errno(errp, errno,
543 "Unable to read from socket");
548 qio_channel_socket_copy_fds(&msg, fds, nfds);
/*
 * Gather write to the socket using sendmsg(), optionally passing up to
 * SOCKET_MAX_FDS descriptors as SCM_RIGHTS ancillary data and
 * optionally using MSG_ZEROCOPY.  Zero-copy sends are counted in
 * sioc->zero_copy_queued.
 * NOTE(review): a second function with this name appears later in the
 * file; presumably the two are selected by a platform #ifdef that is
 * not visible in this listing.
 */
554 static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
555 const struct iovec *iov,
562 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
564 struct msghdr msg = { NULL, };
/* Control buffer is sized for the maximum number of descriptors. */
565 char control[CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS)];
566 size_t fdsize = sizeof(int) * nfds;
567 struct cmsghdr *cmsg;
570 memset(control, 0, CMSG_SPACE(sizeof(int) * SOCKET_MAX_FDS));
/* msghdr wants a non-const iovec pointer, hence the cast. */
572 msg.msg_iov = (struct iovec *)iov;
573 msg.msg_iovlen = niov;
/* More descriptors than the control buffer can hold is reported as EINVAL. */
576 if (nfds > SOCKET_MAX_FDS) {
577 error_setg_errno(errp, EINVAL,
578 "Only %d FDs can be sent, got %zu",
579 SOCKET_MAX_FDS, nfds);
/* Build a single SCM_RIGHTS control message holding the @nfds descriptors. */
583 msg.msg_control = control;
584 msg.msg_controllen = CMSG_SPACE(sizeof(int) * nfds);
586 cmsg = CMSG_FIRSTHDR(&msg);
587 cmsg->cmsg_len = CMSG_LEN(fdsize);
588 cmsg->cmsg_level = SOL_SOCKET;
589 cmsg->cmsg_type = SCM_RIGHTS;
590 memcpy(CMSG_DATA(cmsg), fds, fdsize);
/* Zero-copy request: use MSG_ZEROCOPY when the build supports it. */
593 if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
594 #ifdef QEMU_MSG_ZEROCOPY
595 sflags = MSG_ZEROCOPY;
598 * We expect QIOChannel class entry point to have
599 * blocked this code path already
601 g_assert_not_reached();
606 ret = sendmsg(sioc->fd, &msg, sflags);
610 return QIO_CHANNEL_ERR_BLOCK;
/* Dedicated message for a zero-copy failure (the errno case selecting it is not visible here). */
614 if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
615 error_setg_errno(errp, errno,
616 "Process can't lock enough memory for using MSG_ZEROCOPY");
622 error_setg_errno(errp, errno,
623 "Unable to write to socket");
/* Count the zero-copy send so the flush routine knows how many completions to wait for. */
627 if (flags & QIO_CHANNEL_WRITE_FLAG_ZERO_COPY) {
628 sioc->zero_copy_queued++;
/*
 * Alternative scatter-read implementation that walks the iovec array
 * one element at a time instead of issuing a single recvmsg().
 * NOTE(review): this is the second definition of this name in the file;
 * presumably it is the variant for platforms without recvmsg(), chosen
 * by an #ifdef not visible here -- confirm.  The per-element receive
 * call itself is also not visible in this listing.
 */
634 static ssize_t qio_channel_socket_readv(QIOChannel *ioc,
635 const struct iovec *iov,
642 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
647 if (flags & QIO_CHANNEL_READ_FLAG_MSG_PEEK) {
/* One receive per iovec element. */
651 for (i = 0; i < niov; i++) {
659 if (errno == EAGAIN) {
663 return QIO_CHANNEL_ERR_BLOCK;
665 } else if (errno == EINTR) {
668 error_setg_errno(errp, errno,
669 "Unable to read from socket");
/* Detects that this element was only partially filled (the action taken is not visible here). */
674 if (ret < iov[i].iov_len) {
/*
 * Alternative gather-write implementation that walks the iovec array
 * one element at a time instead of issuing a single sendmsg().
 * NOTE(review): this is the second definition of this name in the file;
 * presumably it is the variant for platforms without sendmsg(), chosen
 * by an #ifdef not visible here -- confirm.  The per-element send call
 * itself is also not visible in this listing.
 */
682 static ssize_t qio_channel_socket_writev(QIOChannel *ioc,
683 const struct iovec *iov,
690 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
/* One send per iovec element. */
694 for (i = 0; i < niov; i++) {
702 if (errno == EAGAIN) {
706 return QIO_CHANNEL_ERR_BLOCK;
708 } else if (errno == EINTR) {
711 error_setg_errno(errp, errno,
712 "Unable to write to socket");
/* Detects that this element was only partially written (the action taken is not visible here). */
717 if (ret < iov[i].iov_len) {
727 #ifdef QEMU_MSG_ZEROCOPY
728 static int qio_channel_socket_flush(QIOChannel *ioc,
731 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
732 struct msghdr msg = {};
733 struct sock_extended_err *serr;
735 char control[CMSG_SPACE(sizeof(*serr))];
739 if (sioc->zero_copy_queued == sioc->zero_copy_sent) {
743 msg.msg_control = control;
744 msg.msg_controllen = sizeof(control);
745 memset(control, 0, sizeof(control));
749 while (sioc->zero_copy_sent < sioc->zero_copy_queued) {
750 received = recvmsg(sioc->fd, &msg, MSG_ERRQUEUE);
754 /* Nothing on errqueue, wait until something is available */
755 qio_channel_wait(ioc, G_IO_ERR);
760 error_setg_errno(errp, errno,
761 "Unable to read errqueue");
766 cm = CMSG_FIRSTHDR(&msg);
767 if (cm->cmsg_level != SOL_IP && cm->cmsg_type != IP_RECVERR &&
768 cm->cmsg_level != SOL_IPV6 && cm->cmsg_type != IPV6_RECVERR) {
769 error_setg_errno(errp, EPROTOTYPE,
770 "Wrong cmsg in errqueue");
774 serr = (void *) CMSG_DATA(cm);
775 if (serr->ee_errno != SO_EE_ORIGIN_NONE) {
776 error_setg_errno(errp, serr->ee_errno,
780 if (serr->ee_origin != SO_EE_ORIGIN_ZEROCOPY) {
781 error_setg_errno(errp, serr->ee_origin,
782 "Error not from zero copy");
785 if (serr->ee_data < serr->ee_info) {
786 error_setg_errno(errp, serr->ee_origin,
787 "Wrong notification bounds");
791 /* No errors, count successfully finished sendmsg()*/
792 sioc->zero_copy_sent += serr->ee_data - serr->ee_info + 1;
794 /* If any sendmsg() succeeded using zero copy, return 0 at the end */
795 if (serr->ee_code != SO_EE_CODE_ZEROCOPY_COPIED) {
803 #endif /* QEMU_MSG_ZEROCOPY */
/*
 * Switch the underlying socket between blocking and non-blocking mode
 * using qemu_socket_set_block() / qemu_socket_set_nonblock().
 * NOTE(review): the condition selecting between the two calls is not
 * visible in this listing.
 */
806 qio_channel_socket_set_blocking(QIOChannel *ioc,
810 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
813 qemu_socket_set_block(sioc->fd);
815 qemu_socket_set_nonblock(sioc->fd);
/*
 * Enable or disable send delay on the socket through the TCP_NODELAY
 * option at the IPPROTO_TCP level.
 */
822 qio_channel_socket_set_delay(QIOChannel *ioc,
825 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
/* TCP_NODELAY is the inverse of "delay enabled", so the flag is inverted. */
826 int v = enabled ? 0 : 1;
829 IPPROTO_TCP, TCP_NODELAY,
/*
 * Enable or disable corking on the socket by passing 1 or 0 to
 * socket_set_cork().
 */
835 qio_channel_socket_set_cork(QIOChannel *ioc,
838 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
839 int v = enabled ? 1 : 0;
841 socket_set_cork(sioc->fd, v);
/*
 * Close the channel's socket if one is open (fd != -1).  Listening
 * channels first run socket_listen_cleanup(); a close() failure is
 * reported through @errp.
 */
846 qio_channel_socket_close(QIOChannel *ioc,
849 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
/* Nothing to do when no descriptor is attached. */
853 if (sioc->fd != -1) {
/* NOTE(review): presumably inside a platform guard not visible here -- confirm. */
855 qemu_socket_unselect(sioc->fd, NULL);
857 if (qio_channel_has_feature(ioc, QIO_CHANNEL_FEATURE_LISTEN)) {
858 socket_listen_cleanup(sioc->fd, errp);
861 if (close(sioc->fd) < 0) {
/*
 * NOTE(review): @errp may already have been passed to
 * socket_listen_cleanup() above; the close() error is built in a local
 * Error and handed to error_propagate() -- confirm the intended
 * precedence when both fail.
 */
863 error_setg_errno(&err, errno, "Unable to close socket");
864 error_propagate(errp, err);
/*
 * Shut down one or both directions of the socket.  The
 * QIOChannelShutdown value @how is translated case by case into the
 * 'sockhow' argument for shutdown() (the per-case assignments are not
 * visible in this listing); a failure is reported through @errp.
 */
873 qio_channel_socket_shutdown(QIOChannel *ioc,
874 QIOChannelShutdown how,
877 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
881 case QIO_CHANNEL_SHUTDOWN_READ:
884 case QIO_CHANNEL_SHUTDOWN_WRITE:
887 case QIO_CHANNEL_SHUTDOWN_BOTH:
893 if (shutdown(sioc->fd, sockhow) < 0) {
894 error_setg_errno(errp, errno,
895 "Unable to shutdown socket");
/*
 * Register AioContext read/write handlers for the channel by delegating
 * to qio_channel_util_set_aio_fd_handler().
 */
901 static void qio_channel_socket_set_aio_fd_handler(QIOChannel *ioc,
902 AioContext *read_ctx,
904 AioContext *write_ctx,
908 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
/* A socket is bidirectional: the same descriptor serves as both read fd and write fd. */
910 qio_channel_util_set_aio_fd_handler(sioc->fd, read_ctx, io_read,
911 sioc->fd, write_ctx, io_write,
/*
 * Create a GSource watching the channel for @condition by delegating to
 * qio_channel_create_socket_watch() and returning its result.
 * NOTE(review): the remaining call arguments are not visible in this
 * listing.
 */
915 static GSource *qio_channel_socket_create_watch(QIOChannel *ioc,
916 GIOCondition condition)
918 QIOChannelSocket *sioc = QIO_CHANNEL_SOCKET(ioc);
919 return qio_channel_create_socket_watch(ioc,
/*
 * Class initialiser: installs this file's socket implementations into
 * the QIOChannelClass method table.
 */
924 static void qio_channel_socket_class_init(ObjectClass *klass,
925 void *class_data G_GNUC_UNUSED)
927 QIOChannelClass *ioc_klass = QIO_CHANNEL_CLASS(klass);
929 ioc_klass->io_writev = qio_channel_socket_writev;
930 ioc_klass->io_readv = qio_channel_socket_readv;
931 ioc_klass->io_set_blocking = qio_channel_socket_set_blocking;
932 ioc_klass->io_close = qio_channel_socket_close;
933 ioc_klass->io_shutdown = qio_channel_socket_shutdown;
934 ioc_klass->io_set_cork = qio_channel_socket_set_cork;
935 ioc_klass->io_set_delay = qio_channel_socket_set_delay;
936 ioc_klass->io_create_watch = qio_channel_socket_create_watch;
937 ioc_klass->io_set_aio_fd_handler = qio_channel_socket_set_aio_fd_handler;
/* The flush hook exists only in builds where MSG_ZEROCOPY support was detected. */
938 #ifdef QEMU_MSG_ZEROCOPY
939 ioc_klass->io_flush = qio_channel_socket_flush;
/*
 * Type description for the socket channel: a TYPE_QIO_CHANNEL subclass
 * whose instances are QIOChannelSocket-sized, wired to the init,
 * finalize and class-init functions defined in this file.
 */
943 static const TypeInfo qio_channel_socket_info = {
944 .parent = TYPE_QIO_CHANNEL,
945 .name = TYPE_QIO_CHANNEL_SOCKET,
946 .instance_size = sizeof(QIOChannelSocket),
947 .instance_init = qio_channel_socket_init,
948 .instance_finalize = qio_channel_socket_finalize,
949 .class_init = qio_channel_socket_class_init,
/* Register the socket channel type described by qio_channel_socket_info. */
952 static void qio_channel_socket_register_types(void)
954 type_register_static(&qio_channel_socket_info);
/* Hook the registration function in through the type_init() macro. */
957 type_init(qio_channel_socket_register_types);