OSDN Git Service

Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
author: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jun 2023 18:04:08 +0000 (11:04 -0700)
committer: Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Jun 2023 18:04:08 +0000 (11:04 -0700)
Pull virtio bug fixes from Michael Tsirkin:
 "A bunch of fixes all over the place"

* tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost:
  tools/virtio: use canonical ftrace path
  vhost_vdpa: support PACKED when setting-getting vring_base
  vhost: support PACKED when setting-getting vring_base
  vhost: Fix worker hangs due to missed wake up calls
  vhost: Fix crash during early vhost_transport_send_pkt calls
  vhost_net: revert upend_idx only on retriable error
  vhost_vdpa: tell vqs about the negotiated
  vdpa/mlx5: Fix hang when cvq commands are triggered during device unregister
  tools/virtio: Add .gitignore for ringtest
  tools/virtio: Fix arm64 ringtest compilation error
  vduse: avoid empty string for dev name
  vhost: use kzalloc() instead of kmalloc() followed by memset()

drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa_user/vduse_dev.c
drivers/vhost/net.c
drivers/vhost/vdpa.c
drivers/vhost/vhost.c
drivers/vhost/vhost.h
kernel/vhost_task.c
tools/virtio/ringtest/.gitignore [new file with mode: 0644]
tools/virtio/ringtest/main.h
tools/virtio/virtio-trace/README
tools/virtio/virtio-trace/trace-agent.c

index e29e32b..279ac6a 100644 (file)
@@ -3349,10 +3349,10 @@ static void mlx5_vdpa_dev_del(struct vdpa_mgmt_dev *v_mdev, struct vdpa_device *
        mlx5_vdpa_remove_debugfs(ndev->debugfs);
        ndev->debugfs = NULL;
        unregister_link_notifier(ndev);
+       _vdpa_unregister_device(dev);
        wq = mvdev->wq;
        mvdev->wq = NULL;
        destroy_workqueue(wq);
-       _vdpa_unregister_device(dev);
        mgtdev->ndev = NULL;
 }
 
index de97e38..5f5c216 100644 (file)
@@ -1685,6 +1685,9 @@ static bool vduse_validate_config(struct vduse_dev_config *config)
        if (config->vq_num > 0xffff)
                return false;
 
+       if (!config->name[0])
+               return false;
+
        if (!device_is_allowed(config->device_id))
                return false;
 
index 07181cd..ae22731 100644 (file)
@@ -935,13 +935,18 @@ static void handle_tx_zerocopy(struct vhost_net *net, struct socket *sock)
 
                err = sock->ops->sendmsg(sock, &msg, len);
                if (unlikely(err < 0)) {
+                       bool retry = err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS;
+
                        if (zcopy_used) {
                                if (vq->heads[ubuf->desc].len == VHOST_DMA_IN_PROGRESS)
                                        vhost_net_ubuf_put(ubufs);
-                               nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
-                                       % UIO_MAXIOV;
+                               if (retry)
+                                       nvq->upend_idx = ((unsigned)nvq->upend_idx - 1)
+                                               % UIO_MAXIOV;
+                               else
+                                       vq->heads[ubuf->desc].len = VHOST_DMA_DONE_LEN;
                        }
-                       if (err == -EAGAIN || err == -ENOMEM || err == -ENOBUFS) {
+                       if (retry) {
                                vhost_discard_vq_desc(vq, 1);
                                vhost_net_enable_vq(net, vq);
                                break;
index 8c1aefc..bf77924 100644 (file)
@@ -407,7 +407,10 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
 {
        struct vdpa_device *vdpa = v->vdpa;
        const struct vdpa_config_ops *ops = vdpa->config;
+       struct vhost_dev *d = &v->vdev;
+       u64 actual_features;
        u64 features;
+       int i;
 
        /*
         * It's not allowed to change the features after they have
@@ -422,6 +425,16 @@ static long vhost_vdpa_set_features(struct vhost_vdpa *v, u64 __user *featurep)
        if (vdpa_set_features(vdpa, features))
                return -EINVAL;
 
+       /* let the vqs know what has been configured */
+       actual_features = ops->get_driver_features(vdpa);
+       for (i = 0; i < d->nvqs; ++i) {
+               struct vhost_virtqueue *vq = d->vqs[i];
+
+               mutex_lock(&vq->mutex);
+               vq->acked_features = actual_features;
+               mutex_unlock(&vq->mutex);
+       }
+
        return 0;
 }
 
@@ -594,7 +607,14 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
                if (r)
                        return r;
 
-               vq->last_avail_idx = vq_state.split.avail_index;
+               if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+                       vq->last_avail_idx = vq_state.packed.last_avail_idx |
+                                            (vq_state.packed.last_avail_counter << 15);
+                       vq->last_used_idx = vq_state.packed.last_used_idx |
+                                           (vq_state.packed.last_used_counter << 15);
+               } else {
+                       vq->last_avail_idx = vq_state.split.avail_index;
+               }
                break;
        }
 
@@ -612,9 +632,15 @@ static long vhost_vdpa_vring_ioctl(struct vhost_vdpa *v, unsigned int cmd,
                break;
 
        case VHOST_SET_VRING_BASE:
-               vq_state.split.avail_index = vq->last_avail_idx;
-               if (ops->set_vq_state(vdpa, idx, &vq_state))
-                       r = -EINVAL;
+               if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+                       vq_state.packed.last_avail_idx = vq->last_avail_idx & 0x7fff;
+                       vq_state.packed.last_avail_counter = !!(vq->last_avail_idx & 0x8000);
+                       vq_state.packed.last_used_idx = vq->last_used_idx & 0x7fff;
+                       vq_state.packed.last_used_counter = !!(vq->last_used_idx & 0x8000);
+               } else {
+                       vq_state.split.avail_index = vq->last_avail_idx;
+               }
+               r = ops->set_vq_state(vdpa, idx, &vq_state);
                break;
 
        case VHOST_SET_VRING_CALL:
index 0742730..60c9ebd 100644 (file)
@@ -235,7 +235,7 @@ void vhost_dev_flush(struct vhost_dev *dev)
 {
        struct vhost_flush_struct flush;
 
-       if (dev->worker) {
+       if (dev->worker.vtsk) {
                init_completion(&flush.wait_event);
                vhost_work_init(&flush.work, vhost_flush_work);
 
@@ -247,7 +247,7 @@ EXPORT_SYMBOL_GPL(vhost_dev_flush);
 
 void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
 {
-       if (!dev->worker)
+       if (!dev->worker.vtsk)
                return;
 
        if (!test_and_set_bit(VHOST_WORK_QUEUED, &work->flags)) {
@@ -255,8 +255,8 @@ void vhost_work_queue(struct vhost_dev *dev, struct vhost_work *work)
                 * sure it was not in the list.
                 * test_and_set_bit() implies a memory barrier.
                 */
-               llist_add(&work->node, &dev->worker->work_list);
-               vhost_task_wake(dev->worker->vtsk);
+               llist_add(&work->node, &dev->worker.work_list);
+               vhost_task_wake(dev->worker.vtsk);
        }
 }
 EXPORT_SYMBOL_GPL(vhost_work_queue);
@@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(vhost_work_queue);
 /* A lockless hint for busy polling code to exit the loop */
 bool vhost_has_work(struct vhost_dev *dev)
 {
-       return dev->worker && !llist_empty(&dev->worker->work_list);
+       return !llist_empty(&dev->worker.work_list);
 }
 EXPORT_SYMBOL_GPL(vhost_has_work);
 
@@ -341,6 +341,8 @@ static bool vhost_worker(void *data)
 
        node = llist_del_all(&worker->work_list);
        if (node) {
+               __set_current_state(TASK_RUNNING);
+
                node = llist_reverse_order(node);
                /* make sure flag is seen after deletion */
                smp_wmb();
@@ -456,7 +458,8 @@ void vhost_dev_init(struct vhost_dev *dev,
        dev->umem = NULL;
        dev->iotlb = NULL;
        dev->mm = NULL;
-       dev->worker = NULL;
+       memset(&dev->worker, 0, sizeof(dev->worker));
+       init_llist_head(&dev->worker.work_list);
        dev->iov_limit = iov_limit;
        dev->weight = weight;
        dev->byte_weight = byte_weight;
@@ -530,47 +533,30 @@ static void vhost_detach_mm(struct vhost_dev *dev)
 
 static void vhost_worker_free(struct vhost_dev *dev)
 {
-       struct vhost_worker *worker = dev->worker;
-
-       if (!worker)
+       if (!dev->worker.vtsk)
                return;
 
-       dev->worker = NULL;
-       WARN_ON(!llist_empty(&worker->work_list));
-       vhost_task_stop(worker->vtsk);
-       kfree(worker);
+       WARN_ON(!llist_empty(&dev->worker.work_list));
+       vhost_task_stop(dev->worker.vtsk);
+       dev->worker.kcov_handle = 0;
+       dev->worker.vtsk = NULL;
 }
 
 static int vhost_worker_create(struct vhost_dev *dev)
 {
-       struct vhost_worker *worker;
        struct vhost_task *vtsk;
        char name[TASK_COMM_LEN];
-       int ret;
-
-       worker = kzalloc(sizeof(*worker), GFP_KERNEL_ACCOUNT);
-       if (!worker)
-               return -ENOMEM;
 
-       dev->worker = worker;
-       worker->kcov_handle = kcov_common_handle();
-       init_llist_head(&worker->work_list);
        snprintf(name, sizeof(name), "vhost-%d", current->pid);
 
-       vtsk = vhost_task_create(vhost_worker, worker, name);
-       if (!vtsk) {
-               ret = -ENOMEM;
-               goto free_worker;
-       }
+       vtsk = vhost_task_create(vhost_worker, &dev->worker, name);
+       if (!vtsk)
+               return -ENOMEM;
 
-       worker->vtsk = vtsk;
+       dev->worker.kcov_handle = kcov_common_handle();
+       dev->worker.vtsk = vtsk;
        vhost_task_start(vtsk);
        return 0;
-
-free_worker:
-       kfree(worker);
-       dev->worker = NULL;
-       return ret;
 }
 
 /* Caller should have device mutex */
@@ -1614,17 +1600,25 @@ long vhost_vring_ioctl(struct vhost_dev *d, unsigned int ioctl, void __user *arg
                        r = -EFAULT;
                        break;
                }
-               if (s.num > 0xffff) {
-                       r = -EINVAL;
-                       break;
+               if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED)) {
+                       vq->last_avail_idx = s.num & 0xffff;
+                       vq->last_used_idx = (s.num >> 16) & 0xffff;
+               } else {
+                       if (s.num > 0xffff) {
+                               r = -EINVAL;
+                               break;
+                       }
+                       vq->last_avail_idx = s.num;
                }
-               vq->last_avail_idx = s.num;
                /* Forget the cached index value. */
                vq->avail_idx = vq->last_avail_idx;
                break;
        case VHOST_GET_VRING_BASE:
                s.index = idx;
-               s.num = vq->last_avail_idx;
+               if (vhost_has_feature(vq, VIRTIO_F_RING_PACKED))
+                       s.num = (u32)vq->last_avail_idx | ((u32)vq->last_used_idx << 16);
+               else
+                       s.num = vq->last_avail_idx;
                if (copy_to_user(argp, &s, sizeof s))
                        r = -EFAULT;
                break;
@@ -2563,12 +2557,11 @@ EXPORT_SYMBOL_GPL(vhost_disable_notify);
 /* Create a new message. */
 struct vhost_msg_node *vhost_new_msg(struct vhost_virtqueue *vq, int type)
 {
-       struct vhost_msg_node *node = kmalloc(sizeof *node, GFP_KERNEL);
+       /* Make sure all padding within the structure is initialized. */
+       struct vhost_msg_node *node = kzalloc(sizeof(*node), GFP_KERNEL);
        if (!node)
                return NULL;
 
-       /* Make sure all padding within the structure is initialized. */
-       memset(&node->msg, 0, sizeof node->msg);
        node->vq = vq;
        node->msg.type = type;
        return node;
index 0308638..fc900be 100644 (file)
@@ -92,13 +92,17 @@ struct vhost_virtqueue {
        /* The routine to call when the Guest pings us, or timeout. */
        vhost_work_fn_t handle_kick;
 
-       /* Last available index we saw. */
+       /* Last available index we saw.
+        * Values are limited to 0x7fff, and the high bit is used as
+        * a wrap counter when using VIRTIO_F_RING_PACKED. */
        u16 last_avail_idx;
 
        /* Caches available index value from user. */
        u16 avail_idx;
 
-       /* Last index we used. */
+       /* Last index we used.
+        * Values are limited to 0x7fff, and the high bit is used as
+        * a wrap counter when using VIRTIO_F_RING_PACKED. */
        u16 last_used_idx;
 
        /* Used flags */
@@ -154,7 +158,7 @@ struct vhost_dev {
        struct vhost_virtqueue **vqs;
        int nvqs;
        struct eventfd_ctx *log_ctx;
-       struct vhost_worker *worker;
+       struct vhost_worker worker;
        struct vhost_iotlb *umem;
        struct vhost_iotlb *iotlb;
        spinlock_t iotlb_lock;
index f80d5c5..da35e5b 100644 (file)
@@ -28,10 +28,6 @@ static int vhost_task_fn(void *data)
        for (;;) {
                bool did_work;
 
-               /* mb paired w/ vhost_task_stop */
-               if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags))
-                       break;
-
                if (!dead && signal_pending(current)) {
                        struct ksignal ksig;
                        /*
@@ -48,11 +44,17 @@ static int vhost_task_fn(void *data)
                                clear_thread_flag(TIF_SIGPENDING);
                }
 
+               /* mb paired w/ vhost_task_stop */
+               set_current_state(TASK_INTERRUPTIBLE);
+
+               if (test_bit(VHOST_TASK_FLAGS_STOP, &vtsk->flags)) {
+                       __set_current_state(TASK_RUNNING);
+                       break;
+               }
+
                did_work = vtsk->fn(vtsk->data);
-               if (!did_work) {
-                       set_current_state(TASK_INTERRUPTIBLE);
+               if (!did_work)
                        schedule();
-               }
        }
 
        complete(&vtsk->exited);
diff --git a/tools/virtio/ringtest/.gitignore b/tools/virtio/ringtest/.gitignore
new file mode 100644 (file)
index 0000000..100b9e3
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/noring
+/ptr_ring
+/ring
+/virtio_ring_0_9
+/virtio_ring_inorder
+/virtio_ring_poll
index b68920d..d18dd31 100644 (file)
@@ -8,6 +8,7 @@
 #ifndef MAIN_H
 #define MAIN_H
 
+#include <assert.h>
 #include <stdbool.h>
 
 extern int param;
@@ -95,6 +96,8 @@ extern unsigned ring_size;
 #define cpu_relax() asm ("rep; nop" ::: "memory")
 #elif defined(__s390x__)
 #define cpu_relax() barrier()
+#elif defined(__aarch64__)
+#define cpu_relax() asm ("yield" ::: "memory")
 #else
 #define cpu_relax() assert(0)
 #endif
@@ -112,6 +115,8 @@ static inline void busy_wait(void)
 
 #if defined(__x86_64__) || defined(__i386__)
 #define smp_mb()     asm volatile("lock; addl $0,-132(%%rsp)" ::: "memory", "cc")
+#elif defined(__aarch64__)
+#define smp_mb()     asm volatile("dmb ish" ::: "memory")
 #else
 /*
  * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
@@ -136,10 +141,16 @@ static inline void busy_wait(void)
 
 #if defined(__i386__) || defined(__x86_64__) || defined(__s390x__)
 #define smp_wmb() barrier()
+#elif defined(__aarch64__)
+#define smp_wmb() asm volatile("dmb ishst" ::: "memory")
 #else
 #define smp_wmb() smp_release()
 #endif
 
+#ifndef __always_inline
+#define __always_inline inline __attribute__((always_inline))
+#endif
+
 static __always_inline
 void __read_once_size(const volatile void *p, void *res, int size)
 {
index 4fb9368..0127ff0 100644 (file)
@@ -95,7 +95,7 @@ Run
 
 1) Enable ftrace in the guest
  <Example>
-       # echo 1 > /sys/kernel/debug/tracing/events/sched/enable
+       # echo 1 > /sys/kernel/tracing/events/sched/enable
 
 2) Run trace agent in the guest
  This agent must be operated as root.
index cdfe77c..7e2d9bb 100644 (file)
@@ -18,8 +18,9 @@
 #define PIPE_DEF_BUFS          16
 #define PIPE_MIN_SIZE          (PAGE_SIZE*PIPE_DEF_BUFS)
 #define PIPE_MAX_SIZE          (1024*1024)
-#define READ_PATH_FMT  \
-               "/sys/kernel/debug/tracing/per_cpu/cpu%d/trace_pipe_raw"
+#define TRACEFS                "/sys/kernel/tracing"
+#define DEBUGFS                "/sys/kernel/debug/tracing"
+#define READ_PATH_FMT          "%s/per_cpu/cpu%d/trace_pipe_raw"
 #define WRITE_PATH_FMT         "/dev/virtio-ports/trace-path-cpu%d"
 #define CTL_PATH               "/dev/virtio-ports/agent-ctl-path"
 
@@ -120,9 +121,12 @@ static const char *make_path(int cpu_num, bool this_is_write_path)
        if (this_is_write_path)
                /* write(output) path */
                ret = snprintf(buf, PATH_MAX, WRITE_PATH_FMT, cpu_num);
-       else
+       else {
                /* read(input) path */
-               ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, cpu_num);
+               ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, TRACEFS, cpu_num);
+               if (ret > 0 && access(buf, F_OK) != 0)
+                       ret = snprintf(buf, PATH_MAX, READ_PATH_FMT, DEBUGFS, cpu_num);
+       }
 
        if (ret <= 0) {
                pr_err("Failed to generate %s path(CPU#%d):%d\n",