{
struct io_ring_ctx *ctx = req->ctx;
+ if (req->flags & REQ_F_NEED_CLEANUP)
+ io_cleanup_req(req);
+
kfree(req->io);
if (req->file) {
if (req->flags & REQ_F_FIXED_FILE)
{
__io_req_aux_free(req);
- if (req->flags & REQ_F_NEED_CLEANUP)
- io_cleanup_req(req);
-
if (req->flags & REQ_F_INFLIGHT) {
struct io_ring_ctx *ctx = req->ctx;
unsigned long flags;
__attribute__((nonnull))
static void io_put_req_find_next(struct io_kiocb *req, struct io_kiocb **nxtptr)
{
- io_req_find_next(req, nxtptr);
-
- if (refcount_dec_and_test(&req->refs))
+ if (refcount_dec_and_test(&req->refs)) {
+ io_req_find_next(req, nxtptr);
__io_free_req(req);
+ }
}
static void io_put_req(struct io_kiocb *req)
mutex_unlock(&ctx->uring_lock);
}
-static int __io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
+static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
+ long min)
{
int iters = 0, ret = 0;
+ /*
+ * We disallow the app entering submit/complete with polling, but we
+ * still need to lock the ring to prevent racing with polled issue
+ * that got punted to a workqueue.
+ */
+ mutex_lock(&ctx->uring_lock);
do {
int tmin = 0;
ret = 0;
} while (min && !*nr_events && !need_resched());
- return ret;
-}
-
-static int io_iopoll_check(struct io_ring_ctx *ctx, unsigned *nr_events,
- long min)
-{
- int ret;
-
- /*
- * We disallow the app entering submit/complete with polling, but we
- * still need to lock the ring to prevent racing with polled issue
- * that got punted to a workqueue.
- */
- mutex_lock(&ctx->uring_lock);
- ret = __io_iopoll_check(ctx, nr_events, min);
mutex_unlock(&ctx->uring_lock);
return ret;
}
list_add(&req->list, &ctx->poll_list);
else
list_add_tail(&req->list, &ctx->poll_list);
+
+ if ((ctx->flags & IORING_SETUP_SQPOLL) &&
+ wq_has_sleeper(&ctx->sqo_wait))
+ wake_up(&ctx->sqo_wait);
}
static void io_file_put(struct io_submit_state *state)
struct io_kiocb *nxt = NULL;
int ret;
+ if (io_req_cancelled(req))
+ return;
+
ret = vfs_fallocate(req->file, req->sync.mode, req->sync.off,
req->sync.len);
if (ret < 0)
struct io_kiocb *req = container_of(*workptr, struct io_kiocb, work);
struct io_kiocb *nxt = NULL;
+ /* not cancellable, don't do io_req_cancelled() */
__io_close_finish(req, &nxt);
if (nxt)
io_wq_assign_next(workptr, nxt);
if (req->io)
return -EAGAIN;
if (io_alloc_async_ctx(req)) {
- if (kmsg && kmsg->iov != kmsg->fast_iov)
+ if (kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
return -ENOMEM;
}
if (req->io)
return -EAGAIN;
if (io_alloc_async_ctx(req)) {
- if (kmsg && kmsg->iov != kmsg->fast_iov)
+ if (kmsg->iov != kmsg->fast_iov)
kfree(kmsg->iov);
return -ENOMEM;
}
{
struct io_kiocb *linked_timeout;
struct io_kiocb *nxt = NULL;
+ const struct cred *old_creds = NULL;
int ret;
again:
linked_timeout = io_prep_linked_timeout(req);
+ if (req->work.creds && req->work.creds != current_cred()) {
+ if (old_creds)
+ revert_creds(old_creds);
+ if (old_creds == req->work.creds)
+ old_creds = NULL; /* restored original creds */
+ else
+ old_creds = override_creds(req->work.creds);
+ }
+
ret = io_issue_sqe(req, sqe, &nxt, true);
/*
err:
/* drop submission reference */
- io_put_req(req);
+ io_put_req_find_next(req, &nxt);
if (linked_timeout) {
if (!ret)
goto punt;
goto again;
}
+ if (old_creds)
+ revert_creds(old_creds);
}
static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
static bool io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
struct io_submit_state *state, struct io_kiocb **link)
{
- const struct cred *old_creds = NULL;
struct io_ring_ctx *ctx = req->ctx;
unsigned int sqe_flags;
int ret, id;
id = READ_ONCE(sqe->personality);
if (id) {
- const struct cred *personality_creds;
-
- personality_creds = idr_find(&ctx->personality_idr, id);
- if (unlikely(!personality_creds)) {
+ req->work.creds = idr_find(&ctx->personality_idr, id);
+ if (unlikely(!req->work.creds)) {
ret = -EINVAL;
goto err_req;
}
- old_creds = override_creds(personality_creds);
+ get_cred(req->work.creds);
}
/* same numerical values with corresponding REQ_F_*, safe to copy */
err_req:
io_cqring_add_event(req, ret);
io_double_put_req(req);
- if (old_creds)
- revert_creds(old_creds);
return false;
}
}
}
- if (old_creds)
- revert_creds(old_creds);
return true;
}
const struct cred *old_cred;
mm_segment_t old_fs;
DEFINE_WAIT(wait);
- unsigned inflight;
unsigned long timeout;
- int ret;
+ int ret = 0;
complete(&ctx->completions[1]);
set_fs(USER_DS);
old_cred = override_creds(ctx->creds);
- ret = timeout = inflight = 0;
+ timeout = jiffies + ctx->sq_thread_idle;
while (!kthread_should_park()) {
unsigned int to_submit;
- if (inflight) {
+ if (!list_empty(&ctx->poll_list)) {
unsigned nr_events = 0;
- if (ctx->flags & IORING_SETUP_IOPOLL) {
- /*
- * inflight is the count of the maximum possible
- * entries we submitted, but it can be smaller
- * if we dropped some of them. If we don't have
- * poll entries available, then we know that we
- * have nothing left to poll for. Reset the
- * inflight count to zero in that case.
- */
- mutex_lock(&ctx->uring_lock);
- if (!list_empty(&ctx->poll_list))
- __io_iopoll_check(ctx, &nr_events, 0);
- else
- inflight = 0;
- mutex_unlock(&ctx->uring_lock);
- } else {
- /*
- * Normal IO, just pretend everything completed.
- * We don't have to poll completions for that.
- */
- nr_events = inflight;
- }
-
- inflight -= nr_events;
- if (!inflight)
+ mutex_lock(&ctx->uring_lock);
+ if (!list_empty(&ctx->poll_list))
+ io_iopoll_getevents(ctx, &nr_events, 0);
+ else
timeout = jiffies + ctx->sq_thread_idle;
+ mutex_unlock(&ctx->uring_lock);
}
to_submit = io_sqring_entries(ctx);
*/
if (!to_submit || ret == -EBUSY) {
/*
+ * Drop cur_mm before scheduling, we can't hold it for
+ * long periods (or over schedule()). Do this before
+ * adding ourselves to the waitqueue, as the unuse/drop
+ * may sleep.
+ */
+ if (cur_mm) {
+ unuse_mm(cur_mm);
+ mmput(cur_mm);
+ cur_mm = NULL;
+ }
+
+ /*
* We're polling. If we're within the defined idle
* period, then let us spin without work before going
* to sleep. The exception is if we got EBUSY doing
* more IO, we should wait for the application to
* reap events and wake us up.
*/
- if (inflight ||
+ if (!list_empty(&ctx->poll_list) ||
(!time_after(jiffies, timeout) && ret != -EBUSY &&
!percpu_ref_is_dying(&ctx->refs))) {
cond_resched();
continue;
}
+ prepare_to_wait(&ctx->sqo_wait, &wait,
+ TASK_INTERRUPTIBLE);
+
/*
- * Drop cur_mm before scheduling, we can't hold it for
- * long periods (or over schedule()). Do this before
- * adding ourselves to the waitqueue, as the unuse/drop
- * may sleep.
+ * While doing polled IO, before going to sleep, we need
+ * to check if there are new reqs added to poll_list, it
+ * is because reqs may have been punted to io worker and
+ * will be added to poll_list later, hence check the
+ * poll_list again.
*/
- if (cur_mm) {
- unuse_mm(cur_mm);
- mmput(cur_mm);
- cur_mm = NULL;
+ if ((ctx->flags & IORING_SETUP_IOPOLL) &&
+ !list_empty_careful(&ctx->poll_list)) {
+ finish_wait(&ctx->sqo_wait, &wait);
+ continue;
}
- prepare_to_wait(&ctx->sqo_wait, &wait,
- TASK_INTERRUPTIBLE);
-
/* Tell userspace we may need a wakeup call */
ctx->rings->sq_flags |= IORING_SQ_NEED_WAKEUP;
/* make sure to read SQ tail after writing flags */
mutex_lock(&ctx->uring_lock);
ret = io_submit_sqes(ctx, to_submit, NULL, -1, &cur_mm, true);
mutex_unlock(&ctx->uring_lock);
- if (ret > 0)
- inflight += ret;
+ timeout = jiffies + ctx->sq_thread_idle;
}
set_fs(old_fs);
io_sqe_buffer_unregister(ctx);
io_sqe_files_unregister(ctx);
io_eventfd_unregister(ctx);
+ idr_destroy(&ctx->personality_idr);
#if defined(CONFIG_UNIX)
if (ctx->ring_sock) {