fs/nfs/read.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  * linux/fs/nfs/read.c
   4  *
   5  * Block I/O for NFS
   6  *
   7  * Partial copy of Linus' read cache modifications to fs/nfs/file.c
   8  * modified for async RPC by okir@monad.swb.de
   9  */
  10
  11 #include <linux/time.h>
  12 #include <linux/kernel.h>
  13 #include <linux/errno.h>
  14 #include <linux/fcntl.h>
  15 #include <linux/stat.h>
  16 #include <linux/mm.h>
  17 #include <linux/slab.h>
  18 #include <linux/task_io_accounting_ops.h>
  19 #include <linux/pagemap.h>
  20 #include <linux/sunrpc/clnt.h>
  21 #include <linux/nfs_fs.h>
  22 #include <linux/nfs_page.h>
  23 #include <linux/module.h>
  24
  25 #include "nfs4_fs.h"
  26 #include "internal.h"
  27 #include "iostat.h"
  28 #include "fscache.h"
  29 #include "pnfs.h"
  30 #include "nfstrace.h"
  31
  32 #define NFSDBG_FACILITY         NFSDBG_PAGECACHE
  33
  34 const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;
  35 static const struct nfs_rw_ops nfs_rw_read_ops;
  36
  37 static struct kmem_cache *nfs_rdata_cachep;
  38
  39 static struct nfs_pgio_header *nfs_readhdr_alloc(void)
  40 {
  41         struct nfs_pgio_header *p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
  42
  43         if (p)
  44                 p->rw_mode = FMODE_READ;
  45         return p;
  46 }
  47
  48 static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
  49 {
  50         kmem_cache_free(nfs_rdata_cachep, rhdr);
  51 }
  52
  53 static int nfs_return_empty_folio(struct folio *folio)
  54 {
  55         folio_zero_segment(folio, 0, folio_size(folio));
  56         folio_mark_uptodate(folio);
  57         folio_unlock(folio);
  58         return 0;
  59 }
  60
  61 void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
  62                               struct inode *inode, bool force_mds,
  63                               const struct nfs_pgio_completion_ops *compl_ops)
  64 {
  65         struct nfs_server *server = NFS_SERVER(inode);
  66         const struct nfs_pageio_ops *pg_ops = &nfs_pgio_rw_ops;
  67
  68 #ifdef CONFIG_NFS_V4_1
  69         if (server->pnfs_curr_ld && !force_mds)
  70                 pg_ops = server->pnfs_curr_ld->pg_read_ops;
  71 #endif
  72         nfs_pageio_init(pgio, inode, pg_ops, compl_ops, &nfs_rw_read_ops,
  73                         server->rsize, 0);
  74 }
  75 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
  76
  77 void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio)
  78 {
  79         struct nfs_pgio_mirror *pgm;
  80         unsigned long npages;
  81
  82         nfs_pageio_complete(pgio);
  83
  84         /* It doesn't make sense to do mirrored reads! */
  85         WARN_ON_ONCE(pgio->pg_mirror_count != 1);
  86
  87         pgm = &pgio->pg_mirrors[0];
  88         NFS_I(pgio->pg_inode)->read_io += pgm->pg_bytes_written;
  89         npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
  90         nfs_add_stats(pgio->pg_inode, NFSIOS_READPAGES, npages);
  91 }
  92
  93
  94 void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
  95 {
  96         struct nfs_pgio_mirror *mirror;
  97
  98         if (pgio->pg_ops && pgio->pg_ops->pg_cleanup)
  99                 pgio->pg_ops->pg_cleanup(pgio);
 100
 101         pgio->pg_ops = &nfs_pgio_rw_ops;
 102
 103         /* read path should never have more than one mirror */
 104         WARN_ON_ONCE(pgio->pg_mirror_count != 1);
 105
 106         mirror = &pgio->pg_mirrors[0];
 107         mirror->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
 108 }
 109 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);
 110
 111 static void nfs_readpage_release(struct nfs_page *req, int error)
 112 {
 113         struct folio *folio = nfs_page_to_folio(req);
 114
 115         if (nfs_error_is_fatal_on_server(error) && error != -ETIMEDOUT)
 116                 folio_set_error(folio);
 117         if (nfs_page_group_sync_on_bit(req, PG_UNLOCKPAGE))
 118                 if (nfs_netfs_folio_unlock(folio))
 119                         folio_unlock(folio);
 120
 121         nfs_release_request(req);
 122 }
 123
 124 static void nfs_page_group_set_uptodate(struct nfs_page *req)
 125 {
 126         if (nfs_page_group_sync_on_bit(req, PG_UPTODATE))
 127                 folio_mark_uptodate(nfs_page_to_folio(req));
 128 }
 129
 130 static void nfs_read_completion(struct nfs_pgio_header *hdr)
 131 {
 132         unsigned long bytes = 0;
 133         int error;
 134
 135         if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
 136                 goto out;
 137         while (!list_empty(&hdr->pages)) {
 138                 struct nfs_page *req = nfs_list_entry(hdr->pages.next);
 139                 struct folio *folio = nfs_page_to_folio(req);
 140                 unsigned long start = req->wb_pgbase;
 141                 unsigned long end = req->wb_pgbase + req->wb_bytes;
 142
 143                 if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
 144                         /* note: regions of the page not covered by a
 145                          * request are zeroed in nfs_read_add_folio
 146                          */
 147                         if (bytes > hdr->good_bytes) {
 148                                 /* nothing in this request was good, so zero
 149                                  * the full extent of the request */
 150                                 folio_zero_segment(folio, start, end);
 151
 152                         } else if (hdr->good_bytes - bytes < req->wb_bytes) {
 153                                 /* part of this request has good bytes, but
 154                                  * not all. zero the bad bytes */
 155                                 start += hdr->good_bytes - bytes;
 156                                 WARN_ON(start < req->wb_pgbase);
 157                                 folio_zero_segment(folio, start, end);
 158                         }
 159                 }
 160                 error = 0;
 161                 bytes += req->wb_bytes;
 162                 if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
 163                         if (bytes <= hdr->good_bytes)
 164                                 nfs_page_group_set_uptodate(req);
 165                         else {
 166                                 error = hdr->error;
 167                                 xchg(&nfs_req_openctx(req)->error, error);
 168                         }
 169                 } else
 170                         nfs_page_group_set_uptodate(req);
 171                 nfs_list_remove_request(req);
 172                 nfs_readpage_release(req, error);
 173         }
 174         nfs_netfs_read_completion(hdr);
 175
 176 out:
 177         hdr->release(hdr);
 178 }
 179
 180 static void nfs_initiate_read(struct nfs_pgio_header *hdr,
 181                               struct rpc_message *msg,
 182                               const struct nfs_rpc_ops *rpc_ops,
 183                               struct rpc_task_setup *task_setup_data, int how)
 184 {
 185         rpc_ops->read_setup(hdr, msg);
 186         nfs_netfs_initiate_read(hdr);
 187         trace_nfs_initiate_read(hdr);
 188 }
 189
 190 static void
 191 nfs_async_read_error(struct list_head *head, int error)
 192 {
 193         struct nfs_page *req;
 194
 195         while (!list_empty(head)) {
 196                 req = nfs_list_entry(head->next);
 197                 nfs_list_remove_request(req);
 198                 nfs_readpage_release(req, error);
 199         }
 200 }
 201
 202 const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
 203         .error_cleanup = nfs_async_read_error,
 204         .completion = nfs_read_completion,
 205 };
 206
 207 /*
 208  * This is the callback from RPC telling us whether a reply was
 209  * received or some error occurred (timeout or socket shutdown).
 210  */
 211 static int nfs_readpage_done(struct rpc_task *task,
 212                              struct nfs_pgio_header *hdr,
 213                              struct inode *inode)
 214 {
 215         int status = NFS_PROTO(inode)->read_done(task, hdr);
 216         if (status != 0)
 217                 return status;
 218
 219         nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, hdr->res.count);
 220         trace_nfs_readpage_done(task, hdr);
 221
 222         if (task->tk_status == -ESTALE) {
 223                 nfs_set_inode_stale(inode);
 224                 nfs_mark_for_revalidate(inode);
 225         }
 226         return 0;
 227 }
 228
 229 static void nfs_readpage_retry(struct rpc_task *task,
 230                                struct nfs_pgio_header *hdr)
 231 {
 232         struct nfs_pgio_args *argp = &hdr->args;
 233         struct nfs_pgio_res  *resp = &hdr->res;
 234
 235         /* This is a short read! */
 236         nfs_inc_stats(hdr->inode, NFSIOS_SHORTREAD);
 237         trace_nfs_readpage_short(task, hdr);
 238
 239         /* Has the server at least made some progress? */
 240         if (resp->count == 0) {
 241                 nfs_set_pgio_error(hdr, -EIO, argp->offset);
 242                 return;
 243         }
 244
 245         /* For non rpc-based layout drivers, retry-through-MDS */
 246         if (!task->tk_ops) {
 247                 hdr->pnfs_error = -EAGAIN;
 248                 return;
 249         }
 250
 251         /* Yes, so retry the read at the end of the hdr */
 252         hdr->mds_offset += resp->count;
 253         argp->offset += resp->count;
 254         argp->pgbase += resp->count;
 255         argp->count -= resp->count;
 256         resp->count = 0;
 257         resp->eof = 0;
 258         rpc_restart_call_prepare(task);
 259 }
 260
 261 static void nfs_readpage_result(struct rpc_task *task,
 262                                 struct nfs_pgio_header *hdr)
 263 {
 264         if (hdr->res.eof) {
 265                 loff_t pos = hdr->args.offset + hdr->res.count;
 266                 unsigned int new = pos - hdr->io_start;
 267
 268                 if (hdr->good_bytes > new) {
 269                         hdr->good_bytes = new;
 270                         set_bit(NFS_IOHDR_EOF, &hdr->flags);
 271                         clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
 272                 }
 273         } else if (hdr->res.count < hdr->args.count)
 274                 nfs_readpage_retry(task, hdr);
 275 }
 276
 277 int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio,
 278                        struct nfs_open_context *ctx,
 279                        struct folio *folio)
 280 {
 281         struct inode *inode = folio_file_mapping(folio)->host;
 282         struct nfs_server *server = NFS_SERVER(inode);
 283         size_t fsize = folio_size(folio);
 284         unsigned int rsize = server->rsize;
 285         struct nfs_page *new;
 286         unsigned int len, aligned_len;
 287         int error;
 288
 289         len = nfs_folio_length(folio);
 290         if (len == 0)
 291                 return nfs_return_empty_folio(folio);
 292
 293         aligned_len = min_t(unsigned int, ALIGN(len, rsize), fsize);
 294
 295         new = nfs_page_create_from_folio(ctx, folio, 0, aligned_len);
 296         if (IS_ERR(new)) {
 297                 error = PTR_ERR(new);
 298                 goto out;
 299         }
 300
 301         if (len < fsize)
 302                 folio_zero_segment(folio, len, fsize);
 303         if (!nfs_pageio_add_request(pgio, new)) {
 304                 nfs_list_remove_request(new);
 305                 error = pgio->pg_error;
 306                 nfs_readpage_release(new, error);
 307                 goto out;
 308         }
 309         return 0;
 310 out:
 311         return error;
 312 }
 313
 314 /*
 315  * Read a page over NFS.
 316  * We read the page synchronously in the following case:
 317  *  -   The error flag is set for this page. This happens only when a
 318  *      previous async read operation failed.
 319  */
 320 int nfs_read_folio(struct file *file, struct folio *folio)
 321 {
 322         struct inode *inode = file_inode(file);
 323         struct nfs_pageio_descriptor pgio;
 324         struct nfs_open_context *ctx;
 325         int ret;
 326
 327         trace_nfs_aop_readpage(inode, folio);
 328         nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
 329         task_io_account_read(folio_size(folio));
 330
 331         /*
 332          * Try to flush any pending writes to the file..
 333          *
 334          * NOTE! Because we own the folio lock, there cannot
 335          * be any new pending writes generated at this point
 336          * for this folio (other folios can be written to).
 337          */
 338         ret = nfs_wb_folio(inode, folio);
 339         if (ret)
 340                 goto out_unlock;
 341         if (folio_test_uptodate(folio))
 342                 goto out_unlock;
 343
 344         ret = -ESTALE;
 345         if (NFS_STALE(inode))
 346                 goto out_unlock;
 347
 348         ret = nfs_netfs_read_folio(file, folio);
 349         if (!ret)
 350                 goto out;
 351
 352         ctx = get_nfs_open_context(nfs_file_open_context(file));
 353
 354         xchg(&ctx->error, 0);
 355         nfs_pageio_init_read(&pgio, inode, false,
 356                              &nfs_async_read_completion_ops);
 357
 358         ret = nfs_read_add_folio(&pgio, ctx, folio);
 359         if (ret)
 360                 goto out_put;
 361
 362         nfs_pageio_complete_read(&pgio);
 363         ret = pgio.pg_error < 0 ? pgio.pg_error : 0;
 364         if (!ret) {
 365                 ret = folio_wait_locked_killable(folio);
 366                 if (!folio_test_uptodate(folio) && !ret)
 367                         ret = xchg(&ctx->error, 0);
 368         }
 369 out_put:
 370         put_nfs_open_context(ctx);
 371 out:
 372         trace_nfs_aop_readpage_done(inode, folio, ret);
 373         return ret;
 374 out_unlock:
 375         folio_unlock(folio);
 376         goto out;
 377 }
 378
 379 void nfs_readahead(struct readahead_control *ractl)
 380 {
 381         struct nfs_pageio_descriptor pgio;
 382         struct nfs_open_context *ctx;
 383         unsigned int nr_pages = readahead_count(ractl);
 384         struct file *file = ractl->file;
 385         struct inode *inode = ractl->mapping->host;
 386         struct folio *folio;
 387         int ret;
 388
 389         trace_nfs_aop_readahead(inode, readahead_pos(ractl), nr_pages);
 390         nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 391         task_io_account_read(readahead_length(ractl));
 392
 393         ret = -ESTALE;
 394         if (NFS_STALE(inode))
 395                 goto out;
 396
 397         ret = nfs_netfs_readahead(ractl);
 398         if (!ret)
 399                 goto out;
 400
 401         if (file == NULL) {
 402                 ret = -EBADF;
 403                 ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
 404                 if (ctx == NULL)
 405                         goto out;
 406         } else
 407                 ctx = get_nfs_open_context(nfs_file_open_context(file));
 408
 409         nfs_pageio_init_read(&pgio, inode, false,
 410                              &nfs_async_read_completion_ops);
 411
 412         while ((folio = readahead_folio(ractl)) != NULL) {
 413                 ret = nfs_read_add_folio(&pgio, ctx, folio);
 414                 if (ret)
 415                         break;
 416         }
 417
 418         nfs_pageio_complete_read(&pgio);
 419
 420         put_nfs_open_context(ctx);
 421 out:
 422         trace_nfs_aop_readahead_done(inode, nr_pages, ret);
 423 }
 424
 425 int __init nfs_init_readpagecache(void)
 426 {
 427         nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
 428                                              sizeof(struct nfs_pgio_header),
 429                                              0, SLAB_HWCACHE_ALIGN,
 430                                              NULL);
 431         if (nfs_rdata_cachep == NULL)
 432                 return -ENOMEM;
 433
 434         return 0;
 435 }
 436
 437 void nfs_destroy_readpagecache(void)
 438 {
 439         kmem_cache_destroy(nfs_rdata_cachep);
 440 }
 441
 442 static const struct nfs_rw_ops nfs_rw_read_ops = {
 443         .rw_alloc_header        = nfs_readhdr_alloc,
 444         .rw_free_header         = nfs_readhdr_free,
 445         .rw_done                = nfs_readpage_done,
 446         .rw_result              = nfs_readpage_result,
 447         .rw_initiate            = nfs_initiate_read,
 448 };