-rw-r--r--  fs/9p/vfs_addr.c              |  11
-rw-r--r--  fs/afs/file.c                 |  21
-rw-r--r--  fs/afs/fsclient.c             |   9
-rw-r--r--  fs/afs/yfsclient.c            |   9
-rw-r--r--  fs/ceph/addr.c                |  76
-rw-r--r--  fs/netfs/Makefile             |   4
-rw-r--r--  fs/netfs/buffered_read.c      | 766
-rw-r--r--  fs/netfs/direct_read.c        | 147
-rw-r--r--  fs/netfs/internal.h           |  35
-rw-r--r--  fs/netfs/iterator.c           |  50
-rw-r--r--  fs/netfs/main.c               |   4
-rw-r--r--  fs/netfs/objects.c            |   8
-rw-r--r--  fs/netfs/read_collect.c       | 544
-rw-r--r--  fs/netfs/read_pgpriv2.c       | 264
-rw-r--r--  fs/netfs/read_retry.c         | 256
-rw-r--r--  fs/netfs/stats.c              |   6
-rw-r--r--  fs/netfs/write_collect.c      |   9
-rw-r--r--  fs/netfs/write_issue.c        |  17
-rw-r--r--  fs/nfs/fscache.c              |  19
-rw-r--r--  fs/nfs/fscache.h              |   7
-rw-r--r--  fs/smb/client/cifsglob.h      |   1
-rw-r--r--  fs/smb/client/cifssmb.c       |   6
-rw-r--r--  fs/smb/client/file.c          |  82
-rw-r--r--  fs/smb/client/smb2ops.c       |   3
-rw-r--r--  fs/smb/client/smb2pdu.c       |  27
-rw-r--r--  include/linux/folio_queue.h   |  18
-rw-r--r--  include/linux/netfs.h         |  26
-rw-r--r--  include/trace/events/netfs.h  | 103
28 files changed, 2058 insertions(+), 470 deletions(-)
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 24fdc74caeba..819c75233235 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -68,17 +68,22 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
struct p9_fid *fid = rreq->netfs_priv;
+ unsigned long long pos = subreq->start + subreq->transferred;
int total, err;
- total = p9_client_read(fid, subreq->start + subreq->transferred,
- &subreq->io_iter, &err);
+ total = p9_client_read(fid, pos, &subreq->io_iter, &err);
/* if we just extended the file size, any portion not in
* cache won't be on server and is zeroes */
if (subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (pos + total >= i_size_read(rreq->inode))
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
- netfs_subreq_terminated(subreq, err ?: total, false);
+ if (!err)
+ subreq->transferred += total;
+
+ netfs_read_subreq_terminated(subreq, err, false);
}
/**
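
The 9p conversion above shows the new completion contract: the filesystem accumulates the byte count in subreq->transferred itself and hands netfs_read_subreq_terminated() only an error code (or 0), instead of folding the count into the status as netfs_subreq_terminated() did. A minimal sketch of an ->issue_read() completion under this contract, where myfs_read_from_server() is a hypothetical transport call and not part of this series:

	static void myfs_issue_read(struct netfs_io_subrequest *subreq)
	{
		struct netfs_io_request *rreq = subreq->rreq;
		unsigned long long pos = subreq->start + subreq->transferred;
		ssize_t got;
		int err = 0;

		/* Hypothetical transport call; fills the prepared subreq->io_iter. */
		got = myfs_read_from_server(rreq->netfs_priv, pos, &subreq->io_iter);
		if (got < 0) {
			err = got;
		} else {
			/* Bytes are credited here, no longer passed in the status. */
			subreq->transferred += got;
			if (pos + got >= i_size_read(rreq->inode))
				__set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
		}
		netfs_read_subreq_terminated(subreq, err, false);
	}
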
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 5a9d16848ad5..492d857a3fa0 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -16,6 +16,7 @@
#include <linux/mm.h>
#include <linux/swap.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "internal.h"
static int afs_file_mmap(struct file *file, struct vm_area_struct *vma);
@@ -242,9 +243,10 @@ static void afs_fetch_data_notify(struct afs_operation *op)
req->error = error;
if (subreq) {
- if (subreq->rreq->origin != NETFS_DIO_READ)
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
- netfs_subreq_terminated(subreq, error ?: req->actual_len, false);
+ subreq->rreq->i_size = req->file_size;
+ if (req->pos + req->actual_len >= req->file_size)
+ __set_bit(NETFS_SREQ_HIT_EOF, &subreq->flags);
+ netfs_read_subreq_terminated(subreq, error, false);
req->subreq = NULL;
} else if (req->done) {
req->done(req);
@@ -262,6 +264,12 @@ static void afs_fetch_data_success(struct afs_operation *op)
afs_fetch_data_notify(op);
}
+static void afs_fetch_data_aborted(struct afs_operation *op)
+{
+ afs_check_for_remote_deletion(op);
+ afs_fetch_data_notify(op);
+}
+
static void afs_fetch_data_put(struct afs_operation *op)
{
op->fetch.req->error = afs_op_error(op);
@@ -272,7 +280,7 @@ static const struct afs_operation_ops afs_fetch_data_operation = {
.issue_afs_rpc = afs_fs_fetch_data,
.issue_yfs_rpc = yfs_fs_fetch_data,
.success = afs_fetch_data_success,
- .aborted = afs_check_for_remote_deletion,
+ .aborted = afs_fetch_data_aborted,
.failed = afs_fetch_data_notify,
.put = afs_fetch_data_put,
};
@@ -294,7 +302,7 @@ int afs_fetch_data(struct afs_vnode *vnode, struct afs_read *req)
op = afs_alloc_operation(req->key, vnode->volume);
if (IS_ERR(op)) {
if (req->subreq)
- netfs_subreq_terminated(req->subreq, PTR_ERR(op), false);
+ netfs_read_subreq_terminated(req->subreq, PTR_ERR(op), false);
return PTR_ERR(op);
}
@@ -313,7 +321,7 @@ static void afs_read_worker(struct work_struct *work)
fsreq = afs_alloc_read(GFP_NOFS);
if (!fsreq)
- return netfs_subreq_terminated(subreq, -ENOMEM, false);
+ return netfs_read_subreq_terminated(subreq, -ENOMEM, false);
fsreq->subreq = subreq;
fsreq->pos = subreq->start + subreq->transferred;
@@ -322,6 +330,7 @@ static void afs_read_worker(struct work_struct *work)
fsreq->vnode = vnode;
fsreq->iter = &subreq->io_iter;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
afs_fetch_data(fsreq->vnode, fsreq);
afs_put_read(fsreq);
}
diff --git a/fs/afs/fsclient.c b/fs/afs/fsclient.c
index 79cd30775b7a..098fa034a1cc 100644
--- a/fs/afs/fsclient.c
+++ b/fs/afs/fsclient.c
@@ -304,6 +304,7 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
struct afs_vnode_param *vp = &op->file[0];
struct afs_read *req = op->fetch.req;
const __be32 *bp;
+ size_t count_before;
int ret;
_enter("{%u,%zu,%zu/%llu}",
@@ -345,10 +346,14 @@ static int afs_deliver_fs_fetch_data(struct afs_call *call)
/* extract the returned data */
case 2:
- _debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->actual_len);
+ count_before = call->iov_len;
+ _debug("extract data %zu/%llu", count_before, req->actual_len);
ret = afs_extract_data(call, true);
+ if (req->subreq) {
+ req->subreq->transferred += count_before - call->iov_len;
+ netfs_read_subreq_progress(req->subreq, false);
+ }
if (ret < 0)
return ret;
diff --git a/fs/afs/yfsclient.c b/fs/afs/yfsclient.c
index f521e66d3bf6..024227aba4cd 100644
--- a/fs/afs/yfsclient.c
+++ b/fs/afs/yfsclient.c
@@ -355,6 +355,7 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
struct afs_vnode_param *vp = &op->file[0];
struct afs_read *req = op->fetch.req;
const __be32 *bp;
+ size_t count_before;
int ret;
_enter("{%u,%zu, %zu/%llu}",
@@ -391,10 +392,14 @@ static int yfs_deliver_fs_fetch_data64(struct afs_call *call)
/* extract the returned data */
case 2:
- _debug("extract data %zu/%llu",
- iov_iter_count(call->iter), req->actual_len);
+ count_before = call->iov_len;
+ _debug("extract data %zu/%llu", count_before, req->actual_len);
ret = afs_extract_data(call, true);
+ if (req->subreq) {
+ req->subreq->transferred += count_before - call->iov_len;
+ netfs_read_subreq_progress(req->subreq, false);
+ }
if (ret < 0)
return ret;
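
Both the AFS and YFS delivery routines above report progress the same way: the bytes consumed by one extraction pass equal the drop in call->iov_len, which is credited to the subrequest and announced via netfs_read_subreq_progress() so netfslib can unlock fully-read folios before the whole RPC completes. Condensed, the pattern is (a sketch reusing the identifiers from the hunks above):

	size_t count_before = call->iov_len;	/* bytes still awaited before this pass */

	ret = afs_extract_data(call, true);	/* may consume only part of the payload */
	if (req->subreq) {
		req->subreq->transferred += count_before - call->iov_len;
		netfs_read_subreq_progress(req->subreq, false);
	}
	if (ret < 0)
		return ret;
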
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index c4744a02db75..c500c1fd6b9f 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -13,6 +13,7 @@
#include <linux/iversion.h>
#include <linux/ktime.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "super.h"
#include "mds_client.h"
@@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
}
}
-static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
-{
- struct inode *inode = subreq->rreq->inode;
- struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
- struct ceph_inode_info *ci = ceph_inode(inode);
- u64 objno, objoff;
- u32 xlen;
-
- /* Truncate the extent at the end of the current block */
- ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
- &objno, &objoff, &xlen);
- subreq->len = min(xlen, fsc->mount_options->rsize);
- return true;
-}
-
static void finish_netfs_read(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
@@ -264,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req)
calc_pages_for(osd_data->alignment,
osd_data->length), false);
}
- netfs_subreq_terminated(subreq, err, false);
+ if (err > 0) {
+ subreq->transferred = err;
+ err = 0;
+ }
+ trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
+ netfs_read_subreq_terminated(subreq, err, false);
iput(req->r_inode);
ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
@@ -278,7 +269,6 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct iov_iter iter;
ssize_t err = 0;
size_t len;
int mode;
@@ -301,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
req->r_num_caps = 2;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0)
goto out;
@@ -314,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
}
len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
- iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
- err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
- if (err == 0)
+ err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter);
+ if (err == 0) {
err = -EFAULT;
+ } else {
+ subreq->transferred += err;
+ err = 0;
+ }
ceph_mdsc_put_request(req);
out:
- netfs_subreq_terminated(subreq, err, false);
+ netfs_read_subreq_terminated(subreq, err, false);
return true;
}
+static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *rreq = subreq->rreq;
+ struct inode *inode = rreq->inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
+ u64 objno, objoff;
+ u32 xlen;
+
+ /* Truncate the extent at the end of the current block */
+ ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
+ &objno, &objoff, &xlen);
+ rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize);
+ return 0;
+}
+
static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
@@ -334,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct ceph_client *cl = fsc->client;
struct ceph_osd_request *req = NULL;
struct ceph_vino vino = ceph_vino(inode);
- struct iov_iter iter;
- int err = 0;
- u64 len = subreq->len;
+ int err;
+ u64 len;
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 off = subreq->start;
int extent_cnt;
@@ -349,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
+ // TODO: This rounding here is slightly dodgy. It *should* work, for
+ // now, as the cache only deals in blocks that are a multiple of
+ // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. What needs to
+ // happen is for the fscrypt driving to be moved into netfslib and the
+ // data in the cache also to be stored encrypted.
+ len = subreq->len;
ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
@@ -371,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n",
ceph_vinop(inode), subreq->start, subreq->len, len);
- iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-
/*
* FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
* encrypted inodes. We'd need infrastructure that handles an iov_iter
@@ -384,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct page **pages;
size_t page_off;
- err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+ err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off);
if (err < 0) {
doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
ceph_vinop(inode), err);
@@ -399,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
false);
} else {
- osd_req_op_extent_osd_iter(req, 0, &iter);
+ osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter);
}
if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
err = -EIO;
@@ -410,17 +423,19 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
req->r_inode = inode;
ihold(inode);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
ceph_osdc_start_request(req->r_osdc, req);
out:
ceph_osdc_put_request(req);
if (err)
- netfs_subreq_terminated(subreq, err, false);
+ netfs_read_subreq_terminated(subreq, err, false);
doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
}
static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{
struct inode *inode = rreq->inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
struct ceph_client *cl = ceph_inode_to_client(inode);
int got = 0, want = CEPH_CAP_FILE_CACHE;
struct ceph_netfs_request_data *priv;
@@ -472,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
priv->caps = got;
rreq->netfs_priv = priv;
+ rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize;
out:
if (ret < 0)
@@ -496,9 +512,9 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
const struct netfs_request_ops ceph_netfs_ops = {
.init_request = ceph_init_request,
.free_request = ceph_netfs_free_request,
+ .prepare_read = ceph_netfs_prepare_read,
.issue_read = ceph_netfs_issue_read,
.expand_readahead = ceph_netfs_expand_readahead,
- .clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin,
};
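
With ->clamp_length() removed, a filesystem no longer trims subreq->len by hand; it publishes its transfer ceiling through the stream's sreq_max_len, either per-subrequest in ->prepare_read() or once up front as ceph_init_request() now does, and netfslib performs the slicing. A minimal sketch, where myfs_rsize() is a stand-in for whatever per-mount limit applies:

	static int myfs_prepare_read(struct netfs_io_subrequest *subreq)
	{
		struct netfs_io_request *rreq = subreq->rreq;

		/* Cap each server read; netfslib does the actual slicing. */
		rreq->io_streams[0].sreq_max_len = myfs_rsize(rreq->inode);
		return 0;
	}

	static const struct netfs_request_ops myfs_netfs_ops = {
		.prepare_read	= myfs_prepare_read,
		.issue_read	= myfs_issue_read,
	};
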
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index 8e6781e0b10b..d08b0bfb6756 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -5,12 +5,14 @@ netfs-y := \
buffered_write.o \
direct_read.o \
direct_write.o \
- io.o \
iterator.o \
locking.o \
main.o \
misc.o \
objects.o \
+ read_collect.o \
+ read_pgpriv2.o \
+ read_retry.o \
write_collect.o \
write_issue.o
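
The rewritten buffered-read path below also drops the per-request xarray iterator: readahead folios are decanted into a chain of struct folio_queue segments (see include/linux/folio_queue.h in the diffstat), and subrequest iterators point into that rolling buffer. A sketch of walking such a chain, using only the fields and accessors that appear in this series (myfs_dump_buffer itself is illustrative):

	static void myfs_dump_buffer(struct folio_queue *fq)
	{
		/* Each segment carries a small batch of folios plus their orders. */
		for (; fq; fq = fq->next) {
			for (unsigned int slot = 0; slot < fq->vec.nr; slot++) {
				struct folio *folio = folioq_folio(fq, slot);

				pr_debug("folio idx=%lx order=%u\n",
					 folio->index, fq->orders[slot]);
			}
		}
	}
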
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 27c750d39476..c40e226053cc 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -9,266 +9,388 @@
#include <linux/task_io_accounting_ops.h>
#include "internal.h"
-/*
- * [DEPRECATED] Unlock the folios in a read operation for when the filesystem
- * is using PG_private_2 and direct writing to the cache from here rather than
- * marking the page for writeback.
- *
- * Note that we don't touch folio->private in this code.
- */
-static void netfs_rreq_unlock_folios_pgpriv2(struct netfs_io_request *rreq,
- size_t *account)
+static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
+ unsigned long long *_start,
+ unsigned long long *_len,
+ unsigned long long i_size)
{
- struct netfs_io_subrequest *subreq;
- struct folio *folio;
- pgoff_t start_page = rreq->start / PAGE_SIZE;
- pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
- bool subreq_failed = false;
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
- XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+ if (cres->ops && cres->ops->expand_readahead)
+ cres->ops->expand_readahead(cres, _start, _len, i_size);
+}
- /* Walk through the pagecache and the I/O request lists simultaneously.
- * We may have a mixture of cached and uncached sections and we only
- * really want to write out the uncached sections. This is slightly
- * complicated by the possibility that we might have huge pages with a
- * mixture inside.
+static void netfs_rreq_expand(struct netfs_io_request *rreq,
+ struct readahead_control *ractl)
+{
+ /* Give the cache a chance to change the request parameters. The
+ * resultant request must contain the original region.
*/
- subreq = list_first_entry(&rreq->subrequests,
- struct netfs_io_subrequest, rreq_link);
- subreq_failed = (subreq->error < 0);
+ netfs_cache_expand_readahead(rreq, &rreq->start, &rreq->len, rreq->i_size);
- trace_netfs_rreq(rreq, netfs_rreq_trace_unlock_pgpriv2);
+ /* Give the netfs a chance to change the request parameters. The
+ * resultant request must contain the original region.
+ */
+ if (rreq->netfs_ops->expand_readahead)
+ rreq->netfs_ops->expand_readahead(rreq);
- rcu_read_lock();
- xas_for_each(&xas, folio, last_page) {
- loff_t pg_end;
- bool pg_failed = false;
- bool folio_started = false;
+ /* Expand the request if the cache wants it to start earlier. Note
+ * that the expansion may get further extended if the VM wishes to
+ * insert THPs and the preferred start and/or end wind up in the middle
+ * of THPs.
+ *
+ * If this is the case, however, the THP size should be an integer
+ * multiple of the cache granule size, so we get a whole number of
+ * granules to deal with.
+ */
+ if (rreq->start != readahead_pos(ractl) ||
+ rreq->len != readahead_length(ractl)) {
+ readahead_expand(ractl, rreq->start, rreq->len);
+ rreq->start = readahead_pos(ractl);
+ rreq->len = readahead_length(ractl);
- if (xas_retry(&xas, folio))
- continue;
+ trace_netfs_read(rreq, readahead_pos(ractl), readahead_length(ractl),
+ netfs_read_trace_expanded);
+ }
+}
- pg_end = folio_pos(folio) + folio_size(folio) - 1;
+/*
+ * Begin an operation, and fetch the stored zero point value from the cookie if
+ * available.
+ */
+static int netfs_begin_cache_read(struct netfs_io_request *rreq, struct netfs_inode *ctx)
+{
+ return fscache_begin_read_operation(&rreq->cache_resources, netfs_i_cookie(ctx));
+}
- for (;;) {
- loff_t sreq_end;
+/*
+ * Decant the list of folios to read into a rolling buffer.
+ */
+static size_t netfs_load_buffer_from_ra(struct netfs_io_request *rreq,
+ struct folio_queue *folioq)
+{
+ unsigned int order, nr;
+ size_t size = 0;
+
+ nr = __readahead_batch(rreq->ractl, (struct page **)folioq->vec.folios,
+ ARRAY_SIZE(folioq->vec.folios));
+ folioq->vec.nr = nr;
+ for (int i = 0; i < nr; i++) {
+ struct folio *folio = folioq_folio(folioq, i);
+
+ trace_netfs_folio(folio, netfs_folio_trace_read);
+ order = folio_order(folio);
+ folioq->orders[i] = order;
+ size += PAGE_SIZE << order;
+ }
- if (!subreq) {
- pg_failed = true;
- break;
- }
+ for (int i = nr; i < folioq_nr_slots(folioq); i++)
+ folioq_clear(folioq, i);
- if (!folio_started &&
- test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags) &&
- fscache_operation_valid(&rreq->cache_resources)) {
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_start_private_2(folio);
- folio_started = true;
- }
+ return size;
+}
- pg_failed |= subreq_failed;
- sreq_end = subreq->start + subreq->len - 1;
- if (pg_end < sreq_end)
- break;
+/*
+ * netfs_prepare_read_iterator - Prepare the subreq iterator for I/O
+ * @subreq: The subrequest to be set up
+ *
+ * Prepare the I/O iterator representing the read buffer on a subrequest for
+ * the filesystem to use for I/O (it can be passed directly to a socket). This
+ * is intended to be called from the ->issue_read() method once the filesystem
+ * has trimmed the request to the size it wants.
+ *
+ * Returns the limited size if successful and -ENOMEM if insufficient memory
+ * available.
+ *
+ * [!] NOTE: This must be run in the same thread as ->issue_read() was called
+ * in as we access the readahead_control struct.
+ */
+static ssize_t netfs_prepare_read_iterator(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *rreq = subreq->rreq;
+ size_t rsize = subreq->len;
+
+ if (subreq->source == NETFS_DOWNLOAD_FROM_SERVER)
+ rsize = umin(rsize, rreq->io_streams[0].sreq_max_len);
+
+ if (rreq->ractl) {
+ /* If we don't have sufficient folios in the rolling buffer,
+ * extract a folioq's worth from the readahead region at a time
+ * into the buffer. Note that this acquires a ref on each page
+ * that we will need to release later - but we don't want to do
+ * that until after we've started the I/O.
+ */
+ while (rreq->submitted < subreq->start + rsize) {
+ struct folio_queue *tail = rreq->buffer_tail, *new;
+ size_t added;
+
+ new = kmalloc(sizeof(*new), GFP_NOFS);
+ if (!new)
+ return -ENOMEM;
+ netfs_stat(&netfs_n_folioq);
+ folioq_init(new);
+ new->prev = tail;
+ tail->next = new;
+ rreq->buffer_tail = new;
+ added = netfs_load_buffer_from_ra(rreq, new);
+ rreq->iter.count += added;
+ rreq->submitted += added;
+ }
+ }
- *account += subreq->transferred;
- if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
- subreq = list_next_entry(subreq, rreq_link);
- subreq_failed = (subreq->error < 0);
- } else {
- subreq = NULL;
- subreq_failed = false;
- }
+ subreq->len = rsize;
+ if (unlikely(rreq->io_streams[0].sreq_max_segs)) {
+ size_t limit = netfs_limit_iter(&rreq->iter, 0, rsize,
+ rreq->io_streams[0].sreq_max_segs);
- if (pg_end == sreq_end)
- break;
+ if (limit < rsize) {
+ subreq->len = limit;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_limited);
}
+ }
- if (!pg_failed) {
- flush_dcache_folio(folio);
- folio_mark_uptodate(folio);
- }
+ subreq->io_iter = rreq->iter;
- if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
- if (folio->index == rreq->no_unlock_folio &&
- test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- _debug("no unlock");
- else
- folio_unlock(folio);
+ if (iov_iter_is_folioq(&subreq->io_iter)) {
+ if (subreq->io_iter.folioq_slot >= folioq_nr_slots(subreq->io_iter.folioq)) {
+ subreq->io_iter.folioq = subreq->io_iter.folioq->next;
+ subreq->io_iter.folioq_slot = 0;
}
+ subreq->curr_folioq = (struct folio_queue *)subreq->io_iter.folioq;
+ subreq->curr_folioq_slot = subreq->io_iter.folioq_slot;
+ subreq->curr_folio_order = subreq->curr_folioq->orders[subreq->curr_folioq_slot];
}
- rcu_read_unlock();
+
+ iov_iter_truncate(&subreq->io_iter, subreq->len);
+ iov_iter_advance(&rreq->iter, subreq->len);
+ return subreq->len;
}
-/*
- * Unlock the folios in a read operation. We need to set PG_writeback on any
- * folios we're going to write back before we unlock them.
- *
- * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use
- * PG_private_2 and do a direct write to the cache from here instead.
- */
-void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
+static enum netfs_io_source netfs_cache_prepare_read(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq,
+ loff_t i_size)
{
- struct netfs_io_subrequest *subreq;
- struct netfs_folio *finfo;
- struct folio *folio;
- pgoff_t start_page = rreq->start / PAGE_SIZE;
- pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
- size_t account = 0;
- bool subreq_failed = false;
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
- XA_STATE(xas, &rreq->mapping->i_pages, start_page);
+ if (!cres->ops)
+ return NETFS_DOWNLOAD_FROM_SERVER;
+ return cres->ops->prepare_read(subreq, i_size);
+}
- if (test_bit(NETFS_RREQ_FAILED, &rreq->flags)) {
- __clear_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags);
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
- __clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
- }
- }
+static void netfs_cache_read_terminated(void *priv, ssize_t transferred_or_error,
+ bool was_async)
+{
+ struct netfs_io_subrequest *subreq = priv;
- /* Handle deprecated PG_private_2 case. */
- if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
- netfs_rreq_unlock_folios_pgpriv2(rreq, &account);
- goto out;
+ if (transferred_or_error < 0) {
+ netfs_read_subreq_terminated(subreq, transferred_or_error, was_async);
+ return;
}
- /* Walk through the pagecache and the I/O request lists simultaneously.
- * We may have a mixture of cached and uncached sections and we only
- * really want to write out the uncached sections. This is slightly
- * complicated by the possibility that we might have huge pages with a
- * mixture inside.
- */
- subreq = list_first_entry(&rreq->subrequests,
- struct netfs_io_subrequest, rreq_link);
- subreq_failed = (subreq->error < 0);
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_unlock);
+ if (transferred_or_error > 0)
+ subreq->transferred += transferred_or_error;
+ netfs_read_subreq_terminated(subreq, 0, was_async);
+}
- rcu_read_lock();
- xas_for_each(&xas, folio, last_page) {
- loff_t pg_end;
- bool pg_failed = false;
- bool wback_to_cache = false;
+/*
+ * Issue a read against the cache.
+ * - Eats the caller's ref on subreq.
+ */
+static void netfs_read_cache_to_pagecache(struct netfs_io_request *rreq,
+ struct netfs_io_subrequest *subreq)
+{
+ struct netfs_cache_resources *cres = &rreq->cache_resources;
- if (xas_retry(&xas, folio))
- continue;
+ netfs_stat(&netfs_n_rh_read);
+ cres->ops->read(cres, subreq->start, &subreq->io_iter, NETFS_READ_HOLE_IGNORE,
+ netfs_cache_read_terminated, subreq);
+}
- pg_end = folio_pos(folio) + folio_size(folio) - 1;
+/*
+ * Perform a read to the pagecache from a series of sources of different types,
+ * slicing up the region to be read according to available cache blocks and
+ * network rsize.
+ */
+static void netfs_read_to_pagecache(struct netfs_io_request *rreq)
+{
+ struct netfs_inode *ictx = netfs_inode(rreq->inode);
+ unsigned long long start = rreq->start;
+ ssize_t size = rreq->len;
+ int ret = 0;
+
+ atomic_inc(&rreq->nr_outstanding);
+
+ do {
+ struct netfs_io_subrequest *subreq;
+ enum netfs_io_source source = NETFS_DOWNLOAD_FROM_SERVER;
+ ssize_t slice;
+
+ subreq = netfs_alloc_subrequest(rreq);
+ if (!subreq) {
+ ret = -ENOMEM;
+ break;
+ }
- for (;;) {
- loff_t sreq_end;
+ subreq->start = start;
+ subreq->len = size;
+
+ atomic_inc(&rreq->nr_outstanding);
+ spin_lock_bh(&rreq->lock);
+ list_add_tail(&subreq->rreq_link, &rreq->subrequests);
+ subreq->prev_donated = rreq->prev_donated;
+ rreq->prev_donated = 0;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_added);
+ spin_unlock_bh(&rreq->lock);
+
+ source = netfs_cache_prepare_read(rreq, subreq, rreq->i_size);
+ subreq->source = source;
+ if (source == NETFS_DOWNLOAD_FROM_SERVER) {
+ unsigned long long zp = umin(ictx->zero_point, rreq->i_size);
+ size_t len = subreq->len;
+
+ if (subreq->start >= zp) {
+ subreq->source = source = NETFS_FILL_WITH_ZEROES;
+ goto fill_with_zeroes;
+ }
- if (!subreq) {
- pg_failed = true;
+ if (len > zp - subreq->start)
+ len = zp - subreq->start;
+ if (len == 0) {
+ pr_err("ZERO-LEN READ: R=%08x[%x] l=%zx/%zx s=%llx z=%llx i=%llx",
+ rreq->debug_id, subreq->debug_index,
+ subreq->len, size,
+ subreq->start, ictx->zero_point, rreq->i_size);
break;
}
+ subreq->len = len;
+
+ netfs_stat(&netfs_n_rh_download);
+ if (rreq->netfs_ops->prepare_read) {
+ ret = rreq->netfs_ops->prepare_read(subreq);
+ if (ret < 0) {
+ atomic_dec(&rreq->nr_outstanding);
+ netfs_put_subrequest(subreq, false,
+ netfs_sreq_trace_put_cancel);
+ break;
+ }
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+ }
- wback_to_cache |= test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
- pg_failed |= subreq_failed;
- sreq_end = subreq->start + subreq->len - 1;
- if (pg_end < sreq_end)
+ slice = netfs_prepare_read_iterator(subreq);
+ if (slice < 0) {
+ atomic_dec(&rreq->nr_outstanding);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_cancel);
+ ret = slice;
break;
-
- account += subreq->transferred;
- if (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
- subreq = list_next_entry(subreq, rreq_link);
- subreq_failed = (subreq->error < 0);
- } else {
- subreq = NULL;
- subreq_failed = false;
}
- if (pg_end == sreq_end)
- break;
+ rreq->netfs_ops->issue_read(subreq);
+ goto done;
}
- if (!pg_failed) {
- flush_dcache_folio(folio);
- finfo = netfs_folio_info(folio);
- if (finfo) {
- trace_netfs_folio(folio, netfs_folio_trace_filled_gaps);
- if (finfo->netfs_group)
- folio_change_private(folio, finfo->netfs_group);
- else
- folio_detach_private(folio);
- kfree(finfo);
- }
- folio_mark_uptodate(folio);
- if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
- filemap_dirty_folio(folio->mapping, folio);
- }
+ fill_with_zeroes:
+ if (source == NETFS_FILL_WITH_ZEROES) {
+ subreq->source = NETFS_FILL_WITH_ZEROES;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ netfs_stat(&netfs_n_rh_zero);
+ slice = netfs_prepare_read_iterator(subreq);
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ netfs_read_subreq_terminated(subreq, 0, false);
+ goto done;
}
- if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
- if (folio->index == rreq->no_unlock_folio &&
- test_bit(NETFS_RREQ_NO_UNLOCK_FOLIO, &rreq->flags))
- _debug("no unlock");