aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/nfs/blocklayout/blocklayout.c4
-rw-r--r--fs/nfs/blocklayout/dev.c5
-rw-r--r--fs/nfs/blocklayout/extent_tree.c104
-rw-r--r--fs/nfs/client.c47
-rw-r--r--fs/nfs/delegation.c114
-rw-r--r--fs/nfs/delegation.h3
-rw-r--r--fs/nfs/dir.c4
-rw-r--r--fs/nfs/export.c11
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayout.c26
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c6
-rw-r--r--fs/nfs/fs_context.c42
-rw-r--r--fs/nfs/inode.c69
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/localio.c7
-rw-r--r--fs/nfs/mount_clnt.c68
-rw-r--r--fs/nfs/nfs4_fs.h5
-rw-r--r--fs/nfs/nfs4client.c185
-rw-r--r--fs/nfs/nfs4file.c25
-rw-r--r--fs/nfs/nfs4getroot.c14
-rw-r--r--fs/nfs/nfs4proc.c139
-rw-r--r--fs/nfs/nfs4trace.c2
-rw-r--r--fs/nfs/nfs4trace.h168
-rw-r--r--fs/nfs/nfs4xdr.c24
-rw-r--r--fs/nfs/nfstrace.h11
-rw-r--r--fs/nfs/pnfs.c39
-rw-r--r--fs/nfs/pnfs_nfs.c14
-rw-r--r--fs/nfs/write.c8
-rw-r--r--fs/nfs_common/nfslocalio.c28
-rw-r--r--include/linux/nfs_fs.h8
-rw-r--r--include/linux/nfs_fs_sb.h8
-rw-r--r--include/linux/nfs_xdr.h57
-rw-r--r--include/linux/sunrpc/xdr.h9
-rw-r--r--net/sunrpc/auth_gss/gss_krb5_crypto.c4
-rw-r--r--net/sunrpc/xdr.c110
-rw-r--r--net/sunrpc/xprtsock.c40
35 files changed, 858 insertions, 562 deletions
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index 47189476b553..5d6edafbed20 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -149,8 +149,8 @@ do_add_page_to_bio(struct bio *bio, int npg, enum req_op op, sector_t isect,
/* limit length to what the device mapping allows */
end = disk_addr + *len;
- if (end >= map->start + map->len)
- *len = map->start + map->len - disk_addr;
+ if (end >= map->disk_offset + map->len)
+ *len = map->disk_offset + map->len - disk_addr;
retry:
if (!bio) {
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index cab8809f0e0f..44306ac22353 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -257,10 +257,11 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev *child;
u64 chunk;
u32 chunk_idx;
+ u64 disk_chunk;
u64 disk_offset;
chunk = div_u64(offset, dev->chunk_size);
- div_u64_rem(chunk, dev->nr_children, &chunk_idx);
+ disk_chunk = div_u64_rem(chunk, dev->nr_children, &chunk_idx);
if (chunk_idx >= dev->nr_children) {
dprintk("%s: invalid chunk idx %d (%lld/%lld)\n",
@@ -273,7 +274,7 @@ static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset,
offset = chunk * dev->chunk_size;
/* disk offset of the stripe */
- disk_offset = div_u64(offset, dev->nr_children);
+ disk_offset = disk_chunk * dev->chunk_size;
child = &dev->children[chunk_idx];
child->map(child, disk_offset, map);
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 8f7cff7a4293..315949a7e92d 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -6,6 +6,7 @@
#include <linux/vmalloc.h>
#include "blocklayout.h"
+#include "../nfs4trace.h"
#define NFSDBG_FACILITY NFSDBG_PNFS_LD
@@ -520,10 +521,71 @@ static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
}
-static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
+/**
+ * ext_tree_try_encode_commit - try to encode all extents into the buffer
+ * @bl: pointer to the layout
+ * @p: pointer to the output buffer
+ * @buffer_size: size of the output buffer
+ * @count: output pointer to the number of encoded extents
+ * @lastbyte: output pointer to the last written byte
+ *
+ * Return values:
+ * %0: Success, all required extents encoded, outputs are valid
+ * %-ENOSPC: Buffer too small, nothing encoded, outputs are invalid
+ */
+static int
+ext_tree_try_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count, __u64 *lastbyte)
{
struct pnfs_block_extent *be;
+
+ spin_lock(&bl->bl_ext_lock);
+ for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
+ if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
+ be->be_tag != EXTENT_WRITTEN)
+ continue;
+
+ (*count)++;
+ if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
+ spin_unlock(&bl->bl_ext_lock);
+ return -ENOSPC;
+ }
+ }
+ for (be = ext_tree_first(&bl->bl_ext_rw); be; be = ext_tree_next(be)) {
+ if (be->be_state != PNFS_BLOCK_INVALID_DATA ||
+ be->be_tag != EXTENT_WRITTEN)
+ continue;
+
+ if (bl->bl_scsi_layout)
+ p = encode_scsi_range(be, p);
+ else
+ p = encode_block_extent(be, p);
+ be->be_tag = EXTENT_COMMITTING;
+ }
+ *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
+ bl->bl_lwb = 0;
+ spin_unlock(&bl->bl_ext_lock);
+
+ return 0;
+}
+
+/**
+ * ext_tree_encode_commit - encode as much as possible extents into the buffer
+ * @bl: pointer to the layout
+ * @p: pointer to the output buffer
+ * @buffer_size: size of the output buffer
+ * @count: output pointer to the number of encoded extents
+ * @lastbyte: output pointer to the last written byte
+ *
+ * Return values:
+ * %0: Success, all required extents encoded, outputs are valid
+ * %-ENOSPC: Buffer too small, some extents are encoded, outputs are valid
+ */
+static int
+ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
+ size_t buffer_size, size_t *count, __u64 *lastbyte)
+{
+ struct pnfs_block_extent *be, *be_prev;
int ret = 0;
spin_lock(&bl->bl_ext_lock);
@@ -534,9 +596,9 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
(*count)++;
if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
- /* keep counting.. */
+ (*count)--;
ret = -ENOSPC;
- continue;
+ break;
}
if (bl->bl_scsi_layout)
@@ -544,14 +606,30 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
else
p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
+ be_prev = be;
+ }
+ if (!ret) {
+ *lastbyte = (bl->bl_lwb != 0) ? bl->bl_lwb - 1 : U64_MAX;
+ bl->bl_lwb = 0;
+ } else {
+ *lastbyte = be_prev->be_f_offset + be_prev->be_length;
+ *lastbyte <<= SECTOR_SHIFT;
+ *lastbyte -= 1;
}
- *lastbyte = bl->bl_lwb - 1;
- bl->bl_lwb = 0;
spin_unlock(&bl->bl_ext_lock);
return ret;
}
+/**
+ * ext_tree_prepare_commit - encode extents that need to be committed
+ * @arg: layout commit data
+ *
+ * Return values:
+ * %0: Success, all required extents are encoded
+ * %-ENOSPC: Some extents are encoded, but not all, due to RPC size limit
+ * %-ENOMEM: Out of memory, extents not encoded
+ */
int
ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
{
@@ -560,20 +638,18 @@ ext_tree_prepare_commit(struct nfs4_layoutcommit_args *arg)
__be32 *start_p;
int ret;
- dprintk("%s enter\n", __func__);
-
arg->layoutupdate_page = alloc_page(GFP_NOFS);
if (!arg->layoutupdate_page)
return -ENOMEM;
start_p = page_address(arg->layoutupdate_page);
arg->layoutupdate_pages = &arg->layoutupdate_page;
-retry:
- ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size, &count, &arg->lastbytewritten);
+ ret = ext_tree_try_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(bl, count);
+ buffer_size = NFS_SERVER(arg->inode)->wsize;
count = 0;
arg->layoutupdate_pages =
@@ -588,7 +664,8 @@ retry:
return -ENOMEM;
}
- goto retry;
+ ret = ext_tree_encode_commit(bl, start_p + 1, buffer_size,
+ &count, &arg->lastbytewritten);
}
*start_p = cpu_to_be32(count);
@@ -607,8 +684,9 @@ retry:
}
}
- dprintk("%s found %zu ranges\n", __func__, count);
- return 0;
+ trace_bl_ext_tree_prepare_commit(ret, count,
+ arg->lastbytewritten, !!ret);
+ return ret;
}
void
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index cf35ad3f818a..8fb4a950dd55 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -682,6 +682,44 @@ struct nfs_client *nfs_init_client(struct nfs_client *clp,
}
EXPORT_SYMBOL_GPL(nfs_init_client);
+static void nfs4_server_set_init_caps(struct nfs_server *server)
+{
+#if IS_ENABLED(CONFIG_NFS_V4)
+ /* Set the basic capabilities */
+ server->caps = server->nfs_client->cl_mvops->init_caps;
+ if (server->flags & NFS_MOUNT_NORDIRPLUS)
+ server->caps &= ~NFS_CAP_READDIRPLUS;
+ if (server->nfs_client->cl_proto == XPRT_TRANSPORT_RDMA)
+ server->caps &= ~NFS_CAP_READ_PLUS;
+
+ /*
+ * Don't use NFS uid/gid mapping if we're using AUTH_SYS or lower
+ * authentication.
+ */
+ if (nfs4_disable_idmapping &&
+ server->client->cl_auth->au_flavor == RPC_AUTH_UNIX)
+ server->caps |= NFS_CAP_UIDGID_NOMAP;
+#endif
+}
+
+void nfs_server_set_init_caps(struct nfs_server *server)
+{
+ switch (server->nfs_client->rpc_ops->version) {
+ case 2:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ break;
+ case 3:
+ server->caps = NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
+ if (!(server->flags & NFS_MOUNT_NORDIRPLUS))
+ server->caps |= NFS_CAP_READDIRPLUS;
+ break;
+ default:
+ nfs4_server_set_init_caps(server);
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nfs_server_set_init_caps);
+
/*
* Create a version 2 or 3 client
*/
@@ -726,7 +764,6 @@ static int nfs_init_server(struct nfs_server *server,
/* Initialise the client representation from the mount data */
server->flags = ctx->flags;
server->options = ctx->options;
- server->caps |= NFS_CAP_HARDLINKS | NFS_CAP_SYMLINKS;
switch (clp->rpc_ops->version) {
case 2:
@@ -762,6 +799,8 @@ static int nfs_init_server(struct nfs_server *server,
if (error < 0)
goto error;
+ nfs_server_set_init_caps(server);
+
/* Preserve the values of mount_server-related mount options */
if (ctx->mount_server.addrlen) {
memcpy(&server->mountd_address, &ctx->mount_server.address,
@@ -814,7 +853,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wsize = max_rpc_payload;
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
- server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
@@ -831,7 +869,6 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->maxfilesize = fsinfo->maxfilesize;
- server->time_delta = fsinfo->time_delta;
server->change_attr_type = fsinfo->change_attr_type;
server->clone_blksize = fsinfo->clone_blksize;
@@ -936,7 +973,6 @@ void nfs_server_copy_userdata(struct nfs_server *target, struct nfs_server *sour
target->acregmax = source->acregmax;
target->acdirmin = source->acdirmin;
target->acdirmax = source->acdirmax;
- target->caps = source->caps;
target->options = source->options;
target->auth_info = source->auth_info;
target->port = source->port;
@@ -1007,6 +1043,7 @@ struct nfs_server *nfs_alloc_server(void)
INIT_LIST_HEAD(&server->ss_src_copies);
atomic_set(&server->active, 0);
+ atomic_long_set(&server->nr_active_delegations, 0);
server->io_stats = nfs_alloc_iostats();
if (!server->io_stats) {
@@ -1170,6 +1207,8 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source,
if (error < 0)
goto out_free_server;
+ nfs_server_set_init_caps(server);
+
/* probe the filesystem info for this server filesystem */
error = nfs_probe_server(server, fh);
if (error < 0)
diff --git a/fs/nfs/delegation.c b/fs/nfs/delegation.c
index 10ef46e29b25..9d3a5f29f17f 100644
--- a/fs/nfs/delegation.c
+++ b/fs/nfs/delegation.c
@@ -27,8 +27,15 @@
#define NFS_DEFAULT_DELEGATION_WATERMARK (5000U)
-static atomic_long_t nfs_active_delegations;
static unsigned nfs_delegation_watermark = NFS_DEFAULT_DELEGATION_WATERMARK;
+module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
+
+static struct hlist_head *nfs_delegation_hash(struct nfs_server *server,
+ const struct nfs_fh *fhandle)
+{
+ return server->delegation_hash_table +
+ (nfs_fhandle_hash(fhandle) & server->delegation_hash_mask);
+}
static void __nfs_free_delegation(struct nfs_delegation *delegation)
{
@@ -37,11 +44,12 @@ static void __nfs_free_delegation(struct nfs_delegation *delegation)
kfree_rcu(delegation, rcu);
}
-static void nfs_mark_delegation_revoked(struct nfs_delegation *delegation)
+static void nfs_mark_delegation_revoked(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
if (!test_and_set_bit(NFS_DELEGATION_REVOKED, &delegation->flags)) {
delegation->stateid.type = NFS4_INVALID_STATEID_TYPE;
- atomic_long_dec(&nfs_active_delegations);
+ atomic_long_dec(&server->nr_active_delegations);
if (!test_bit(NFS_DELEGATION_RETURNING, &delegation->flags))
nfs_clear_verifier_delegated(delegation->inode);
}
@@ -59,9 +67,10 @@ static void nfs_put_delegation(struct nfs_delegation *delegation)
__nfs_free_delegation(delegation);
}
-static void nfs_free_delegation(struct nfs_delegation *delegation)
+static void nfs_free_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation)
{
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(server, delegation);
nfs_put_delegation(delegation);
}
@@ -237,34 +246,34 @@ void nfs_inode_reclaim_delegation(struct inode *inode, const struct cred *cred,
rcu_read_lock();
delegation = rcu_dereference(NFS_I(inode)->delegation);
- if (delegation != NULL) {
- spin_lock(&delegation->lock);
- nfs4_stateid_copy(&delegation->stateid, stateid);
- delegation->type = type;
- delegation->pagemod_limit = pagemod_limit;
- oldcred = delegation->cred;
- delegation->cred = get_cred(cred);
- switch (deleg_type) {
- case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
- case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
- set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
- break;
- default:
- clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
- }
- clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
- if (test_and_clear_bit(NFS_DELEGATION_REVOKED,
- &delegation->flags))
- atomic_long_inc(&nfs_active_delegations);
- spin_unlock(&delegation->lock);
- rcu_read_unlock();
- put_cred(oldcred);
- trace_nfs4_reclaim_delegation(inode, type);
- } else {
+ if (!delegation) {
rcu_read_unlock();
nfs_inode_set_delegation(inode, cred, type, stateid,
pagemod_limit, deleg_type);
+ return;
}
+
+ spin_lock(&delegation->lock);
+ nfs4_stateid_copy(&delegation->stateid, stateid);
+ delegation->type = type;
+ delegation->pagemod_limit = pagemod_limit;
+ oldcred = delegation->cred;
+ delegation->cred = get_cred(cred);
+ switch (deleg_type) {
+ case NFS4_OPEN_DELEGATE_READ_ATTRS_DELEG:
+ case NFS4_OPEN_DELEGATE_WRITE_ATTRS_DELEG:
+ set_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ break;
+ default:
+ clear_bit(NFS_DELEGATION_DELEGTIME, &delegation->flags);
+ }
+ clear_bit(NFS_DELEGATION_NEED_RECLAIM, &delegation->flags);
+ if (test_and_clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags))
+ atomic_long_inc(&NFS_SERVER(inode)->nr_active_delegations);
+ spin_unlock(&delegation->lock);
+ rcu_read_unlock();
+ put_cred(oldcred);
+ trace_nfs4_reclaim_delegation(inode, type);
}
static int nfs_do_return_delegation(struct inode *inode,
@@ -355,6 +364,8 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
rcu_dereference_protected(nfsi->delegation,
lockdep_is_held(&clp->cl_lock));
+ trace_nfs4_detach_delegation(&nfsi->vfs_inode, delegation->type);
+
if (deleg_cur == NULL || delegation != deleg_cur)
return NULL;
@@ -363,6 +374,7 @@ nfs_detach_delegation_locked(struct nfs_inode *nfsi,
spin_unlock(&delegation->lock);
return NULL;
}
+ hlist_del_init_rcu(&delegation->hash);
list_del_rcu(&delegation->super_list);
delegation->inode = NULL;
rcu_assign_pointer(nfsi->delegation, NULL);
@@ -410,7 +422,8 @@ nfs_update_delegation_cred(struct nfs_delegation *delegation,
}
static void
-nfs_update_inplace_delegation(struct nfs_delegation *delegation,
+nfs_update_inplace_delegation(struct nfs_server *server,
+ struct nfs_delegation *delegation,
const struct nfs_delegation *update)
{
if (nfs4_stateid_is_newer(&update->stateid, &delegation->stateid)) {
@@ -423,7 +436,7 @@ nfs_update_inplace_delegation(struct nfs_delegation *delegation,
nfs_update_delegation_cred(delegation, update->cred);
/* smp_mb__before_atomic() is implicit due to xchg() */
clear_bit(NFS_DELEGATION_REVOKED, &delegation->flags);
- atomic_long_inc(&nfs_active_delegations);
+ atomic_long_inc(&server->nr_active_delegations);
}
}
}
@@ -478,7 +491,7 @@ int nfs_inode_set_delegation(struct inode *inode, const struct cred *cred,
if (nfs4_stateid_match_other(&old_delegation->stateid,
&delegation->stateid)) {
spin_lock(&old_delegation->lock);
- nfs_update_inplace_delegation(old_delegation,
+ nfs_update_inplace_delegation(server, old_delegation,
delegation);
spin_unlock(&old_delegation->lock);
goto out;
@@ -524,10 +537,12 @@ add_new:
spin_unlock(&inode->i_lock);
list_add_tail_rcu(&delegation->super_list, &server->delegations);
+ hlist_add_head_rcu(&delegation->hash,
+ nfs_delegation_hash(server, &NFS_I(inode)->fh));
rcu_assign_pointer(nfsi->delegation, delegation);
delegation = NULL;
- atomic_long_inc(&nfs_active_delegations);
+ atomic_long_inc(&server->nr_active_delegations);
trace_nfs4_set_delegation(inode, type);
@@ -541,7 +556,7 @@ out:
__nfs_free_delegation(delegation);
if (freeme != NULL) {
nfs_do_return_delegation(inode, freeme, 0);
- nfs_free_delegation(freeme);
+ nfs_free_delegation(server, freeme);
}
return status;
}
@@ -592,6 +607,8 @@ static bool nfs_delegation_need_return(struct nfs_delegation *delegation)
{
bool ret = false;
+ trace_nfs_delegation_need_return(delegation);
+
if (test_and_clear_bit(NFS_DELEGATION_RETURN, &delegation->flags))
ret = true;
if (test_bit(NFS_DELEGATION_RETURNING, &delegation->flags) ||
@@ -751,7 +768,7 @@ void nfs_inode_evict_delegation(struct inode *inode)
set_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
set_bit(NFS_DELEGATION_INODE_FREEING, &delegation->flags);
nfs_do_return_delegation(inode, delegation, 1);
- nfs_free_delegation(delegation);
+ nfs_free_delegation(NFS_SERVER(inode), delegation);
}
}
@@ -837,7 +854,8 @@ void nfs4_inode_return_delegation_on_close(struct inode *inode)
if (!delegation)
goto out;
if (test_bit(NFS_DELEGATION_RETURN_IF_CLOSED, &delegation->flags) ||
- atomic_long_read(&nfs_active_delegations) >= nfs_delegation_watermark) {
+ atomic_long_read(&NFS_SERVER(inode)->nr_active_delegations) >=
+ nfs_delegation_watermark) {
spin_lock(&delegation->lock);
if (delegation->inode &&
list_empty(&NFS_I(inode)->open_files) &&
@@ -1013,7 +1031,7 @@ static void nfs_revoke_delegation(struct inode *inode,
}
spin_unlock(&delegation->lock);
}
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
ret = true;
out:
rcu_read_unlock();
@@ -1045,7 +1063,7 @@ void nfs_delegation_mark_returned(struct inode *inode,
delegation->stateid.seqid = stateid->seqid;
}
- nfs_mark_delegation_revoked(delegation);
+ nfs_mark_delegation_revoked(NFS_SERVER(inode), delegation);
clear_bit(NFS_DELEGATION_RETURNING, &delegation->flags);
spin_unlock(&delegation->lock);
if (nfs_detach_delegation(NFS_I(inode), delegation, NFS_SERVER(inode)))
@@ -1158,11 +1176,12 @@ static struct inode *
nfs_delegation_find_inode_server(struct nfs_server *server,
const struct nfs_fh *fhandle)
{
+ struct hlist_head *head = nfs_delegation_hash(server, fhandle);
struct nfs_delegation *delegation;
struct super_block *freeme = NULL;
struct inode *res = NULL;
- list_for_each_entry_rcu(delegation, &server->delegations, super_list) {
+ hlist_for_each_entry_rcu(delegation, head, hash) {
spin_lock(&delegation->lock);
if (delegation->inode != NULL &&
!test_bit(NFS_DELEGATION_REVOKED, &delegation->flags) &&
@@ -1265,7 +1284,7 @@ restart:
if (delegation != NULL) {
if (nfs_detach_delegation(NFS_I(inode), delegation,
server) != NULL)
- nfs_free_delegation(delegation);
+ nfs_free_delegation(server, delegation);
/* Match nfs_start_delegation_return_locked */
nfs_put_delegation(delegation);
}
@@ -1570,4 +1589,17 @@ out:
return ret;
}
-module_param_named(delegation_watermark, nfs_delegation_watermark, uint, 0644);
+int nfs4_delegation_hash_alloc(struct nfs_server *server)
+{
+ int delegation_buckets, i;
+
+ delegation_buckets = roundup_pow_of_two(nfs_delegation_watermark / 16);
+ server->delegation_hash_mask = delegation_buckets - 1;
+ server->delegation_hash_table = kmalloc_array(delegation_buckets,
+ sizeof(*server->delegation_hash_table), GFP_KERNEL);
+ if (!server->delegation_hash_table)
+ return -ENOMEM;
+ for (i = 0; i < delegation_buckets; i++)
+ INIT_HLIST_HEAD(&server->delegation_hash_table[i]);
+ return 0;
+}
diff --git a/fs/nfs/delegation.h b/fs/nfs/delegation.h
index 8ff5ab9c5c25..08ec2e9c68a4 100644
--- a/fs/nfs/delegation.h
+++ b/fs/nfs/delegation.h
@@ -14,6 +14,7 @@
* NFSv4 delegation
*/
struct nfs_delegation {
+ struct hlist_node hash;
struct list_head super_list;
const struct cred *cred;
struct inode *inode;
@@ -123,4 +124,6 @@ static inline int nfs_have_delegated_mtime(struct inode *inode)
NFS_DELEGATION_FLAG_TIME);
}
+int nfs4_delegation_hash_alloc(struct nfs_server *server);
+
#endif
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index d0e0b435a843..d81217923936 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -1828,9 +1828,7 @@ static void block_revalidate(struct dentry *dentry)
static void unblock_revalidate(struct dentry *dentry)
{
- /* store_release ensures wait_var_event() sees the update */
- smp_store_release(&dentry->d_fsdata, NULL);
- wake_up_var(&dentry->d_fsdata);
+ store_release_wake_up(&dentry->d_fsdata, NULL);
}
/*
diff --git a/fs/nfs/export.c b/fs/nfs/export.c
index e9c233b6fd20..a10dd5f9d078 100644
--- a/fs/nfs/export.c
+++ b/fs/nfs/export.c
@@ -66,14 +66,21 @@ nfs_fh_to_dentry(struct super_block *sb, struct fid *fid,
{
struct nfs_fattr *fattr = NULL;
struct nfs_fh *server_fh = nfs_exp_embedfh(fid->raw);
- size_t fh_size = offsetof(struct nfs_fh, data) + server_fh->size;
+ size_t fh_size = offsetof(struct nfs_fh, data);
const struct nfs_rpc_ops *rpc_ops;
struct dentry *dentry;
struct inode *inode;
- int len = EMBED_FH_OFF + XDR_QUADLEN(fh_size);
+ int len = EMBED_FH_OFF;
u32 *p = fid->raw;
int ret;
+ /* Initial check of bounds */
+ if (fh_len < len + XDR_QUADLEN(fh_size) ||
+ fh_len > XDR_QUADLEN(NFS_MAXFHSIZE))
+ return NULL;
+ /* Calculate embedded filehandle size */
+ fh_size += server_fh->size;
+ len += XDR_QUADLEN(fh_size);
/* NULL translates to ESTALE */
if (fh_len < len || fh_type != len)
return NULL;
diff --git a/fs/nfs/flexfilelayout/flexfilelayout.c b/fs/nfs/flexfilelayout/flexfilelayout.c
index 4bea008dbebd..8dc921d83538 100644
--- a/fs/nfs/flexfilelayout/flexfilelayout.c
+++ b/fs/nfs/flexfilelayout/flexfilelayout.c
@@ -762,14 +762,14 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
{
struct nfs4_ff_layout_segment *fls = FF_LAYOUT_LSEG(lseg);
struct nfs4_ff_layout_mirror *mirror;
- struct nfs4_pnfs_ds *ds;
+ struct nfs4_pnfs_ds *ds = ERR_PTR(-EAGAIN);
u32 idx;
/* mirrors are initially sorted by efficiency */
for (idx = start_idx; idx < fls->mirror_array_cnt; idx++) {
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds))
continue;
if (check_device &&
@@ -777,10 +777,10 @@ ff_layout_choose_ds_for_read(struct pnfs_layout_segment *lseg,
continue;
*best_idx = idx;
- return ds;
+ break;
}
- return NULL;
+ return ds;
}
static struct nfs4_pnfs_ds *
@@ -942,7 +942,7 @@ retry:
for (i = 0; i < pgio->pg_mirror_count; i++) {
mirror = FF_LAYOUT_COMP(pgio->pg_lseg, i);
ds = nfs4_ff_layout_prepare_ds(pgio->pg_lseg, mirror, true);
- if (!ds) {
+ if (IS_ERR(ds)) {
if (!ff_layout_no_fallback_to_mds(pgio->pg_lseg))
goto out_mds;
pnfs_generic_pg_cleanup(pgio);
@@ -1867,6 +1867,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
u32 idx = hdr->pgio_mirror_idx;
int vers;
struct nfs_fh *fh;
+ bool ds_fatal_error = false;
dprintk("--> %s ino %lu pgbase %u req %zu@%llu\n",
__func__, hdr->inode->i_ino,
@@ -1874,8 +1875,10 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, false);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -1923,7 +1926,7 @@ ff_layout_read_pagelist(struct nfs_pgio_header *hdr)
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_read_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -1945,11 +1948,14 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
int vers;
struct nfs_fh *fh;
u32 idx = hdr->pgio_mirror_idx;
+ bool ds_fatal_error = false;
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds)) {
+ ds_fatal_error = nfs_error_is_fatal(PTR_ERR(ds));
goto out_failed;
+ }
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
hdr->inode);
@@ -2000,7 +2006,7 @@ ff_layout_write_pagelist(struct nfs_pgio_header *hdr, int sync)
return PNFS_ATTEMPTED;
out_failed:
- if (ff_layout_avoid_mds_available_ds(lseg))
+ if (ff_layout_avoid_mds_available_ds(lseg) && !ds_fatal_error)
return PNFS_TRY_AGAIN;
trace_pnfs_mds_fallback_write_pagelist(hdr->inode,
hdr->args.offset, hdr->args.count,
@@ -2043,7 +2049,7 @@ static int ff_layout_initiate_commit(struct nfs_commit_data *data, int how)
idx = calc_ds_index_from_commit(lseg, data->ds_commit_index);
mirror = FF_LAYOUT_COMP(lseg, idx);
ds = nfs4_ff_layout_prepare_ds(lseg, mirror, true);
- if (!ds)
+ if (IS_ERR(ds))
goto out_err;
ds_clnt = nfs4_ff_find_or_create_ds_client(mirror, ds->ds_clp,
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index 656d5c50bbce..30365ec782bb 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -370,11 +370,11 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg,
struct nfs4_ff_layout_mirror *mirror,
bool fail_return)
{
- struct nfs4_pnfs_ds *ds = NULL;
+ struct nfs4_pnfs_ds *ds;
struct inode *ino = lseg->pls_layout->plh_inode;
struct nfs_server *s = NFS_SERVER(ino);
unsigned int max_payload;
- int status;
+ int status = -EAGAIN;
if (!ff_layout_init_mirror_ds(lseg->pls_layout, mirror))
goto noconnect;
@@ -418,7 +418,7 @@ noconnect:
ff_layout_send_layouterror(lseg);
if (fail_return || !ff_layout_has_available_ds(lseg))
pnfs_error_mark_layout_for_return(ino, lseg);
- ds = NULL;