[PATCH 09/11] nfsd: Allow filehandle lookup to cross internal mount points.
From: NeilBrown <hidden>
Date: 2021-07-27 22:43:50
Also in:
linux-fsdevel, linux-nfs
Subsystem:
filesystems (vfs and infrastructure), kernel nfsd, sunrpc, and lockd servers, the rest · Maintainers:
Alexander Viro, Christian Brauner, Chuck Lever, Jeff Layton, Linus Torvalds
Enhance nfsd to detect internal mounts and to cross them without requiring a new export.

Also ensure that the fsid reported is different for different submounts. We do this by XORing in the inode number of the mounted-on directory. This makes sense for btrfs at least.

Signed-off-by: NeilBrown <redacted>
---
 fs/nfsd/nfs3xdr.c | 28 +++++++++++++++++++++-------
 fs/nfsd/nfs4xdr.c | 34 +++++++++++++++++++++++-----------
 fs/nfsd/nfsfh.c   |  7 ++++++-
 fs/nfsd/vfs.c     | 11 +++++++++--
 4 files changed, 59 insertions(+), 21 deletions(-)
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 67af0c5c1543..80b1cc0334fa 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c@@ -370,6 +370,8 @@ svcxdr_encode_fattr3(struct svc_rqst *rqstp, struct xdr_stream *xdr, case FSIDSOURCE_UUID: fsid = ((u64 *)fhp->fh_export->ex_uuid)[0]; fsid ^= ((u64 *)fhp->fh_export->ex_uuid)[1]; + if (fhp->fh_mnt != fhp->fh_export->ex_path.mnt) + fsid ^= nfsd_get_mounted_on(fhp->fh_mnt); break; default: fsid = (u64)huge_encode_dev(fhp->fh_dentry->d_sb->s_dev);
@@ -1094,8 +1096,8 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, __be32 rv = nfserr_noent; dparent = cd->fh.fh_dentry; - exp = cd->fh.fh_export; - child.mnt = cd->fh.fh_mnt; + exp = exp_get(cd->fh.fh_export); + child.mnt = mntget(cd->fh.fh_mnt); if (isdotent(name, namlen)) { if (namlen == 2) {
@@ -1112,15 +1114,27 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, child.dentry = dget(dparent); } else child.dentry = lookup_positive_unlocked(name, dparent, namlen); - if (IS_ERR(child.dentry)) + if (IS_ERR(child.dentry)) { + mntput(child.mnt); + exp_put(exp); return rv; - if (d_mountpoint(child.dentry)) - goto out; - if (child.dentry->d_inode->i_ino != ino) + } + /* If child is a mountpoint, then we want to expose the fact + * so client can create a mountpoint. If not, then a different + * ino number probably means a race with rename, so avoid providing + * too much detail. + */ + if (nfsd_mountpoint(child.dentry, exp)) { + int err; + err = nfsd_cross_mnt(cd->rqstp, &child, &exp); + if (err) + goto out; + } else if (child.dentry->d_inode->i_ino != ino) goto out; rv = fh_compose(fhp, exp, &child, &cd->fh); out: - dput(child.dentry); + path_put(&child); + exp_put(exp); return rv; }
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index d5683b6a74b2..4dbc99ed2c8b 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c@@ -2817,6 +2817,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, struct kstat stat; struct svc_fh *tempfh = NULL; struct kstatfs statfs; + u64 mounted_on_ino; + u64 sub_fsid; __be32 *p; int starting_len = xdr->buf->len; int attrlen_offset;
@@ -2871,6 +2873,24 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; fhp = tempfh; } + if ((bmval0 & FATTR4_WORD0_FSID) || + (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID)) { + mounted_on_ino = stat.ino; + sub_fsid = 0; + /* + * The inode number that the current mnt is mounted on is + * used for MOUNTED_ON_FILED if we are at the root, + * and for sub_fsid if mnt is not the export mnt. + */ + if (ignore_crossmnt == 0) { + u64 moi = nfsd_get_mounted_on(mnt); + + if (dentry == mnt->mnt_root && moi) + mounted_on_ino = moi; + if (mnt != exp->ex_path.mnt) + sub_fsid = moi; + } + } if (bmval0 & FATTR4_WORD0_ACL) { err = nfsd4_get_nfs4_acl(rqstp, dentry, &acl); if (err == -EOPNOTSUPP)
@@ -3008,6 +3028,8 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, case FSIDSOURCE_UUID: p = xdr_encode_opaque_fixed(p, exp->ex_uuid, EX_UUID_LEN); + if (mnt != exp->ex_path.mnt) + *(u64*)(p-2) ^= sub_fsid; break; } }
@@ -3253,20 +3275,10 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, *p++ = cpu_to_be32(stat.mtime.tv_nsec); } if (bmval1 & FATTR4_WORD1_MOUNTED_ON_FILEID) { - u64 ino; - p = xdr_reserve_space(xdr, 8); if (!p) goto out_resource; - /* - * Get parent's attributes if not ignoring crossmount - * and this is the root of a cross-mounted filesystem. - */ - if (ignore_crossmnt == 0 && dentry == mnt->mnt_root) - ino = nfsd_get_mounted_on(mnt); - if (!ino) - ino = stat.ino; - p = xdr_encode_hyper(p, ino); + p = xdr_encode_hyper(p, mounted_on_ino); } #ifdef CONFIG_NFSD_PNFS if (bmval1 & FATTR4_WORD1_FS_LAYOUT_TYPES) {
diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c
index 4023046f63e2..4b53838bca89 100644
--- a/fs/nfsd/nfsfh.c
+++ b/fs/nfsd/nfsfh.c@@ -9,7 +9,7 @@ */ #include <linux/exportfs.h> - +#include <linux/namei.h> #include <linux/sunrpc/svcauth_gss.h> #include "nfsd.h" #include "vfs.h"
@@ -285,6 +285,11 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) default: dentry = ERR_PTR(-ESTALE); } + } else if (nfsd_mountpoint(dentry, exp)) { + struct path path = { .mnt = mnt, .dentry = dentry }; + follow_down(&path, LOOKUP_AUTOMOUNT); + mnt = path.mnt; + dentry = path.dentry; } } if (dentry == NULL)
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index baa12ac36ece..22523e1cd478 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c@@ -64,7 +64,7 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct path *path_parent, .dentry = dget(path_parent->dentry)}; int err = 0; - err = follow_down(&path, 0); + err = follow_down(&path, LOOKUP_AUTOMOUNT); if (err < 0) goto out; if (path.mnt == path_parent->mnt && path.dentry == path_parent->dentry &&
@@ -73,6 +73,13 @@ nfsd_cross_mnt(struct svc_rqst *rqstp, struct path *path_parent, path_put(&path); goto out; } + if (mount_is_internal(path.mnt)) { + /* Use the new path, but don't look for a new export */ + /* FIXME should I check NOHIDE in this case?? */ + path_put(path_parent); + *path_parent = path; + goto out; + } exp2 = rqst_exp_get_by_name(rqstp, &path); if (IS_ERR(exp2)) {
@@ -157,7 +164,7 @@ int nfsd_mountpoint(struct dentry *dentry, struct svc_export *exp) return 1; if (nfsd4_is_junction(dentry)) return 1; - if (d_mountpoint(dentry)) + if (d_managed(dentry)) /* * Might only be a mountpoint in a different namespace, * but we need to check.