Inter-revision diff: patch 4

Comparing v3 (message) to v4 (message)

--- v3
+++ v4
@@ -9,22 +9,22 @@
 Signed-off-by: Christian Brauner <brauner@kernel.org>
 Signed-off-by: Aleksa Sarai <cyphar@cyphar.com>
 ---
- man/man2/fsopen.2 | 384 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
- 1 file changed, 384 insertions(+)
+ man/man2/fsconfig.2 | 727 ++++++++++++++++++++++++++++++++++++++++++++++++++++
+ 1 file changed, 727 insertions(+)
 
-diff --git a/man/man2/fsopen.2 b/man/man2/fsopen.2
+diff --git a/man/man2/fsconfig.2 b/man/man2/fsconfig.2
 new file mode 100644
-index 0000000000000000000000000000000000000000..cce677f316c67de72c359f94a6b415d851a761d6
+index 0000000000000000000000000000000000000000..5a18e08c700ac93aa22c341b4134944ee3c38d0b
 --- /dev/null
-+++ b/man/man2/fsopen.2
-@@ -0,0 +1,384 @@
++++ b/man/man2/fsconfig.2
+@@ -0,0 +1,727 @@
 +.\" Copyright, the authors of the Linux man-pages project
 +.\"
 +.\" SPDX-License-Identifier: Linux-man-pages-copyleft
 +.\"
-+.TH fsopen 2 (date) "Linux man-pages (unreleased)"
++.TH fsconfig 2 (date) "Linux man-pages (unreleased)"
 +.SH NAME
-+fsopen \- create a new filesystem context
++fsconfig \- configure new or existing filesystem context
 +.SH LIBRARY
 +Standard C library
 +.RI ( libc ,\~ \-lc )
@@ -32,377 +32,720 @@
 +.nf
 +.B #include <sys/mount.h>
 +.P
-+.BI "int fsopen(const char *" fsname ", unsigned int " flags ");"
++.BI "int fsconfig(int " fd ", unsigned int " cmd ,
++.BI "             const char *_Nullable " key ,
++.BI "             const void *_Nullable " value ", int " aux );
 +.fi
 +.SH DESCRIPTION
 +The
-+.BR fsopen ()
++.BR fsconfig ()
 +system call is part of
 +the suite of file descriptor based mount facilities in Linux.
 +.P
-+.BR fsopen ()
-+creates a blank filesystem configuration context within the kernel
-+for the filesystem named by
-+.I fsname
-+and places it into creation mode.
-+A new file descriptor
-+associated with the filesystem configuration context
-+is then returned.
-+The calling process must have the
++.BR fsconfig ()
++is used to supply parameters to
++and issue commands against
++the filesystem configuration context
++associated with the file descriptor
++.IR fd .
++Filesystem configuration contexts can be created with
++.BR fsopen (2)
++or be instantiated from an extant filesystem instance with
++.BR fspick (2).
++.P
++The
++.I cmd
++argument indicates the command to be issued.
++Some commands supply parameters to the context
++(equivalent to mount options specified with
++.BR mount (8)),
++while others are meta-operations on the filesystem context.
++The list of valid
++.I cmd
++values is:
++.RS
++.TP
++.B FSCONFIG_SET_FLAG
++Set the flag parameter named by
++.IR key .
++.I value
++must be NULL,
++and
++.I aux
++must be 0.
++.TP
++.B FSCONFIG_SET_STRING
++Set the string parameter named by
++.I key
++to the value specified by
++.IR value .
++.I value
++points to a null-terminated string,
++and
++.I aux
++must be 0.
++.TP
++.B FSCONFIG_SET_BINARY
++Set the blob parameter named by
++.I key
++to the contents of the binary blob
++specified by
++.IR value .
++.I value
++points to
++the start of a buffer
++that is
++.I aux
++bytes in length.
++.TP
++.B FSCONFIG_SET_FD
++Set the file parameter named by
++.I key
++to the open file description
++referenced by the file descriptor
++.IR aux .
++.I value
++must be NULL.
++.IP
++You may also use
++.B \%FSCONFIG_SET_STRING
++for file parameters,
++with
++.I value
++set to a null-terminated string
++containing a base-10 representation
++of the file descriptor number.
++This mechanism is primarily intended for compatibility
++with older
++.BR mount (2)-based
++programs,
++and only works for parameters
++that
++.I only
++accept file descriptor arguments.
++.TP
++.B FSCONFIG_SET_PATH
++Set the path parameter named by
++.I key
++to the object at a provided path,
++resolved in a similar manner to
++.BR openat (2).
++.I value
++points to a null-terminated pathname string,
++and
++.I aux
++is equivalent to the
++.I dirfd
++argument to
++.BR openat (2).
++See
++.BR openat (2)
++for an explanation of the need for
++.BR \%FSCONFIG_SET_PATH .
++.IP
++You may also use
++.B \%FSCONFIG_SET_STRING
++for path parameters,
++the behaviour of which is equivalent to
++.B \%FSCONFIG_SET_PATH
++with
++.I aux
++set to
++.BR \%AT_FDCWD .
++.TP
++.B FSCONFIG_SET_PATH_EMPTY
++As with
++.BR \%FSCONFIG_SET_PATH ,
++except that if
++.I value
++is an empty string,
++the file descriptor specified by
++.I aux
++is operated on directly
++and may be any type of file
++(not just a directory).
++This is equivalent to the behaviour of
++.B \%AT_EMPTY_PATH
++with most "*at()" system calls.
++If
++.I aux
++is
++.BR \%AT_FDCWD ,
++the parameter will be set to
++the current working directory
++of the calling process.
++.TP
++.B FSCONFIG_CMD_CREATE
++This command instructs the filesystem driver
++to instantiate an instance of the filesystem in the kernel
++with the parameters specified in the filesystem configuration context.
++.I key
++and
++.I value
++must be NULL,
++and
++.I aux
++must be 0.
++.IP
++This command can only be issued once
++in the lifetime of a filesystem context.
++If the operation succeeds,
++the filesystem context
++associated with file descriptor
++.I fd
++now references the created filesystem instance,
++and is placed into a special "awaiting-mount" mode
++that allows you to use
++.BR fsmount (2)
++to create a mount object from the filesystem instance.
++.\" FS_CONTEXT_AWAITING_MOUNT is the term the kernel uses for this.
++If the operation fails,
++in most cases
++the filesystem context is placed in a failed mode
++and cannot be used for any further
++.BR fsconfig ()
++operations
++(though you may still retrieve diagnostic messages
++through the message retrieval interface,
++as described in
++the corresponding subsection of
++.BR fsopen (2)).
++.IP
++This command can only be issued against
++filesystem configuration contexts
++that were created with
++.BR fsopen (2).
++In order to create a filesystem instance,
++the calling process must have the
 +.B \%CAP_SYS_ADMIN
-+capability in order to create a new filesystem configuration context.
-+.P
-+A filesystem configuration context is
-+an in-kernel representation of a pending transaction,
-+containing a set of configuration parameters that are to be applied
-+when creating a new instance of a filesystem
-+(or modifying the configuration of an existing filesystem instance,
-+such as when using
-+.BR fspick (2)).
-+.P
-+After obtaining a filesystem configuration context with
-+.BR fsopen (),
-+the general workflow for operating on the context looks like the following:
-+.IP (1) 5
-+Pass the filesystem context file descriptor to
-+.BR fsconfig (2)
-+to specify any desired filesystem parameters.
-+This may be done as many times as necessary.
-+.IP (2)
-+Pass the same filesystem context file descriptor to
-+.BR fsconfig (2)
-+with
++capability.
++.IP
++An important thing to be aware of is that
++the Linux kernel will
++.I silently
++reuse extant filesystem instances
++depending on the filesystem type
++and the configured parameters
++(each filesystem driver has
++its own policy for
++how filesystem instances are reused).
++This means that
++the filesystem instance "created" by
 +.B \%FSCONFIG_CMD_CREATE
-+to create an instance of the configured filesystem.
-+.IP (3)
-+Pass the same filesystem context file descriptor to
-+.BR fsmount (2)
-+to create a new detached mount object for
-+the root of the filesystem instance,
-+which is then attached to a new file descriptor.
-+(This also places the filesystem context file descriptor into
-+reconfiguration mode,
-+similar to the mode produced by
-+.BR fspick (2).)
-+Once a mount object has been created with
-+.BR fsmount (2),
-+the filesystem context file descriptor can be safely closed.
-+.IP (4)
-+Now that a mount object has been created,
-+you may
-+.RS
-+.IP (4.1) 7
-+use the detached mount object file descriptor as a
-+.I dirfd
-+argument to "*at()" system calls; and/or
-+.IP (4.2) 7
-+attach the mount object to a mount point
-+by passing the mount object file descriptor to
-+.BR move_mount (2).
-+This will also prevent the mount object from
-+being unmounted and destroyed when
-+the mount object file descriptor is closed.
++may, in fact, be a reference
++to an extant filesystem instance in the kernel.
++(For reference,
++this behaviour also applies to
++.BR mount (2).)
++.IP
++One side-effect of this behaviour is that
++if an extant filesystem instance is reused,
++.I all
++parameters configured
++for this filesystem configuration context
++are
++.I silently ignored
++(with the exception of the
++.I ro
++and
++.I rw
++flag parameters;
++if the state of the read-only flag in the
++extant filesystem instance and the filesystem configuration context
++do not match, this operation will return
++.BR EBUSY ).
++This also means that
++.B \%FSCONFIG_CMD_RECONFIGURE
++commands issued against
++the "created" filesystem instance
++will also affect any mount objects associated with
++the extant filesystem instance.
++.IP
++Programs that need to ensure
++that they create a new filesystem instance
++with specific parameters
++(notably, security-related parameters
++such as
++.I acl
++to enable POSIX ACLs\[em]as described in
++.BR acl (5))
++should use
++.B \%FSCONFIG_CMD_CREATE_EXCL
++instead.
++.TP
++.BR FSCONFIG_CMD_CREATE_EXCL " (since Linux 6.6)"
++.\" commit 22ed7ecdaefe0cac0c6e6295e83048af60435b13
++.\" commit 84ab1277ce5a90a8d1f377707d662ac43cc0918a
++As with
++.BR \%FSCONFIG_CMD_CREATE ,
++except that the kernel is instructed
++to not reuse extant filesystem instances.
++If the operation
++would be forced to
++reuse an extant filesystem instance,
++this operation will return
++.B EBUSY
++instead.
++.IP
++As a result (unlike
++.BR \%FSCONFIG_CMD_CREATE ),
++if this operation succeeds
++then the calling process can be sure that
++all of the parameters successfully configured with
++.BR fsconfig ()
++will actually be applied
++to the created filesystem instance.
++.TP
++.B FSCONFIG_CMD_RECONFIGURE
++This command instructs the filesystem driver
++to apply the parameters specified in the filesystem configuration context
++to the extant filesystem instance
++referenced by the filesystem configuration context.
++.I key
++and
++.I value
++must be NULL,
++and
++.I aux
++must be 0.
++.IP
++This is primarily intended for use with
++.BR fspick (2),
++but may also be used to modify
++the parameters of a filesystem instance
++after
++.B \%FSCONFIG_CMD_CREATE
++was used to create it
++and a mount object was created using
++.BR fsmount (2).
++In order to reconfigure an extant filesystem instance,
++the calling process must have the
++.B CAP_SYS_ADMIN
++capability.
++.IP
++If the operation succeeds,
++the filesystem context is reset
++but remains in reconfiguration mode
++and thus can be reused for subsequent
++.B \%FSCONFIG_CMD_RECONFIGURE
++commands.
++If the operation fails,
++in most cases
++the filesystem context is placed in a failed mode
++and cannot be used for any further
++.BR fsconfig ()
++operations
++(though you may still retrieve diagnostic messages
++through the message retrieval interface,
++as described in
++the corresponding subsection of
++.BR fsopen (2)).
 +.RE
-+.IP
-+The mount object file descriptor will
-+remain associated with the mount object
-+even after doing the above operations,
-+so you may repeatedly use the mount object file descriptor with
-+.BR move_mount (2)
-+and/or "*at()" system calls
-+as many times as necessary.
-+.P
-+A filesystem context will move between different modes
-+throughout its lifecycle
-+(such as the creation phase
-+when created with
-+.BR fsopen (),
-+the reconfiguration phase
-+when an existing filesystem instance is selected with
-+.BR fspick (2),
-+and the intermediate "awaiting-mount" phase
-+.\" FS_CONTEXT_AWAITING_MOUNT is the term the kernel uses for this.
-+between
-+.BR \%FSCONFIG_CMD_CREATE
-+and
-+.BR fsmount (2)),
-+which has an impact on
-+what operations are permitted on the filesystem context.
-+.P
-+The file descriptor returned by
-+.BR fsopen ()
-+also acts as a channel for filesystem drivers to
-+provide more comprehensive diagnostic information
-+than is normally provided through the standard
-+.BR errno (3)
-+interface for system calls.
-+If an error occurs at any time during the workflow mentioned above,
-+calling
-+.BR read (2)
-+on the filesystem context file descriptor
-+will retrieve any ancillary information about the encountered errors.
-+(See the "Message retrieval interface" section
-+for more details on the message format.)
-+.P
-+.I flags
-+can be used to control aspects of
-+the creation of the filesystem configuration context file descriptor.
-+A value for
-+.I flags
-+is constructed by bitwise ORing
-+zero or more of the following constants:
-+.RS
-+.TP
-+.B FSOPEN_CLOEXEC
-+Set the close-on-exec
-+.RB ( FD_CLOEXEC )
-+flag on the new file descriptor.
-+See the description of the
-+.B O_CLOEXEC
-+flag in
-+.BR open (2)
-+for reasons why this may be useful.
-+.RE
-+.P
-+A list of filesystems supported by the running kernel
-+(and thus a list of valid values for
-+.IR fsname )
-+can be obtained from
-+.IR /proc/filesystems .
-+(See also
-+.BR proc_filesystems (5).)
-+.SS Message retrieval interface
-+When doing operations on a filesystem configuration context,
-+the filesystem driver may choose to provide
-+ancillary information to userspace
-+in the form of message strings.
-+.P
-+The filesystem context file descriptors returned by
-+.BR fsopen ()
-+and
-+.BR fspick (2)
-+may be queried for message strings at any time by calling
-+.BR read (2)
-+on the file descriptor.
-+Each call to
-+.BR read (2)
-+will return a single message,
-+prefixed to indicate its class:
-+.RS
-+.TP
-+\fBe\fP <\fImessage\fP>
-+An error message was logged.
-+This is usually associated with an error being returned
-+from the corresponding system call which triggered this message.
-+.TP
-+\fBw\fP <\fImessage\fP>
-+A warning message was logged.
-+.TP
-+\fBi\fP <\fImessage\fP>
-+An informational message was logged.
-+.RE
-+.P
-+Messages are removed from the queue as they are read.
-+Note that the message queue has limited depth,
-+so it is possible for messages to get lost.
-+If there are no messages in the message queue,
-+.B read(2)
-+will return \-1 and
-+.I errno
-+will be set to
-+.BR \%ENODATA .
-+If the
-+.I buf
-+argument to
-+.BR read (2)
-+is not large enough to contain the entire message,
-+.BR read (2)
-+will return \-1 and
-+.I errno
-+will be set to
-+.BR \%EMSGSIZE .
-+(See BUGS.)
-+.P
-+If there are multiple filesystem contexts
-+referencing the same filesystem instance
-+(such as if you call
-+.BR fspick (2)
-+multiple times for the same mount),
-+each one gets its own independent message queue.
-+This does not apply to multiple file descriptors that are
-+tied to the same underlying open file description
-+(such as those created with
-+.BR dup (2)).
-+.P
-+Message strings will usually be prefixed by
-+the name of the filesystem or kernel subsystem
-+that logged the message,
-+though this may not always be the case.
-+See the Linux kernel source code for details.
++.P
++Parameters specified with
++.BI FSCONFIG_SET_ *
++do not take effect
++until a corresponding
++.B \%FSCONFIG_CMD_CREATE
++or
++.B \%FSCONFIG_CMD_RECONFIGURE
++command is issued.
 +.SH RETURN VALUE
-+On success, a new file descriptor is returned.
++On success,
++.BR fsconfig ()
++returns 0.
 +On error, \-1 is returned, and
 +.I errno
 +is set to indicate the error.
 +.SH ERRORS
++If an error occurs, the filesystem driver may provide
++additional information about the error
++through the message retrieval interface for filesystem configuration contexts.
++This additional information can be retrieved at any time by calling
++.BR read (2)
++on the filesystem instance or filesystem configuration context
++referenced by the file descriptor
++.IR fd .
++(See the "Message retrieval interface" subsection in
++.BR fsopen (2)
++for more details on the message format.)
++.P
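++In most cases,
++even after an error occurs
++while setting a parameter,
++the filesystem configuration context is
++.I not
++invalidated,
++and thus can still be used with other
++.BR fsconfig ()
++commands.
++(However, see the descriptions of
++.B \%FSCONFIG_CMD_CREATE
++and
++.B \%FSCONFIG_CMD_RECONFIGURE
++above for how a failure of those commands
++usually places the context in a failed mode.)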
++This means that users can probe support for filesystem parameters
++on a per-parameter basis,
++and adjust which parameters they wish to set.
++.P
++The error values given below result from
++filesystem type independent errors.
++Each filesystem type may have its own special errors
++and its own special behavior.
++See the Linux kernel source code for details.
++.TP
++.B EACCES
++A component of a path
++provided as a path parameter
++was not searchable.
++(See also
++.BR path_resolution (7).)
++.TP
++.B EACCES
++.B \%FSCONFIG_CMD_CREATE
++was attempted
++for a read-only filesystem
++without specifying the
++.RB ' ro '
++flag parameter.
++.TP
++.B EACCES
++A specified block device parameter
++is located on a filesystem
++mounted with the
++.B \%MS_NODEV
++option.
++.TP
++.B EBADF
++The file descriptor given by
++.I fd
++(or possibly by
++.IR aux ,
++depending on the command)
++is invalid.
++.TP
++.B EBUSY
++The filesystem context associated with
++.I fd
++is in the wrong state
++for the given command.
++.TP
++.B EBUSY
++The filesystem instance cannot be reconfigured as read-only
++with
++.B \%FSCONFIG_CMD_RECONFIGURE
++because some programs
++still hold files open for writing.
++.TP
++.B EBUSY
++A new filesystem instance was requested with
++.B \%FSCONFIG_CMD_CREATE_EXCL
++but a matching superblock already existed.
 +.TP
 +.B EFAULT
-+.I fsname
-+is NULL
-+or a pointer to a location
++One of the pointer arguments
++points to a location
 +outside the calling process's accessible address space.
 +.TP
 +.B EINVAL
-+.I flags
-+had an invalid flag set.
-+.TP
-+.B EMFILE
-+The calling process has too many open files to create more.
-+.TP
-+.B ENFILE
-+The system has too many open files to create more.
-+.TP
-+.B ENODEV
-+The filesystem named by
-+.I fsname
-+is not supported by the kernel.
++.I fd
++does not refer to
++a filesystem configuration context
++or filesystem instance.
++.TP
++.B EINVAL
++One or more of
++.IR key ,
++.IR value ,
++and
++.I aux
++was set to a non-zero value when
++.I cmd
++required that they be zero
++(or NULL).
++.TP
++.B EINVAL
++The parameter named by
++.I key
++cannot be set
++using the type specified with
++.IR cmd .
++.TP
++.B EINVAL
++One of the source parameters
++referred to
++an invalid superblock.
++.TP
++.B ELOOP
++Too many links encountered
++during pathname resolution
++of a path argument.
++.TP
++.B ENAMETOOLONG
++A path argument was longer than
++.BR PATH_MAX .
++.TP
++.B ENOENT
++A path argument had a non-existent component.
++.TP
++.B ENOENT
++A path argument is an empty string,
++but
++.I cmd
++is not
++.BR \%FSCONFIG_SET_PATH_EMPTY .
 +.TP
 +.B ENOMEM
 +The kernel could not allocate sufficient memory to complete the operation.
 +.TP
++.B ENOTBLK
++The parameter named by
++.I key
++must be a block device,
++but the provided parameter value was not a block device.
++.TP
++.B ENOTDIR
++A component of the path prefix
++of a path argument
++was not a directory.
++.TP
++.B EOPNOTSUPP
++The command given by
++.I cmd
++is not valid.
++.TP
++.B ENXIO
++The major number
++of a block device parameter
++is out of range.
++.TP
 +.B EPERM
-+The calling process does not have the required
++The command given by
++.I cmd
++was
++.BR \%FSCONFIG_CMD_CREATE ,
++.BR \%FSCONFIG_CMD_CREATE_EXCL ,
++or
++.BR \%FSCONFIG_CMD_RECONFIGURE ,
++but the calling process does not have the required
 +.B \%CAP_SYS_ADMIN
 +capability.
 +.SH STANDARDS
 +Linux.
 +.SH HISTORY
 +Linux 5.2.
-+.\" commit 24dcb3d90a1f67fe08c68a004af37df059d74005
++.\" commit ecdab150fddb42fe6a739335257949220033b782
 +.\" commit 400913252d09f9cfb8cce33daee43167921fc343
 +glibc 2.36.
-+.SH BUGS
-+.SS Message retrieval interface and \fB\%EMSGSIZE\fP
-+As described in the "Message retrieval interface" subsection above,
-+calling
-+.BR read (2)
-+with too small a buffer to contain
-+the next pending message in the message queue
-+for the filesystem configuration context
-+will cause
-+.BR read (2)
-+to return \-1 and set
-+.BR errno (3)
-+to
-+.BR \%EMSGSIZE .
++.SH NOTES
++.SS Generic filesystem parameters
++Each filesystem driver is responsible for
++parsing most parameters specified with
++.BR fsconfig (),
++meaning that individual filesystems
++may have very different behaviour
++when encountering parameters with the same name.
++In general,
++you should not assume that the behaviour of
++.BR fsconfig ()
++when specifying a parameter to one filesystem type
++will match the behaviour of the same parameter
++with a different filesystem type.
 +.P
 +However,
-+this failed operation still
-+consumes the message from the message queue.
-+This effectively discards the message silently,
-+as no data is copied into the
-+.BR read (2)
-+buffer.
-+.P
-+Programs should take care to ensure that
-+their buffers are sufficiently large
-+to contain any reasonable message string,
-+in order to avoid silently losing valuable diagnostic information.
++the following generic parameters
++apply to all filesystems and have unified behaviour.
++They are set using the listed
++.BI \%FSCONFIG_SET_ *
++command.
++.TP
++\fIro\fP and \fIrw\fP (\fB\%FSCONFIG_SET_FLAG\fP)
++Configure whether the filesystem instance is read-only.
++.TP
++\fIdirsync\fP (\fB\%FSCONFIG_SET_FLAG\fP)
++Make directory changes on this filesystem instance synchronous.
++.TP
++\fIsync\fP and \fIasync\fP (\fB\%FSCONFIG_SET_FLAG\fP)
++Configure whether writes on this filesystem instance
++will be made synchronous
++(as though the
++.B O_SYNC
++flag to
++.BR open (2)
++was specified for
++all file opens in this filesystem instance).
++.TP
++\fIlazytime\fP and \fInolazytime\fP (\fB\%FSCONFIG_SET_FLAG\fP)
++Configure whether to reduce on-disk updates
++of inode timestamps on this filesystem instance
++(as described in the
++.B \%MS_LAZYTIME
++section of
++.BR mount (2)).
++.TP
++\fImand\fP and \fInomand\fP (\fB\%FSCONFIG_SET_FLAG\fP)
++Configure whether the filesystem instance should permit mandatory locking.
++Since Linux 5.15,
++.\" commit f7e33bdbd6d1bdf9c3df8bba5abcf3399f957ac3
++mandatory locking has been deprecated
++and setting this flag is a no-op.
++.TP
++\fIsource\fP (\fB\%FSCONFIG_SET_STRING\fP)
++This parameter is equivalent to the
++.I source
++parameter passed to
++.BR mount (2)
++for the same filesystem type,
++and is usually the pathname of a block device
++containing the filesystem.
++This parameter may only be set once
++per filesystem configuration context transaction.
++.P
++In addition,
++any filesystem parameters associated with
++Linux Security Modules (LSMs)
++are also generic with respect to the underlying filesystem.
++See the documentation for the LSM you wish to configure for more details.
++.SH CAVEATS
++.SS Filesystem parameter types
++As a result of
++each filesystem driver being responsible for
++parsing most parameters specified with
++.BR fsconfig (),
++some filesystem drivers
++may have unintuitive behaviour
++with regard to which
++.BI \%FSCONFIG_SET_ *
++commands are permitted
++to configure a given parameter.
++.P
++In order for
++filesystem parameters to be backwards compatible with
++.BR mount (2),
++they must be parseable as strings;
++this almost universally means that
++.B \%FSCONFIG_SET_STRING
++can also be used to configure them.
 +.\" Aleksa Sarai
-+.\"   This unfortunate behaviour has existed since this feature was merged, but
-+.\"   I have sent a patchset which will finally fix it.
-+.\"   <https://lore.kernel.org/r/20250807-fscontext-log-cleanups-v3-1-8d91d6242dc3@cyphar.com/>
++.\"   Theoretically, a filesystem could check fc->oldapi and refuse
++.\"   FSCONFIG_SET_STRING if the operation is coming from the new API, but no
++.\"   filesystems do this (and probably never will).
++However, other
++.BI \%FSCONFIG_SET_ *
++commands need to be opted into
++by each filesystem driver's parameter parser.
++.P
++One of the most user-visible instances of
++this inconsistency is that
++many filesystems do not support
++configuring path parameters with
++.B \%FSCONFIG_SET_PATH
++(despite the name),
++which can lead to somewhat confusing
++.B EINVAL
++errors.
++(For example, the generic
++.I source
++parameter\[em]which is usually a path\[em]can only be configured
++with
++.BR \%FSCONFIG_SET_STRING .)
++.P
++When writing programs that use
++.BR fsconfig ()
++to configure parameters
++with commands other than
++.BR \%FSCONFIG_SET_STRING ,
++users should verify
++that the
++.BI \%FSCONFIG_SET_ *
++commands used to configure each parameter
++are supported by the corresponding filesystem driver.
++.\" Aleksa Sarai
++.\"   While this (quite confusing) inconsistency in behaviour is true today
++.\"   (and has been true since this was merged), this appears to mostly be an
++.\"   unintended consequence of filesystem drivers hand-coding fsparam parsing.
++.\"   Path parameters are the most egregious causes of confusion. Hopefully we
++.\"   can make this no longer the case in a future kernel.
 +.SH EXAMPLES
-+To illustrate the workflow for creating a new mount,
-+the following is an example of how to mount an
-+.BR ext4 (5)
-+filesystem stored on
-+.I /dev/sdb1
-+onto
-+.IR /mnt .
++To illustrate the different kinds of parameters that can be configured with
++.BR fsconfig (),
++here are a few examples of some different filesystems being created:
 +.P
 +.in +4n
 +.EX
 +int fsfd, mntfd;
 +\&
-+fsfd = fsopen("ext4", FSOPEN_CLOEXEC);
-+fsconfig(fsfd, FSCONFIG_SET_FLAG, "ro", NULL, 0);
-+fsconfig(fsfd, FSCONFIG_SET_PATH, "source", "/dev/sdb1", AT_FDCWD);
-+fsconfig(fsfd, FSCONFIG_SET_FLAG, "noatime", NULL, 0);
++fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC);
++fsconfig(fsfd, FSCONFIG_SET_FLAG, "inode64", NULL, 0);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "uid", "1234", 0);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "huge", "never", 0);
++fsconfig(fsfd, FSCONFIG_SET_FLAG, "casefold", NULL, 0);
++fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
++mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC);
++move_mount(mntfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH);
++\&
++fsfd = fsopen("erofs", FSOPEN_CLOEXEC);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "/dev/loop0", 0);
 +fsconfig(fsfd, FSCONFIG_SET_FLAG, "acl", NULL, 0);
 +fsconfig(fsfd, FSCONFIG_SET_FLAG, "user_xattr", NULL, 0);
-+fsconfig(fsfd, FSCONFIG_SET_FLAG, "iversion", NULL, 0)
-+fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
-+mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_RELATIME);
++fsconfig(fsfd, FSCONFIG_CMD_CREATE_EXCL, NULL, NULL, 0);
++mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOSUID);
 +move_mount(mntfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
 +.EE
 +.in
 +.P
-+First,
-+an ext4 configuration context is created and attached to the file descriptor
-+.IR fsfd .
-+Then, a series of parameters
-+(such as the source of the filesystem)
-+are provided using
-+.BR fsconfig (2),
-+followed by the filesystem instance being created with
-+.BR \%FSCONFIG_CMD_CREATE .
-+.BR fsmount (2)
-+is then used to create a new mount object attached to the file descriptor
-+.IR mntfd ,
-+which is then attached to the intended mount point using
-+.BR move_mount (2).
-+.P
-+The above procedure is functionally equivalent to
-+the following mount operation using
-+.BR mount (2):
++Usually,
++specifying the same parameter named by
++.I key
++multiple times with
++.BR fsconfig ()
++causes the parameter value to be replaced.
++However, some filesystems may have unique behaviour:
 +.P
 +.in +4n
 +.EX
-+mount("/dev/sdb1", "/mnt", "ext4", MS_RELATIME,
-+      "ro,noatime,acl,user_xattr,iversion");
++\&
++int fsfd, mntfd;
++int lowerdirfd = open("/o/ctr/lower1", O_DIRECTORY | O_CLOEXEC);
++\&
++fsfd = fsopen("overlay", FSOPEN_CLOEXEC);
++/* "lowerdir+" appends to the lower dir stack each time. */
++fsconfig(fsfd, FSCONFIG_SET_FD, "lowerdir+", NULL, lowerdirfd);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/o/ctr/lower2", 0);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/o/ctr/lower3", 0);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir+", "/o/ctr/lower4", 0);
++.\" fsconfig(fsfd, FSCONFIG_SET_PATH, "lowerdir+", "/o/ctr/lower5", AT_FDCWD);
++.\" fsconfig(fsfd, FSCONFIG_SET_PATH_EMPTY, "lowerdir+", "", lowerdirfd);
++.\" Aleksa Sarai: Hopefully these will also be supported in the future.
++fsconfig(fsfd, FSCONFIG_SET_STRING, "xino", "auto", 0);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "nfs_export", "off", 0);
++fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
++mntfd = fsmount(fsfd, FSMOUNT_CLOEXEC, 0);
++move_mount(mntfd, "", AT_FDCWD, "/mnt", MOVE_MOUNT_F_EMPTY_PATH);
 +.EE
 +.in
 +.P
-+And here's an example of creating a mount object
-+of an NFS server share
-+and setting a Smack security module label.
-+However, instead of attaching it to a mount point,
-+the program uses the mount object directly
-+to open a file from the NFS share.
++And here is an example of how
++.BR fspick (2)
++can be used with
++.BR fsconfig ()
++to reconfigure the parameters
++of an extant filesystem instance
++attached to
++.IR /proc :
 +.P
 +.in +4n
 +.EX
-+int fsfd, mntfd, fd;
-+\&
-+fsfd = fsopen("nfs", 0);
-+fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "example.com/pub/linux", 0);
-+fsconfig(fsfd, FSCONFIG_SET_STRING, "nfsvers", "3", 0);
-+fsconfig(fsfd, FSCONFIG_SET_STRING, "rsize", "65536", 0);
-+fsconfig(fsfd, FSCONFIG_SET_STRING, "wsize", "65536", 0);
-+fsconfig(fsfd, FSCONFIG_SET_STRING, "smackfsdef", "foolabel", 0);
-+fsconfig(fsfd, FSCONFIG_SET_FLAG, "rdma", NULL, 0);
-+fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
-+mntfd = fsmount(fsfd, 0, MOUNT_ATTR_NODEV);
-+fd = openat(mntfd, "src/linux-5.2.tar.xz", O_RDONLY);
++int fsfd = fspick(AT_FDCWD, "/proc", FSPICK_CLOEXEC);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "hidepid", "ptraceable", 0);
++fsconfig(fsfd, FSCONFIG_SET_STRING, "subset", "pid", 0);
++fsconfig(fsfd, FSCONFIG_CMD_RECONFIGURE, NULL, NULL, 0);
 +.EE
 +.in
-+.P
-+Unlike the previous example,
-+this operation has no trivial equivalent with
-+.BR mount (2),
-+as it was not previously possible to create a mount object
-+that is not attached to any mount point.
 +.SH SEE ALSO
-+.BR fsconfig (2),
 +.BR fsmount (2),
++.BR fsopen (2),
 +.BR fspick (2),
 +.BR mount (2),
 +.BR mount_setattr (2),
 +.BR move_mount (2),
 +.BR open_tree (2),
 +.BR mount_namespaces (7)
++
 
 -- 
-2.50.1
+2.51.0
 
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help