Thread (65 messages) 65 messages, 9 authors, 2019-06-17

Re: [PATCH 4/7] vfs: Add superblock notifications

From: Jann Horn <jannh@google.com>
Date: 2019-05-28 20:28:17
Also in: keyrings, linux-block, linux-fsdevel, linux-security-module, lkml

On Tue, May 28, 2019 at 6:05 PM David Howells [off-list ref] wrote:
Add a superblock event notification facility whereby notifications about
superblock events, such as I/O errors (EIO), quota limits being hit
(EDQUOT) and running out of space (ENOSPC) can be reported to a monitoring
process asynchronously.  Note that this does not cover vfsmount topology
changes.  mount_notify() is used for that.
[...]
+#ifdef CONFIG_SB_NOTIFICATIONS
+/*
+ * Post superblock notifications.
+ */
+void post_sb_notification(struct super_block *s, struct superblock_notification *n)
+{
+       post_watch_notification(s->s_watchers, &n->watch, current_cred(),
+                               s->s_unique_id);
+}
You're using current_cred() here? So the idea is that if some random
process runs into a disk I/O error, the I/O error will come from that
task's credentials? In general, you're not supposed to look at task
credentials in ->read/->write handlers.
+static void release_sb_watch(struct watch_list *wlist, struct watch *watch)
+{
+       struct super_block *s = watch->private;
+
+       put_super(s);
+}
+
+/**
+ * sys_sb_notify - Watch for superblock events.
+ * @dfd: Base directory to pathwalk from or fd referring to superblock.
+ * @filename: Path to superblock to place the watch upon
+ * @at_flags: Pathwalk control flags
+ * @watch_fd: The watch queue to send notifications to.
+ * @watch_id: The watch ID to be placed in the notification (-1 to remove watch)
+ */
+SYSCALL_DEFINE5(sb_notify,
+               int, dfd,
+               const char __user *, filename,
+               unsigned int, at_flags,
+               int, watch_fd,
+               int, watch_id)
+{
+       struct watch_queue *wqueue;
+       struct super_block *s;
+       struct watch_list *wlist = NULL;
+       struct watch *watch;
+       struct path path;
+       int ret;
+
+       if (watch_id < -1 || watch_id > 0xff)
+               return -EINVAL;
+
+       ret = user_path_at(dfd, filename, at_flags, &path);
As in the other patch, I don't think userspace is supposed to be able
to supply user_path_at()'s third argument.

It might make sense to require that the path points to the root inode
of the superblock? That way you wouldn't be able to do this on a bind
mount that exposes part of a shared filesystem to a container.
+       if (ret)
+               return ret;
+
+       wqueue = get_watch_queue(watch_fd);
+       if (IS_ERR(wqueue))
+               goto err_path;
+
+       s = path.dentry->d_sb;
+       if (watch_id >= 0) {
+               if (!s->s_watchers) {
+                       wlist = kzalloc(sizeof(*wlist), GFP_KERNEL);
+                       if (!wlist)
+                               goto err_wqueue;
+                       INIT_HLIST_HEAD(&wlist->watchers);
+                       spin_lock_init(&wlist->lock);
+                       wlist->release_watch = release_sb_watch;
+               }
+
+               watch = kzalloc(sizeof(*watch), GFP_KERNEL);
+               if (!watch)
+                       goto err_wlist;
+
+               init_watch(watch, wqueue);
+               watch->id               = s->s_unique_id;
+               watch->private          = s;
+               watch->info_id          = (u32)watch_id << 24;
+
+               down_write(&s->s_umount);
+               ret = -EIO;
+               if (atomic_read(&s->s_active)) {
+                       if (!s->s_watchers) {
+                               s->s_watchers = wlist;
+                               wlist = NULL;
+                       }
+
+                       ret = add_watch_to_object(watch, s->s_watchers);
+                       if (ret == 0) {
+                               spin_lock(&sb_lock);
+                               s->s_count++;
+                               spin_unlock(&sb_lock);
Why do watches hold references on the superblock they're watching?
+                       }
+               }
+               up_write(&s->s_umount);
+               if (ret < 0)
+                       kfree(watch);
+       } else if (s->s_watchers) {
This should probably have something like a READ_ONCE() for clarity?
+               down_write(&s->s_umount);
+               ret = remove_watch_from_object(s->s_watchers, wqueue,
+                                              s->s_unique_id, false);
+               up_write(&s->s_umount);
+       } else {
+               ret = -EBADSLT;
+       }
+
+err_wlist:
+       kfree(wlist);
+err_wqueue:
+       put_watch_queue(wqueue);
+err_path:
+       path_put(&path);
+       return ret;
+}
+#endif
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help