Thread (26 messages) 26 messages, 5 authors, 2021-05-24

Re: [PATCH 08/11] fanotify: Introduce FAN_ERROR event

From: Amir Goldstein <amir73il@gmail.com>
Date: 2021-05-21 11:02:54
Also in: linux-fsdevel

On Fri, May 21, 2021 at 5:42 AM Gabriel Krisman Bertazi
[off-list ref] wrote:
quoted hunk ↗ jump to hunk
The FAN_ERROR event is used by filesystem wide monitoring tools to
receive notifications of type FS_ERROR_EVENT, emited by filesystems when
a problem is detected.  The error notification includes a generic error
descriptor.

Signed-off-by: Gabriel Krisman Bertazi <redacted>

Changes since v1:
  - Pass dentry to fanotify_check_fsid (Amir)
  - FANOTIFY_EVENT_TYPE_ERROR -> FANOTIFY_EVENT_TYPE_FS_ERROR
  - Merge previous patch into it
  - Use a single slot
---
 fs/notify/fanotify/fanotify.c      |  74 ++++++++++++++++-
 fs/notify/fanotify/fanotify.h      |  28 ++++++-
 fs/notify/fanotify/fanotify_user.c | 123 ++++++++++++++++++++++++++---
 include/linux/fanotify.h           |   6 +-
 include/uapi/linux/fanotify.h      |  10 +++
 5 files changed, 225 insertions(+), 16 deletions(-)
diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c
index 34e2ee759b39..197291a8c41d 100644
--- a/fs/notify/fanotify/fanotify.c
+++ b/fs/notify/fanotify/fanotify.c
@@ -269,7 +269,7 @@ static u32 fanotify_group_event_mask(struct fsnotify_group *group,
        pr_debug("%s: report_mask=%x mask=%x data=%p data_type=%d\n",
                 __func__, iter_info->report_mask, event_mask, data, data_type);

-       if (!fid_mode) {
+       if (!fid_mode && data_type != FSNOTIFY_EVENT_ERROR) {
                /* Do we have path to open a file descriptor? */
                if (!path)
                        return 0;
@@ -657,6 +657,51 @@ static struct fanotify_event *fanotify_alloc_event(struct fsnotify_group *group,
        return event;
 }

+static void fanotify_init_error_event(struct fanotify_error_event *event,
+                                     __kernel_fsid_t fsid,
+                                     const struct fs_error_report *report)
+{
+       event->fae.type = FANOTIFY_EVENT_TYPE_FS_ERROR;
+       event->err_count = 1;
+       event->fsid = fsid;
+       event->error = report->error;
+       event->ino = (report->inode) ? report->inode->i_ino : 0;
+}
+
+static int fanotify_queue_error_event(struct fsnotify_iter_info *iter_info,
+                                     struct fsnotify_group *group,
+                                     __kernel_fsid_t fsid,
+                                     const struct fs_error_report *report)
+{
+       struct fanotify_mark *mark;
+       int type;
+       int ret = -ENOMEM;
+
+       fsnotify_foreach_obj_type(type) {
+               if (!fsnotify_iter_should_report_type(iter_info, type))
+                       continue;
+               mark = FANOTIFY_MARK(iter_info->marks[type]);
+       }
Two options.
If FAN_ERROR is allowed on inode marks, need to do the below
for every mark found in mark type iterator.
If FAN_ERROR is only allowed in sb marks use
fsnotify_iter_sb_mark(iter_info) instead of mark iterator.
+
+       spin_lock(&mark->fsn_mark.lock);
+       if (mark->error_event) {
+               if (list_empty(&mark->error_event->fae.fse.list)) {
event list check should be protected with notification_lock.
err_count++ as well.

I suggest that you use the merge() callback of fsnotify_add_event()
to do err_count++ under notification_lock and the insert() callback
to do fsnotify_get_mark() and fanotify_init_error_event() under
notification_lock.
quoted hunk ↗ jump to hunk
+                       fsnotify_get_mark(&mark->fsn_mark);
+                       fanotify_init_error_event(mark->error_event, fsid, report);
+                       ret = fsnotify_add_event(group, &mark->error_event->fae.fse,
+                                                NULL, NULL);
+                       if (ret)
+                               fsnotify_put_mark(&mark->fsn_mark);
+               } else {
+                       mark->error_event->err_count++;
+                       ret = 0;
+               }
+       }
+       spin_unlock(&mark->fsn_mark.lock);
+
+       return ret;
+}
+
 /*
  * Get cached fsid of the filesystem containing the object from any connector.
  * All connectors are supposed to have the same fsid, but we do not verify that
@@ -738,8 +783,9 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
        BUILD_BUG_ON(FAN_ONDIR != FS_ISDIR);
        BUILD_BUG_ON(FAN_OPEN_EXEC != FS_OPEN_EXEC);
        BUILD_BUG_ON(FAN_OPEN_EXEC_PERM != FS_OPEN_EXEC_PERM);
+       BUILD_BUG_ON(FAN_ERROR != FS_ERROR);

-       BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 19);
+       BUILD_BUG_ON(HWEIGHT32(ALL_FANOTIFY_EVENT_BITS) != 20);

        mask = fanotify_group_event_mask(group, iter_info, mask, data,
                                         data_type, dir);
@@ -757,13 +803,20 @@ static int fanotify_handle_event(struct fsnotify_group *group, u32 mask,
                        return 0;
        }

-       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS)) {
+       if (FAN_GROUP_FLAG(group, FANOTIFY_FID_BITS) || fanotify_is_error_event(mask)) {
Jan likes to keep code inside 80-columns.
Please run checkpatch.pl --max-line-length=80 on your patches.
                fsid = fanotify_get_fsid(iter_info);
                /* Racing with mark destruction or creation? */
                if (!fsid.val[0] && !fsid.val[1])
                        return 0;
        }

+       if (fanotify_is_error_event(mask)) {
+               ret = fanotify_queue_error_event(iter_info, group, fsid, data);
+               if (ret)
+                       fsnotify_queue_overflow(group);
fsnotify_add_event() already takes care of overflow event.
quoted hunk ↗ jump to hunk
+               goto finish;
+       }
+
        event = fanotify_alloc_event(group, mask, data, data_type, dir,
                                     file_name, &fsid);
        ret = -ENOMEM;
@@ -833,6 +886,17 @@ static void fanotify_free_name_event(struct fanotify_event *event)
        kfree(FANOTIFY_NE(event));
 }

+static void fanotify_free_error_event(struct fanotify_event *event)
+{
+       /*
+        * Just drop the reference acquired by
+        * fanotify_queue_error_event.
+        *
+        * The actual memory is freed with the mark.
+        */
+       fsnotify_put_mark(&(FANOTIFY_EE(event)->mark->fsn_mark));
+}
+
 static void fanotify_free_event(struct fsnotify_event *fsn_event)
 {
        struct fanotify_event *event;
@@ -855,6 +919,9 @@ static void fanotify_free_event(struct fsnotify_event *fsn_event)
        case FANOTIFY_EVENT_TYPE_OVERFLOW:
                kfree(event);
                break;
+       case FANOTIFY_EVENT_TYPE_FS_ERROR:
+               fanotify_free_error_event(event);
+               break;
        default:
                WARN_ON_ONCE(1);
        }
@@ -871,6 +938,7 @@ static void fanotify_free_mark(struct fsnotify_mark *fsn_mark)
 {
        struct fanotify_mark *mark = FANOTIFY_MARK(fsn_mark);

+       kfree(mark->error_event);
        kmem_cache_free(fanotify_mark_cache, mark);
 }
diff --git a/fs/notify/fanotify/fanotify.h b/fs/notify/fanotify/fanotify.h
index a399c5e2615d..ebe9e593dfbf 100644
--- a/fs/notify/fanotify/fanotify.h
+++ b/fs/notify/fanotify/fanotify.h
@@ -151,6 +151,7 @@ enum fanotify_event_type {
        FANOTIFY_EVENT_TYPE_PATH,
        FANOTIFY_EVENT_TYPE_PATH_PERM,
        FANOTIFY_EVENT_TYPE_OVERFLOW, /* struct fanotify_event */
+       FANOTIFY_EVENT_TYPE_FS_ERROR, /* struct fanotify_error_event */
        __FANOTIFY_EVENT_TYPE_NUM
 };
@@ -206,12 +207,31 @@ FANOTIFY_NE(struct fanotify_event *event)
        return container_of(event, struct fanotify_name_event, fae);
 }

+struct fanotify_error_event {
+       struct fanotify_event fae;
+       __kernel_fsid_t fsid;
+       unsigned long ino;
+       int error;
+       u32 err_count;
+
+       /* Back reference to the mark this error refers to. */
+       struct fanotify_mark *mark;
+};
+
+static inline struct fanotify_error_event *
+FANOTIFY_EE(struct fanotify_event *event)
+{
+       return container_of(event, struct fanotify_error_event, fae);
+}
+
 static inline __kernel_fsid_t *fanotify_event_fsid(struct fanotify_event *event)
 {
        if (event->type == FANOTIFY_EVENT_TYPE_FID)
                return &FANOTIFY_FE(event)->fsid;
        else if (event->type == FANOTIFY_EVENT_TYPE_FID_NAME)
                return &FANOTIFY_NE(event)->fsid;
+       else if (event->type == FANOTIFY_EVENT_TYPE_FS_ERROR)
+               return &FANOTIFY_EE(event)->fsid;
        else
                return NULL;
 }
@@ -297,6 +317,11 @@ static inline struct fanotify_event *FANOTIFY_E(struct fsnotify_event *fse)
        return container_of(fse, struct fanotify_event, fse);
 }

+static inline bool fanotify_is_error_event(u32 mask)
+{
+       return mask & FANOTIFY_ERROR_EVENTS;
+}
+
 static inline bool fanotify_event_has_path(struct fanotify_event *event)
 {
        return event->type == FANOTIFY_EVENT_TYPE_PATH ||
@@ -325,7 +350,8 @@ static inline struct path *fanotify_event_path(struct fanotify_event *event)
  */
 static inline bool fanotify_is_hashed_event(u32 mask)
 {
-       return !fanotify_is_perm_event(mask) && !(mask & FS_Q_OVERFLOW);
+       return (!fanotify_is_perm_event(mask) && !fanotify_is_error_event(mask)
+               && !(mask & FS_Q_OVERFLOW));
Please do us the service off adding helper fanotify_is_overflow_event(mask)
quoted hunk ↗ jump to hunk
 }

 static inline unsigned int fanotify_event_hash_bucket(
diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c
index 00210535a78e..ea9b9f8f7c21 100644
--- a/fs/notify/fanotify/fanotify_user.c
+++ b/fs/notify/fanotify/fanotify_user.c
@@ -106,6 +106,8 @@ struct kmem_cache *fanotify_perm_event_cachep __read_mostly;
 #define FANOTIFY_EVENT_ALIGN 4
 #define FANOTIFY_INFO_HDR_LEN \
        (sizeof(struct fanotify_event_info_fid) + sizeof(struct file_handle))
+#define FANOTIFY_INFO_ERROR_LEN \
+       (sizeof(struct fanotify_event_info_error))

 static int fanotify_fid_info_len(int fh_len, int name_len)
 {
@@ -126,6 +128,9 @@ static size_t fanotify_event_len(struct fanotify_event *event,
        int fh_len;
        int dot_len = 0;

+       if (fanotify_is_error_event(event->mask))
+               return event_len + FANOTIFY_INFO_ERROR_LEN;
+
        if (!fid_mode)
                return event_len;
@@ -149,6 +154,30 @@ static size_t fanotify_event_len(struct fanotify_event *event,
        return event_len;
 }

+static struct fanotify_event *fanotify_dequeue_error_event(struct fsnotify_group *group,
+                                                          struct fanotify_event *event,
+                                                          struct fanotify_error_event *error_event)
+{
+       struct fsnotify_mark *mark = &(FANOTIFY_EE(event)->mark->fsn_mark);
+       /*
+        * In order to avoid missing an error count update, the
+        * queued event is de-queued and duplicated to an
+        * in-stack fanotify_error_event while still inside
+        * mark->lock.  Once the event is dequeued, it can be
+        * immediately re-used for a new event.
+        *
+        * The ownership of the mark reference is dropped later
+        * by destroy_event.
+        */
+       spin_lock(&mark->lock);
+       memcpy(error_event, FANOTIFY_EE(event), sizeof(*error_event));
+       fsnotify_init_event(&error_event->fae.fse);
+       fsnotify_remove_queued_event(group, &event->fse);
+       spin_unlock(&mark->lock);
+
+       return &error_event->fae;
+}
+
 /*
  * Remove an hashed event from merge hash table.
  */
@@ -173,7 +202,8 @@ static void fanotify_unhash_event(struct fsnotify_group *group,
  * updated accordingly.
  */
 static struct fanotify_event *get_one_event(struct fsnotify_group *group,
-                                           size_t count)
+                                           size_t count,
+                                           struct fanotify_error_event *error_event)
 {
        size_t event_size;
        struct fanotify_event *event = NULL;
@@ -197,9 +227,14 @@ static struct fanotify_event *get_one_event(struct fsnotify_group *group,

        /*
         * Held the notification_lock the whole time, so this is the
-        * same event we peeked above.
+        * same event we peeked above, unless it is copied to
+        * error_event.
         */
-       fsnotify_remove_first_event(group);
+       if (fanotify_is_error_event(event->mask))
+               event = fanotify_dequeue_error_event(group, event, error_event);
+       else
+               fsnotify_remove_first_event(group);
+
        if (fanotify_is_perm_event(event->mask))
                FANOTIFY_PERM(event)->state = FAN_EVENT_REPORTED;
        if (fanotify_is_hashed_event(event->mask))
@@ -309,6 +344,30 @@ static int process_access_response(struct fsnotify_group *group,
        return -ENOENT;
 }

+static size_t copy_error_info_to_user(struct fanotify_event *event,
+                                     char __user *buf, int count)
+{
+       struct fanotify_event_info_error info;
+       struct fanotify_error_event *fee = FANOTIFY_EE(event);
+
+       info.hdr.info_type = FAN_EVENT_INFO_TYPE_ERROR;
+       info.hdr.pad = 0;
+       info.hdr.len = sizeof(struct fanotify_event_info_error);
+
+       if (WARN_ON(count < info.hdr.len))
+               return -EFAULT;
+
+       info.fsid = fee->fsid;
+       info.error = fee->error;
+       info.inode = fee->ino;
+       info.error_count = fee->err_count;
+
+       if (copy_to_user(buf, &info, sizeof(info)))
+               return -EFAULT;
+
+       return info.hdr.len;
+}
+
 static int copy_info_to_user(__kernel_fsid_t *fsid, struct fanotify_fh *fh,
                             int info_type, const char *name, size_t name_len,
                             char __user *buf, size_t count)
@@ -523,6 +582,14 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group,
                count -= ret;
        }

+       if (fanotify_is_error_event(event->mask)) {
+               ret = copy_error_info_to_user(event, buf, count);
+               if (ret < 0)
+                       return ret;
+               buf += ret;
+               count -= ret;
+       }
+
        return metadata.event_len;

 out_close_fd:
@@ -553,6 +620,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
 {
        struct fsnotify_group *group;
        struct fanotify_event *event;
+       struct fanotify_error_event error_event;
        char __user *start;
        int ret;
        DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -569,7 +637,7 @@ static ssize_t fanotify_read(struct file *file, char __user *buf,
                 * in case there are lots of available events.
                 */
                cond_resched();
-               event = get_one_event(group, count);
+               event = get_one_event(group, count, &error_event);
                if (IS_ERR(event)) {
                        ret = PTR_ERR(event);
                        break;
@@ -888,16 +956,33 @@ static int fanotify_remove_inode_mark(struct fsnotify_group *group,
                                    flags, umask);
 }

-static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
-                                      __u32 mask,
-                                      unsigned int flags)
+static int fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
+                                      __u32 mask, unsigned int flags,
+                                    __u32 *modified_mask)
It really means 'added' not 'modified' just like the local variable name.
 {
+       struct fanotify_mark *mark = FANOTIFY_MARK(fsn_mark);
+       struct fanotify_error_event *error_event = NULL;
+       bool addition = !(flags & FAN_MARK_IGNORED_MASK);
'addition' is confusing here. I would call it 'ignored',
but do we want to allow FAN_ERROR in ignored mask?
for what case? on inode mark?
If we do not, let's not allow it.
quoted hunk ↗ jump to hunk
        __u32 oldmask = -1;

+       /* Only pre-alloc error_event if needed. */
+       if (addition && (mask & FAN_ERROR) && !mark->error_event) {
+               error_event = kzalloc(sizeof(*error_event), GFP_KERNEL);
+               if (!error_event)
+                       return -ENOMEM;
+               fanotify_init_event(&error_event->fae, 0, FS_ERROR);
+               error_event->mark = mark;
+       }
+
        spin_lock(&fsn_mark->lock);
-       if (!(flags & FAN_MARK_IGNORED_MASK)) {
+       if (addition) {
                oldmask = fsn_mark->mask;
                fsn_mark->mask |= mask;
+
+               if (!mark->error_event) {
+                       mark->error_event = error_event;
+                       error_event = NULL;
+               }
        } else {
                fsn_mark->ignored_mask |= mask;
                if (flags & FAN_MARK_IGNORED_SURV_MODIFY)
@@ -905,7 +990,11 @@ static __u32 fanotify_mark_add_to_mask(struct fsnotify_mark *fsn_mark,
        }
        spin_unlock(&fsn_mark->lock);

-       return mask & ~oldmask;
+       kfree(error_event);
+
+       *modified_mask = mask & ~oldmask;
+       return 0;
+
 }

 static struct fsnotify_mark *fanotify_add_new_mark(struct fsnotify_group *group,
@@ -955,6 +1044,7 @@ static int fanotify_add_mark(struct fsnotify_group *group,
 {
        struct fsnotify_mark *fsn_mark;
        __u32 added;
+       int ret = 0;

        mutex_lock(&group->mark_mutex);
        fsn_mark = fsnotify_find_mark(connp, group);
@@ -965,13 +1055,18 @@ static int fanotify_add_mark(struct fsnotify_group *group,
                        return PTR_ERR(fsn_mark);
                }
        }
-       added = fanotify_mark_add_to_mask(fsn_mark, mask, flags);
+       ret = fanotify_mark_add_to_mask(fsn_mark, mask, flags, &added);
+       if (ret)
+               goto out;
+
        if (added & ~fsnotify_conn_mask(fsn_mark->connector))
                fsnotify_recalc_mask(fsn_mark->connector);
+
+out:
        mutex_unlock(&group->mark_mutex);

        fsnotify_put_mark(fsn_mark);
-       return 0;
+       return ret;
 }

 static int fanotify_add_vfsmount_mark(struct fsnotify_group *group,
@@ -1377,6 +1472,12 @@ static int do_fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,

                fsid = &__fsid;
        }
+       if (mask & FAN_ERROR) {
+               ret = fanotify_test_fsid(path.dentry, &__fsid);
+               if (ret)
+                       goto path_put_and_out;
+               fsid = &__fsid;
+       }

        /* inode held in place by reference to path; group by fget on fd */
        if (mark_type == FAN_MARK_INODE)
diff --git a/include/linux/fanotify.h b/include/linux/fanotify.h
index bad41bcb25df..05c929d588e4 100644
--- a/include/linux/fanotify.h
+++ b/include/linux/fanotify.h
@@ -81,9 +81,12 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
 #define FANOTIFY_INODE_EVENTS  (FANOTIFY_DIRENT_EVENTS | \
                                 FAN_ATTRIB | FAN_MOVE_SELF | FAN_DELETE_SELF)

+#define FANOTIFY_ERROR_EVENTS  (FAN_ERROR)
+
 /* Events that user can request to be notified on */
 #define FANOTIFY_EVENTS                (FANOTIFY_PATH_EVENTS | \
-                                FANOTIFY_INODE_EVENTS)
+                                FANOTIFY_INODE_EVENTS | \
+                                FANOTIFY_ERROR_EVENTS)

 /* Events that require a permission response from user */
 #define FANOTIFY_PERM_EVENTS   (FAN_OPEN_PERM | FAN_ACCESS_PERM | \
@@ -95,6 +98,7 @@ extern struct ctl_table fanotify_table[]; /* for sysctl */
 /* Events that may be reported to user */
 #define FANOTIFY_OUTGOING_EVENTS       (FANOTIFY_EVENTS | \
                                         FANOTIFY_PERM_EVENTS | \
+                                        FANOTIFY_ERROR_EVENTS | \
                                         FAN_Q_OVERFLOW | FAN_ONDIR)

 #define ALL_FANOTIFY_EVENT_BITS                (FANOTIFY_OUTGOING_EVENTS | \
diff --git a/include/uapi/linux/fanotify.h b/include/uapi/linux/fanotify.h
index fbf9c5c7dd59..e3920597112f 100644
--- a/include/uapi/linux/fanotify.h
+++ b/include/uapi/linux/fanotify.h
@@ -20,6 +20,7 @@
 #define FAN_OPEN_EXEC          0x00001000      /* File was opened for exec */

 #define FAN_Q_OVERFLOW         0x00004000      /* Event queued overflowed */
+#define FAN_ERROR              0x00008000      /* Filesystem error */

 #define FAN_OPEN_PERM          0x00010000      /* File open in perm check */
 #define FAN_ACCESS_PERM                0x00020000      /* File accessed in perm check */
@@ -123,6 +124,7 @@ struct fanotify_event_metadata {
 #define FAN_EVENT_INFO_TYPE_FID                1
 #define FAN_EVENT_INFO_TYPE_DFID_NAME  2
 #define FAN_EVENT_INFO_TYPE_DFID       3
+#define FAN_EVENT_INFO_TYPE_ERROR      4

 /* Variable length info record following event metadata */
 struct fanotify_event_info_header {
@@ -148,6 +150,14 @@ struct fanotify_event_info_fid {
        unsigned char handle[0];
 };

+struct fanotify_event_info_error {
+       struct fanotify_event_info_header hdr;
+       int error;
+       __kernel_fsid_t fsid;
+       unsigned long inode;
I think for UAPI this should use __u64 and better call this ino and
not inode?

Thanks,
Amir.
Keyboard shortcuts
hback out one level
jnext message in thread
kprevious message in thread
ldrill in
Escclose help / fold thread tree
?toggle this help