Re: [PATCHv6 1/3] rdmacg: Added rdma cgroup controller
From: Haggai Eran <hidden>
Date: 2016-02-24 16:49:32
Also in:
linux-rdma, lkml
Hi, Overall I the patch looks good to me. I have a few comments below. On 20/02/2016 13:00, Parav Pandit wrote:
Resource pool is created/destroyed dynamically whenever charging/uncharging occurs respectively and whenever user configuration is done. Its a tradeoff of memory vs little more code
Its -> It's
space that creates resource pool whenever necessary, instead of creating them during cgroup creation and device registration time. Signed-off-by: Parav Pandit <redacted>
quoted hunk ↗ jump to hunk
diff --git a/include/linux/cgroup_rdma.h b/include/linux/cgroup_rdma.h new file mode 100644 index 0000000..b370733 --- /dev/null +++ b/include/linux/cgroup_rdma.h
+struct rdmacg_device {
+ struct rdmacg_pool_info pool_info;
+ struct list_head rdmacg_list;
+ struct list_head rpool_head;
+ spinlock_t rpool_lock; /* protects rsource pool list */rsource -> resource
+ char *name; +}; + +/* APIs for RDMA/IB stack to publish when a device wants to + * participate in resource accounting + */ +int rdmacg_register_device(struct rdmacg_device *device); +void rdmacg_unregister_device(struct rdmacg_device *device); + +/* APIs for RDMA/IB stack to charge/uncharge pool specific resources */ +int rdmacg_try_charge(struct rdma_cgroup **rdmacg, + struct rdmacg_device *device, + int resource_index, + int num); +void rdmacg_uncharge(struct rdma_cgroup *cg, + struct rdmacg_device *device, + int resource_index, + int num); +void rdmacg_query_limit(struct rdmacg_device *device, + int *limits, int max_count);
You can drop the max_count parameter, and require the caller to always provide pool_info->table_len items, couldn't you?
quoted hunk ↗ jump to hunk
+ +#endif /* CONFIG_CGROUP_RDMA */ +#endif /* _CGROUP_RDMA_H */diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 0df0336a..d0e597c 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h@@ -56,6 +56,10 @@ SUBSYS(hugetlb) SUBSYS(pids) #endif +#if IS_ENABLED(CONFIG_CGROUP_RDMA) +SUBSYS(rdma) +#endif + /* * The following subsystems are not supported on the default hierarchy. */diff --git a/init/Kconfig b/init/Kconfig index 2232080..1741b65 100644 --- a/init/Kconfig +++ b/init/Kconfig@@ -1054,6 +1054,16 @@ config CGROUP_PIDS since the PIDs limit only affects a process's ability to fork, not to attach to a cgroup. +config CGROUP_RDMA + bool "RDMA controller" + help + Provides enforcement of RDMA resources defined by IB stack. + It is fairly easy for consumers to exhaust RDMA resources, which + can result into resource unavailibility to other consumers.
unavailibility -> unavailability
quoted hunk ↗ jump to hunk
+ RDMA controller is designed to stop this from happening. + Attaching processes with active RDMA resources to the cgroup + hierarchy is allowed even if can cross the hierarchy's limit. + config CGROUP_FREEZER bool "Freezer controller" helpdiff --git a/kernel/Makefile b/kernel/Makefile index baa55e5..501f5df 100644
+/** + * uncharge_cg_resource - uncharge resource for rdma cgroup + * @cg: pointer to cg to uncharge and all parents in hierarchy
It only uncharges a single cg, right?
+ * @device: pointer to ib device + * @index: index of the resource to uncharge in cg (resource pool) + * @num: the number of rdma resource to uncharge + * + * It also frees the resource pool in the hierarchy for the resource pool + * which was created as part of charing operation.
charing -> charging
+ */
+static void uncharge_cg_resource(struct rdma_cgroup *cg,
+ struct rdmacg_device *device,
+ int index, int num)
+{
+ struct rdmacg_resource_pool *rpool;
+ struct rdmacg_pool_info *pool_info = &device->pool_info;
+
+ spin_lock(&cg->rpool_list_lock);
+ rpool = find_cg_rpool_locked(cg, device);Is it possible for rpool to be NULL?
+
+ /*
+ * A negative count (or overflow) is invalid,
+ * it indicates a bug in the rdma controller.
+ */
+ rpool->resources[index].usage -= num;
+
+ WARN_ON_ONCE(rpool->resources[index].usage < 0);
+ rpool->refcnt--;
+ if (rpool->refcnt == 0 && rpool->num_max_cnt == pool_info->table_len) {
+ /*
+ * No user of the rpool and all entries are set to max, so
+ * safe to delete this rpool.
+ */
+ list_del(&rpool->cg_list);
+ spin_unlock(&cg->rpool_list_lock);
+
+ free_cg_rpool(rpool);
+ return;
+ }
+ spin_unlock(&cg->rpool_list_lock);
+}+/**
+ * charge_cg_resource - charge resource for rdma cgroup
+ * @cg: pointer to cg to charge
+ * @device: pointer to rdmacg device
+ * @index: index of the resource to charge in cg (resource pool)
+ * @num: the number of rdma resource to charge
+ */
+static int charge_cg_resource(struct rdma_cgroup *cg,
+ struct rdmacg_device *device,
+ int index, int num)
+{
+ struct rdmacg_resource_pool *rpool;
+ s64 new;
+ int ret = 0;
+
+retry:
+ spin_lock(&cg->rpool_list_lock);
+ rpool = find_cg_rpool_locked(cg, device);
+ if (!rpool) {
+ spin_unlock(&cg->rpool_list_lock);
+ ret = alloc_cg_rpool(cg, device);
+ if (ret)
+ goto err;
+ else
+ goto retry;Instead of retrying after allocation of a new rpool, why not just return the newly allocated rpool (or the existing one) from alloc_cg_rpool?
+ }
+ new = num + rpool->resources[index].usage;
+ if (new > rpool->resources[index].max) {
+ ret = -EAGAIN;
+ } else {
+ rpool->refcnt++;
+ rpool->resources[index].usage = new;
+ }
+ spin_unlock(&cg->rpool_list_lock);
+err:
+ return ret;
+}+static ssize_t rdmacg_resource_set_max(struct kernfs_open_file *of,
+ char *buf, size_t nbytes, loff_t off)
+{
+ struct rdma_cgroup *cg = css_rdmacg(of_css(of));
+ const char *dev_name;
+ struct rdmacg_resource_pool *rpool;
+ struct rdmacg_device *device;
+ char *options = strstrip(buf);
+ struct rdmacg_pool_info *pool_info;
+ u64 enables = 0;This limits the number of resources to 64. Sounds fine to me, but I think there should be a check somewhere (maybe in rdmacg_register_device()?) to make sure someone doesn't pass too many resources.
+ int *new_limits;
+ int i = 0, ret = 0;
+
+ /* extract the device name first */
+ dev_name = strsep(&options, " ");
+ if (!dev_name) {
+ ret = -EINVAL;
+ goto err;
+ }
+
+ /* acquire lock to synchronize with hot plug devices */
+ mutex_lock(&dev_mutex);
+
+ device = rdmacg_get_device_locked(dev_name);
+ if (!device) {
+ ret = -ENODEV;
+ goto parse_err;
+ }
+
+ pool_info = &device->pool_info;
+
+ new_limits = kcalloc(pool_info->table_len, sizeof(int), GFP_KERNEL);
+ if (!new_limits) {
+ ret = -ENOMEM;
+ goto parse_err;
+ }
+
+ ret = rdmacg_parse_limits(options, pool_info, new_limits, &enables);
+ if (ret)
+ goto opt_err;
+
+retry:
+ spin_lock(&cg->rpool_list_lock);
+ rpool = find_cg_rpool_locked(cg, device);
+ if (!rpool) {
+ spin_unlock(&cg->rpool_list_lock);
+ ret = alloc_cg_rpool(cg, device);
+ if (ret)
+ goto opt_err;
+ else
+ goto retry;You can avoid the retry here too. Perhaps this can go into a function.
+ }
+
+ /* now set the new limits of the rpool */
+ while (enables) {
+ /* if user set the limit, enables bit is set */
+ if (enables & BIT(i)) {
+ enables &= ~BIT(i);
+ set_resource_limit(rpool, i, new_limits[i]);
+ }
+ i++;
+ }
+ if (rpool->refcnt == 0 &&
+ rpool->num_max_cnt == pool_info->table_len) {
+ /*
+ * No user of the rpool and all entries are
+ * set to max, so safe to delete this rpool.
+ */
+ list_del(&rpool->cg_list);
+ spin_unlock(&cg->rpool_list_lock);
+ free_cg_rpool(rpool);
+ } else {
+ spin_unlock(&cg->rpool_list_lock);
+ }You should consider putting this piece of code in a function (the check of the reference counts and release of the rpool).
+ +opt_err: + kfree(new_limits); +parse_err: + mutex_unlock(&dev_mutex); +err: + return ret ?: nbytes; +} +
+ +static int print_rpool_values(struct seq_file *sf,
This can return void.
+ struct rdmacg_pool_info *pool_info,
+ u32 *value_tbl)
+{
+ int i;
+
+ for (i = 0; i < pool_info->table_len; i++) {
+ seq_puts(sf, pool_info->resource_name_table[i]);
+ seq_putc(sf, '=');
+ if (value_tbl[i] == S32_MAX)
+ seq_puts(sf, RDMACG_MAX_STR);
+ else
+ seq_printf(sf, "%d", value_tbl[i]);
+ seq_putc(sf, ' ');
+ }
+ return 0;
+}
+Thanks, Haggai