Thread (49 messages), 7 authors, 2021-11-16

Re: [PATCH v2 13/15] hwmon: peci: Add dimmtemp driver

From: Guenter Roeck <linux@roeck-us.net>
Date: 2021-08-04 17:33:43
Also in: linux-arm-kernel, linux-aspeed, linux-devicetree, linux-doc, lkml, openbmc

On 8/4/21 3:46 AM, Winiarska, Iwona wrote:
On Tue, 2021-08-03 at 08:39 -0700, Guenter Roeck wrote:
quoted
On Tue, Aug 03, 2021 at 01:31:32PM +0200, Iwona Winiarska wrote:
quoted
Add peci-dimmtemp driver for Temperature Sensor on DIMM readings that
are accessible via the processor PECI interface.

The main use case for the driver (and PECI interface) is out-of-band
management, where we're able to obtain thermal readings from an external
entity connected with PECI, e.g. BMC on server platforms.

Co-developed-by: Jae Hyun Yoo <redacted>
Signed-off-by: Jae Hyun Yoo <redacted>
Signed-off-by: Iwona Winiarska <iwona.winiarska@intel.com>
Reviewed-by: Pierre-Louis Bossart <redacted>
---
Note that the timeout was completely removed - we're going to probe
for detected DIMMs every 5 seconds until we reach "stable" state of
either getting correct DIMM data or getting all -EINVAL (which
suggests that the CPU doesn't have any DIMMs).

  drivers/hwmon/peci/Kconfig    |  13 +
  drivers/hwmon/peci/Makefile   |   2 +
  drivers/hwmon/peci/dimmtemp.c | 614 ++++++++++++++++++++++++++++++++++
  3 files changed, 629 insertions(+)
  create mode 100644 drivers/hwmon/peci/dimmtemp.c
diff --git a/drivers/hwmon/peci/Kconfig b/drivers/hwmon/peci/Kconfig
index e10eed68d70a..9d32a57badfe 100644
--- a/drivers/hwmon/peci/Kconfig
+++ b/drivers/hwmon/peci/Kconfig
@@ -14,5 +14,18 @@ config SENSORS_PECI_CPUTEMP
           This driver can also be built as a module. If so, the module
           will be called peci-cputemp.
  
+config SENSORS_PECI_DIMMTEMP
+       tristate "PECI DIMM temperature monitoring client"
+       depends on PECI
+       select SENSORS_PECI
+       select PECI_CPU
+       help
+         If you say yes here you get support for the generic Intel PECI
hwmon
+         driver which provides Temperature Sensor on DIMM readings that are
+         accessible via the processor PECI interface.
+
+         This driver can also be built as a module. If so, the module
+         will be called peci-dimmtemp.
+
  config SENSORS_PECI
         tristate
diff --git a/drivers/hwmon/peci/Makefile b/drivers/hwmon/peci/Makefile
index e8a0ada5ab1f..191cfa0227f3 100644
--- a/drivers/hwmon/peci/Makefile
+++ b/drivers/hwmon/peci/Makefile
@@ -1,5 +1,7 @@
  # SPDX-License-Identifier: GPL-2.0-only
  
  peci-cputemp-y := cputemp.o
+peci-dimmtemp-y := dimmtemp.o
  
  obj-$(CONFIG_SENSORS_PECI_CPUTEMP)     += peci-cputemp.o
+obj-$(CONFIG_SENSORS_PECI_DIMMTEMP)    += peci-dimmtemp.o
diff --git a/drivers/hwmon/peci/dimmtemp.c b/drivers/hwmon/peci/dimmtemp.c
new file mode 100644
index 000000000000..6264c29bb6c0
--- /dev/null
+++ b/drivers/hwmon/peci/dimmtemp.c
@@ -0,0 +1,614 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (c) 2018-2021 Intel Corporation
+
+#include <linux/auxiliary_bus.h>
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/hwmon.h>
+#include <linux/jiffies.h>
+#include <linux/module.h>
+#include <linux/peci.h>
+#include <linux/peci-cpu.h>
+#include <linux/units.h>
+#include <linux/workqueue.h>
+#include <linux/x86/intel-family.h>
+
+#include "common.h"
+
+#define DIMM_MASK_CHECK_DELAY_JIFFIES  msecs_to_jiffies(5000)
+
+/* Max number of channel ranks and DIMM index per channel */
+#define CHAN_RANK_MAX_ON_HSX   8
+#define DIMM_IDX_MAX_ON_HSX    3
+#define CHAN_RANK_MAX_ON_BDX   4
+#define DIMM_IDX_MAX_ON_BDX    3
+#define CHAN_RANK_MAX_ON_BDXD  2
+#define DIMM_IDX_MAX_ON_BDXD   2
+#define CHAN_RANK_MAX_ON_SKX   6
+#define DIMM_IDX_MAX_ON_SKX    2
+#define CHAN_RANK_MAX_ON_ICX   8
+#define DIMM_IDX_MAX_ON_ICX    2
+#define CHAN_RANK_MAX_ON_ICXD  4
+#define DIMM_IDX_MAX_ON_ICXD   2
+
+#define CHAN_RANK_MAX          CHAN_RANK_MAX_ON_HSX
+#define DIMM_IDX_MAX           DIMM_IDX_MAX_ON_HSX
+#define DIMM_NUMS_MAX          (CHAN_RANK_MAX * DIMM_IDX_MAX)
+
+#define CPU_SEG_MASK           GENMASK(23, 16)
+#define GET_CPU_SEG(x)         (((x) & CPU_SEG_MASK) >> 16)
+#define CPU_BUS_MASK           GENMASK(7, 0)
+#define GET_CPU_BUS(x)         ((x) & CPU_BUS_MASK)
+
+#define DIMM_TEMP_MAX          GENMASK(15, 8)
+#define DIMM_TEMP_CRIT         GENMASK(23, 16)
+#define GET_TEMP_MAX(x)                (((x) & DIMM_TEMP_MAX) >> 8)
+#define GET_TEMP_CRIT(x)       (((x) & DIMM_TEMP_CRIT) >> 16)
+
+struct peci_dimmtemp;
+
+struct dimm_info {
+       int chan_rank_max;
+       int dimm_idx_max;
+       u8 min_peci_revision;
+       int (*read_thresholds)(struct peci_dimmtemp *priv, int dimm_order,
+                              int chan_rank, u32 *data);
+};
+
+struct peci_dimm_thresholds {
+       long temp_max;
+       long temp_crit;
+       struct peci_sensor_state state;
+};
+
+enum peci_dimm_threshold_type {
+       temp_max_type,
+       temp_crit_type,
+};
+
+struct peci_dimmtemp {
+       struct peci_device *peci_dev;
+       struct device *dev;
+       const char *name;
+       const struct dimm_info *gen_info;
+       struct delayed_work detect_work;
+       struct {
+               struct peci_sensor_data temp;
+               struct peci_dimm_thresholds thresholds;
+       } dimm[DIMM_NUMS_MAX];
+       char **dimmtemp_label;
+       DECLARE_BITMAP(dimm_mask, DIMM_NUMS_MAX);
+};
+
+static u8 __dimm_temp(u32 reg, int dimm_order)
+{
+       return (reg >> (dimm_order * 8)) & 0xff;
+}
+
+static int get_dimm_temp(struct peci_dimmtemp *priv, int dimm_no, long
*val)
+{
+       int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+       int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+       u32 data;
+       int ret;
         int ret = 0;
quoted
+
+       mutex_lock(&priv->dimm[dimm_no].temp.state.lock);
+       if (!peci_sensor_need_update(&priv->dimm[dimm_no].temp.state))
+               goto skip_update;
+
+       ret = peci_pcs_read(priv->peci_dev, PECI_PCS_DDR_DIMM_TEMP,
chan_rank, &data);
+       if (ret) {
+               mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
+               return ret;
+       }
         if (ret)
                 goto unlock;
quoted
+
+       priv->dimm[dimm_no].temp.value = __dimm_temp(data, dimm_order) *
MILLIDEGREE_PER_DEGREE;
+
+       peci_sensor_mark_updated(&priv->dimm[dimm_no].temp.state);
+
+skip_update:
+       *val = priv->dimm[dimm_no].temp.value;
unlock:
quoted
+       mutex_unlock(&priv->dimm[dimm_no].temp.state.lock);
+       return 0;
         return ret;
Ack.
quoted
quoted
+}
+
+static int update_thresholds(struct peci_dimmtemp *priv, int dimm_no)
+{
+       int dimm_order = dimm_no % priv->gen_info->dimm_idx_max;
+       int chan_rank = dimm_no / priv->gen_info->dimm_idx_max;
+       u32 data;
+       int ret;
+
+       if (!peci_sensor_need_update(&priv->dimm[dimm_no].thresholds.state))
+               return 0;
+
+       ret = priv->gen_info->read_thresholds(priv, dimm_order, chan_rank,
&data);
+       if (ret == -ENODATA) /* Use default or previous value */
+               return 0;
+       if (ret)
+               return ret;
+
+       priv->dimm[dimm_no].thresholds.temp_max = GET_TEMP_MAX(data) *
MILLIDEGREE_PER_DEGREE;
+       priv->dimm[dimm_no].thresholds.temp_crit = GET_TEMP_CRIT(data) *
MILLIDEGREE_PER_DEGREE;
+
+       peci_sensor_mark_updated(&priv->dimm[dimm_no].thresholds.state);
+
+       return 0;
+}
+
+static int get_dimm_thresholds(struct peci_dimmtemp *priv, enum
peci_dimm_threshold_type type,
+                              int dimm_no, long *val)
+{
+       int ret;
+
+       mutex_lock(&priv->dimm[dimm_no].thresholds.state.lock);
+       ret = update_thresholds(priv, dimm_no);
+       if (ret)
+               goto unlock;
+
+       switch (type) {
+       case temp_max_type:
+               *val = priv->dimm[dimm_no].thresholds.temp_max;
+               break;
+       case temp_crit_type:
+               *val = priv->dimm[dimm_no].thresholds.temp_crit;
+               break;
+       default:
+               ret = -EOPNOTSUPP;
+               break;
+       }
+unlock:
+       mutex_unlock(&priv->dimm[dimm_no].thresholds.state.lock);
+
+       return ret;
+}
+
+static int dimmtemp_read_string(struct device *dev,
+                               enum hwmon_sensor_types type,
+                               u32 attr, int channel, const char **str)
+{
+       struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+
+       if (attr != hwmon_temp_label)
+               return -EOPNOTSUPP;
+
+       *str = (const char *)priv->dimmtemp_label[channel];
+
+       return 0;
+}
+
+static int dimmtemp_read(struct device *dev, enum hwmon_sensor_types type,
+                        u32 attr, int channel, long *val)
+{
+       struct peci_dimmtemp *priv = dev_get_drvdata(dev);
+
+       switch (attr) {
+       case hwmon_temp_input:
+               return get_dimm_temp(priv, channel, val);
+       case hwmon_temp_max:
+               return get_dimm_thresholds(priv, temp_max_type, channel,
val);
+       case hwmon_temp_crit:
+               return get_dimm_thresholds(priv, temp_crit_type, channel,
val);
+       default:
+               break;
+       }
+
+       return -EOPNOTSUPP;
+}
+
+static umode_t dimmtemp_is_visible(const void *data, enum
hwmon_sensor_types type,
+                                  u32 attr, int channel)
+{
+       const struct peci_dimmtemp *priv = data;
+
+       if (test_bit(channel, priv->dimm_mask))
+               return 0444;
+
+       return 0;
+}
+
+static const struct hwmon_ops peci_dimmtemp_ops = {
+       .is_visible = dimmtemp_is_visible,
+       .read_string = dimmtemp_read_string,
+       .read = dimmtemp_read,
+};
+
+static int check_populated_dimms(struct peci_dimmtemp *priv)
+{
+       int chan_rank_max = priv->gen_info->chan_rank_max;
+       int dimm_idx_max = priv->gen_info->dimm_idx_max;
+       u32 chan_rank_empty = 0;
+       u64 dimm_mask = 0;
+       int chan_rank, dimm_idx, ret;
+       u32 pcs;
+
+       BUILD_BUG_ON(CHAN_RANK_MAX > 32);
+       BUILD_BUG_ON(DIMM_NUMS_MAX > 64);
I don't immediately see the value of those build bugs. What happens if
CHAN_RANK_MAX > 32 or DIMM_NUMS_MAX > 64? Where do those limits come
from?
Supported HW doesn't come near the limit for now - it's just an "artificial"
limit imposed by variables we're using (u64 for dimm_mask and u32 for
chan_rank_empty).
Please use a value derived from the size of those variables for the check
to clarify and explain the constraints.

Thanks,
Guenter
Keyboard shortcuts
h: back out one level
j: next message in thread
k: previous message in thread
l: drill in
Esc: close help / fold thread tree
?: toggle this help