Re: [PATCHv3 4/5] edac: altera: Add Altera L2 Cache and OCRAM EDAC Support
From: Borislav Petkov <bp@alien8.de>
Date: 2014-11-04 15:12:30
Also in:
linux-arm-kernel, lkml
On Thu, Oct 30, 2014 at 10:32:10AM -0500, tthayer@opensource.altera.com wrote:
quoted hunk ↗ jump to hunk
From: Thor Thayer <redacted> Adding L2 Cache and On-Chip RAM EDAC support for the Altera SoCs using the EDAC device model. The SDRAM controller is using the Memory Controller model. All Altera EDAC functions live in altera_edac.c. Signed-off-by: Thor Thayer <redacted> --- v2: Fix L2 dependency comments. v3: Move OCRAM and L2 cache EDAC functions into altera_edac.c instead of separate files. --- drivers/edac/Kconfig | 14 ++ drivers/edac/Makefile | 5 +- drivers/edac/altera_edac.c | 475 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 492 insertions(+), 2 deletions(-)diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 1719975..b145a52 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig@@ -385,4 +385,18 @@ config EDAC_ALTERA_MC preloader must initialize the SDRAM before loading the kernel. +config EDAC_ALTERA_L2C + bool "Altera L2 Cache EDAC" + depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA && CACHE_L2X0 + help + Support for error detection and correction on the + Altera L2 cache Memory for Altera SoCs. + +config EDAC_ALTERA_OCRAM + bool "Altera On-Chip RAM EDAC" + depends on EDAC_MM_EDAC=y && ARCH_SOCFPGA && SRAM && GENERIC_ALLOCATOR + help + Support for error detection and correction on the + Altera On-Chip RAM Memory for Altera SoCs.
Why are those separate config items? Why not switch on EDAC_ALTERA_MC and get it all? We can always split them out later, if needed but I don't think we should burden the user with unnecessary questions if it is not absolutely necessary.
quoted hunk ↗ jump to hunk
+ endif # EDACdiff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index 359aa49..fa8aebc 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile@@ -66,4 +66,7 @@ obj-$(CONFIG_EDAC_OCTEON_L2C) += octeon_edac-l2c.o obj-$(CONFIG_EDAC_OCTEON_LMC) += octeon_edac-lmc.o obj-$(CONFIG_EDAC_OCTEON_PCI) += octeon_edac-pci.o -obj-$(CONFIG_EDAC_ALTERA_MC) += altera_edac.o +alt_edac-y := altera_edac.o +obj-$(CONFIG_EDAC_ALTERA_MC) += alt_edac.o +obj-$(CONFIG_EDAC_ALTERA_L2C) += alt_edac.o +obj-$(CONFIG_EDAC_ALTERA_OCRAM) += alt_edac.odiff --git a/drivers/edac/altera_edac.c b/drivers/edac/altera_edac.c index 3c4929f..1ee8d94 100644 --- a/drivers/edac/altera_edac.c +++ b/drivers/edac/altera_edac.c@@ -17,8 +17,10 @@ * Adapted from the highbank_mc_edac driver. */ +#include <asm/cacheflush.h> #include <linux/ctype.h> #include <linux/edac.h> +#include <linux/genalloc.h> #include <linux/interrupt.h> #include <linux/kernel.h> #include <linux/mfd/syscon.h>@@ -33,6 +35,7 @@ #define EDAC_MOD_STR "altera_edac" #define EDAC_VERSION "1" +#define EDAC_DEVICE "DEVICE"
That name is not very descriptive.
quoted hunk ↗ jump to hunk
/* SDRAM Controller CtrlCfg Register */ #define CTLCFG_OFST 0x00@@ -107,6 +110,30 @@ struct altr_sdram_mc_data { struct regmap *mc_vbase; }; +/************************** EDAC Device Defines **************************/ + +/* OCRAM ECC Management Group Defines */ +#define ALTR_MAN_GRP_OCRAM_ECC_OFFSET 0x04 +#define ALTR_OCR_ECC_EN_MASK 0x00000001 +#define ALTR_OCR_ECC_INJS_MASK 0x00000002 +#define ALTR_OCR_ECC_INJD_MASK 0x00000004 +#define ALTR_OCR_ECC_SERR_MASK 0x00000008 +#define ALTR_OCR_ECC_DERR_MASK 0x00000010 + +/* MPU L2 Register Defines */ +#define ALTR_MPUL2_CONTROL_OFFSET 0x100 +#define ALTR_MPUL2_CTL_CACHE_EN_MASK 0x00000001 + +/* L2 ECC Management Group Defines */ +#define ALTR_MAN_GRP_L2_ECC_OFFSET 0x00 +#define ALTR_L2_ECC_EN_MASK 0x00000001 +#define ALTR_L2_ECC_INJS_MASK 0x00000002 +#define ALTR_L2_ECC_INJD_MASK 0x00000004
You can use BIT() for those.
quoted hunk ↗ jump to hunk
+ +/*********************** EDAC Memory Controller Functions ****************/ + +/* The SDRAM controller uses the EDAC Memory Controller framework. */ + static irqreturn_t altr_sdram_mc_err_handler(int irq, void *dev_id) { struct mem_ctl_info *mci = dev_id;@@ -405,6 +432,452 @@ static struct platform_driver altr_sdram_edac_driver = { module_platform_driver(altr_sdram_edac_driver); +/************************* EDAC Device Functions *************************/ + +/* + * EDAC Device Functions (shared between various IPs). + * The discrete memories use the EDAC Device framework. The probe + * and error handling functions are very similar between memories + * so they are shared. The memory allocation and free for EDAC trigger + * testing are different for each memory. + */ + +const struct edac_device_prv_data ocramecc_data; +const struct edac_device_prv_data l2ecc_data; + +struct edac_device_prv_data { + int (*setup)(struct platform_device *pdev, void __iomem *base); + int ce_clear_mask; + int ue_clear_mask; +#ifdef CONFIG_EDAC_DEBUG + struct edac_dev_sysfs_attribute *eccmgr_sysfs_attr; + void * (*alloc_mem)(size_t size, void **other); + void (*free_mem)(void *p, size_t size, void *other); + int ecc_enable_mask; + int ce_set_mask; + int ue_set_mask; + int trig_alloc_sz; +#endif +}; + +struct altr_edac_device_dev { + void __iomem *base; + int sb_irq; + int db_irq; + const struct edac_device_prv_data *data; + char *edac_dev_name; +}; + +static irqreturn_t altr_edac_device_handler(int irq, void *dev_id) +{ + struct edac_device_ctl_info *dci = dev_id; + struct altr_edac_device_dev *drvdata = dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + + if (irq == drvdata->sb_irq) { + if (priv->ce_clear_mask) + writel(priv->ce_clear_mask, drvdata->base); + edac_device_handle_ce(dci, 0, 0, drvdata->edac_dev_name); + } + if (irq == drvdata->db_irq) { + if (priv->ue_clear_mask) + writel(priv->ue_clear_mask, drvdata->base); + edac_device_handle_ue(dci, 0, 0, drvdata->edac_dev_name); + panic("\nEDAC:ECC_DEVICE[Uncorrectable errors]\n"); + } + + return IRQ_HANDLED; +} + +#ifdef CONFIG_EDAC_DEBUG +ssize_t altr_edac_device_trig(struct edac_device_ctl_info *edac_dci, + const char *buffer, size_t count) +{ + u32 *ptemp, i, error_mask; + int result = 0; + unsigned long flags; + struct altr_edac_device_dev *drvdata = edac_dci->pvt_info; + const struct edac_device_prv_data *priv = drvdata->data; + void *generic_ptr = edac_dci->dev; + + if (!priv->alloc_mem) + return -ENOMEM; + + /* Note that generic_ptr is initialized to the device * but in + * some init_functions, this is overridden and returns data */
Kernel comment style: /* * Blabla. * More Bla. */
+ ptemp = priv->alloc_mem(priv->trig_alloc_sz, &generic_ptr);
+ if (!ptemp) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Inject: Buffer Allocation error\n");
+ return -ENOMEM;
+ }
+
+ if (count == 3)Magic number 3. /me needs crystal ball :)
+ error_mask = priv->ue_set_mask;
+ else
+ error_mask = priv->ce_set_mask;
+
+ edac_printk(KERN_ALERT, EDAC_DEVICE,
+ "Trigger Error Mask (0x%X)\n", error_mask);
+
+ local_irq_save(flags);
+ /* write ECC corrupted data out. */
+ for (i = 0; i < (priv->trig_alloc_sz/sizeof(*ptemp)); i++) {
+ /* Read data so we're in the correct state */
+ rmb();
+ if (ACCESS_ONCE(ptemp[i]))
+ result = -1;
+ /* Toggle Error bit (it is latched), leave ECC enabled */
+ writel(error_mask, drvdata->base);
+ writel(priv->ecc_enable_mask, drvdata->base);
+ ptemp[i] = i;
+ }
+ /* Ensure it has been written out */
+ wmb();
+ local_irq_restore(flags);Doesn't the interrupt reenabling on ARM already cause outstanding writes to commit so that you don't really need the memory barrier?
+ + if (result) + edac_printk(KERN_ERR, EDAC_DEVICE, "Mem Not Cleared\n"); + + /* Read out written data. ECC error caused here */ + for (i = 0; i < 16; i++)
Magic number 16. Where's my ball again?! :)
+ if (ACCESS_ONCE(ptemp[i]) != i) + edac_printk(KERN_ERR, EDAC_DEVICE, + "Mem Doesn't match\n");
I guess injectors should know what this printk means? I can assume it says something about inject data written doesn't match what we're reading?
+
+ if (priv->free_mem)
+ priv->free_mem(ptemp, priv->trig_alloc_sz, generic_ptr);
+
+ return count;
+}
+
+static void altr_set_sysfs_attr(struct edac_device_ctl_info *edac_dci,
+ const struct edac_device_prv_data *priv)
+{
+ struct edac_dev_sysfs_attribute *ecc_attr = priv->eccmgr_sysfs_attr;
+
+ if (ecc_attr) {
+ edac_dci->sysfs_attributes = ecc_attr;
+ edac_printk(KERN_ERR, EDAC_DEVICE, "Set SysFS trigger\n");Why do we have to say this here? Debugging leftovers maybe?
+ }
+}
+#else
+static void altr_set_sysfs_attr(struct edac_device_ctl_info *edac_dci,
+ const struct edac_device_prv_data *priv)
+{}
+#endif /* #ifdef CONFIG_EDAC_DEBUG */
+
+static const struct of_device_id altr_edac_device_of_match[] = {
+#ifdef CONFIG_EDAC_ALTERA_L2C
+ { .compatible = "altr,l2-edac", .data = (void *)&l2ecc_data },
+#endif
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
+ { .compatible = "altr,ocram-edac", .data = (void *)&ocramecc_data },
+#endifIf you enable l2c and ocram by default, you can save yourself the ugly ifdeffery here too.
+ {},
+};
+MODULE_DEVICE_TABLE(of, altr_edac_device_of_match);
+
+/*
+ * altr_edac_device_probe()
+ * This is a generic EDAC device driver that will support
+ * various Altera memory devices such as the L2 cache ECC and
+ * OCRAM ECC as well as the memories for other peripherals.
+ * Module specific initialization is done by passing the
+ * function index in the device tree.
+ */
+static int altr_edac_device_probe(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci;
+ struct altr_edac_device_dev *drvdata;
+ struct resource *r;
+ int res = 0;
+ struct device_node *np = pdev->dev.of_node;
+ char *ecc_name = (char *)np->name;
+ static int dev_instance;
+
+ if (!devres_open_group(&pdev->dev, NULL, GFP_KERNEL))
+ return -ENOMEM;
+
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ if (!r) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "Unable to get mem resource\n");
+ return -ENODEV;
+ }
+
+ if (!devm_request_mem_region(&pdev->dev, r->start, resource_size(r),
+ dev_name(&pdev->dev))) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s:Error requesting mem region\n", ecc_name);
+ return -EBUSY;
+ }
+
+ dci = edac_device_alloc_ctl_info(sizeof(*drvdata), ecc_name,
+ 1, ecc_name, 1, 0, NULL, 0,
+ dev_instance++);
+
+ if (!dci)
+ return -ENOMEM;
+
+ drvdata = dci->pvt_info;
+ dci->dev = &pdev->dev;
+ platform_set_drvdata(pdev, dci);
+ drvdata->edac_dev_name = ecc_name;
+
+ drvdata->base = devm_ioremap(&pdev->dev, r->start, resource_size(r));
+ if (!drvdata->base) {
+ edac_printk(KERN_ERR, EDAC_DEVICE,
+ "%s:Unable to map regs\n", ecc_name);
+ return -ENOMEM;This should be goto err, no? To unwind what edac_device_alloc_ctl_info() allocated.
+ }
+
+ /* Get driver specific data for this EDAC device */
+ drvdata->data = of_match_node(altr_edac_device_of_match, np)->data;
+
+ /* Check specific dependencies for the module */
+ if (drvdata->data->setup) {
+ res = drvdata->data->setup(pdev, drvdata->base);
+ if (res < 0)
+ goto err;
+ }
+
+ drvdata->sb_irq = platform_get_irq(pdev, 0);
+ res = devm_request_irq(&pdev->dev, drvdata->sb_irq,
+ altr_edac_device_handler,
+ 0, dev_name(&pdev->dev), dci);
+ if (res < 0)
+ goto err;
+
+ drvdata->db_irq = platform_get_irq(pdev, 1);
+ res = devm_request_irq(&pdev->dev, drvdata->db_irq,
+ altr_edac_device_handler,
+ 0, dev_name(&pdev->dev), dci);
+ if (res < 0)
+ goto err;
+
+ dci->mod_name = "EDAC_DEVICE";Maybe more descriptive name with "ALTERA" in it?
+ dci->dev_name = drvdata->edac_dev_name;
+
+ altr_set_sysfs_attr(dci, drvdata->data);
+
+ if (edac_device_add_device(dci))
+ goto err;
+
+ devres_close_group(&pdev->dev, NULL);
+
+ return 0;
+err:
+ devres_release_group(&pdev->dev, NULL);
+ edac_device_free_ctl_info(dci);
+
+ return res;
+}
+
+static int altr_edac_device_remove(struct platform_device *pdev)
+{
+ struct edac_device_ctl_info *dci = platform_get_drvdata(pdev);
+
+ edac_device_del_device(&pdev->dev);
+ edac_device_free_ctl_info(dci);
+
+ return 0;
+}
+
+static struct platform_driver altr_edac_device_driver = {
+ .probe = altr_edac_device_probe,
+ .remove = altr_edac_device_remove,
+ .driver = {
+ .name = "altr_edac_device",
+ .of_match_table = altr_edac_device_of_match,
+ },
+};
+module_platform_driver(altr_edac_device_driver);
+
+/*********************** OCRAM EDAC Device Functions *********************/
+
+#ifdef CONFIG_EDAC_ALTERA_OCRAM
+
+#ifdef CONFIG_EDAC_DEBUG
+static void *ocram_alloc_mem(size_t size, void **other)
+{
+ struct device_node *np;
+ struct gen_pool *gp;
+ void *sram_addr;
+
+ np = of_find_compatible_node(NULL, NULL, "altr,ocram-edac");
+ if (!np)
+ return NULL;
+
+ gp = of_get_named_gen_pool(np, "iram", 0);
+ if (!gp)
+ return NULL;
+ *other = gp;There's this assignment here to the supplied pointer...
+ + sram_addr = (void *)gen_pool_alloc(gp, size); + if (!sram_addr) + return NULL;
... and when we fail here, caller still gets to play with it. Perhaps do the assignment after all calls here have succeeded first...?
+
+ memset(sram_addr, 0, size);
+ wmb(); /* Ensure data is written out */
+
+ return sram_addr;
+}
+
+static void ocram_free_mem(void *p, size_t size, void *other)
+{
+ gen_pool_free((struct gen_pool *)other, (u32)p, size);
+}
+
+static struct edac_dev_sysfs_attribute altr_ocr_sysfs_attributes[] = {
+ {
+ .attr = { .name = "altr_ocram_trigger",
+ .mode = (S_IRUGO | S_IWUSR) },
+ .show = NULL,
+ .store = altr_edac_device_trig
+ },
+ {
+ .attr = {.name = NULL }
+ }
+};
+#endif /* #ifdef CONFIG_EDAC_DEBUG */
+
+/*
+ * altr_ocram_dependencies()
+ * Test for OCRAM cache ECC dependencies upon entry because
+ * platform specific startup should have initialized the
+ * On-Chip RAM memory and enabled the ECC.
+ * Can't turn on ECC here because accessing un-initialized
+ * memory will cause CE/UE errors possibly causing an ABORT.
+ */
+static int altr_ocram_dependencies(struct platform_device *pdev,
+ void __iomem *base)
+{
+ u32 control;
+
+ control = readl(base) & ALTR_OCR_ECC_EN_MASK;
+ if (!control) {
+ dev_err(&pdev->dev,
+ "OCRAM: No ECC present or ECC disabled.\n");
+ return -ENODEV;
+ }Flip logic to save an indentation level: if (control) return 0; dev_err... return -ENODEV;
+
+ return 0;
+}
+
+const struct edac_device_prv_data ocramecc_data = {
+ .setup = altr_ocram_dependencies,
+ .ce_clear_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_SERR_MASK),
+ .ue_clear_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_DERR_MASK),
+#ifdef CONFIG_EDAC_DEBUG
+ .eccmgr_sysfs_attr = altr_ocr_sysfs_attributes,
+ .alloc_mem = ocram_alloc_mem,
+ .free_mem = ocram_free_mem,
+ .ecc_enable_mask = ALTR_OCR_ECC_EN_MASK,
+ .ce_set_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_INJS_MASK),
+ .ue_set_mask = (ALTR_OCR_ECC_EN_MASK | ALTR_OCR_ECC_INJD_MASK),
+ .trig_alloc_sz = (32 * sizeof(u32)),
+#endif
+};
+
+#endif /* #ifdef CONFIG_EDAC_ALTERA_OCRAM */
+
+/********************* L2 Cache EDAC Device Functions ********************/
+
+#ifdef CONFIG_EDAC_ALTERA_L2C
+
+#ifdef CONFIG_EDAC_DEBUG
+static void *l2_alloc_mem(size_t size, void **other)
+{
+ struct device *dev = *other;
+ void *ptemp = devm_kzalloc(dev, size, GFP_KERNEL);
+
+ if (!ptemp)
+ return NULL;
+
+ /* Make sure everything is written out */
+ wmb();
+ flush_cache_all();
+
+ return ptemp;
+}
+
+static void l2_free_mem(void *p, size_t size, void *other)
+{
+ struct device *dev = other;
+
+ if (dev && p)
+ devm_kfree(dev, p);
+}
+
+static struct edac_dev_sysfs_attribute altr_l2_sysfs_attributes[] = {
+ {
+ .attr = { .name = "altr_l2_trigger",
+ .mode = (S_IRUGO | S_IWUSR) },
+ .show = NULL,
+ .store = altr_edac_device_trig
+ },
+ {
+ .attr = {.name = NULL }
+ }
+};
+#endif /* #ifdef CONFIG_EDAC_DEBUG */
+
+/*
+ * altr_l2_dependencies()
+ * Test for L2 cache ECC dependencies upon entry because
+ * platform specific startup should have initialized the L2
+ * memory and enabled the ECC.
+ * Can't turn on ECC here because accessing un-initialized
+ * memory will cause CE/UE errors possibly causing an ABORT.
+ * Bail if ECC is not on.
+ * Test For 1) L2 ECC is enabled and 2) L2 Cache is enabled.
+ */
+static int altr_l2_dependencies(struct platform_device *pdev,
+ void __iomem *base)
+{
+ u32 control;
+ struct regmap *l2_vbase;
+
+ control = readl(base) & ALTR_L2_ECC_EN_MASK;
+ if (!control) {
+ dev_err(&pdev->dev, "L2: No ECC present, or ECC disabled\n");
+ return -ENODEV;
+ }
+
+ l2_vbase = syscon_regmap_lookup_by_compatible("arm,pl310-cache");
+ if (IS_ERR(l2_vbase)) {
+ dev_err(&pdev->dev,
+ "L2 ECC:regmap for arm,pl310-cache lookup failed.\n");
+ return -ENODEV;
+ }
+
+ regmap_read(l2_vbase, ALTR_MPUL2_CONTROL_OFFSET, &control);
+ if (!(control & ALTR_MPUL2_CTL_CACHE_EN_MASK)) {
+ dev_err(&pdev->dev, "L2: Cache disabled\n");
+ return -ENODEV;
+ }
+
+ return 0;
+}
+
+const struct edac_device_prv_data l2ecc_data = {
+ .setup = altr_l2_dependencies,
+ .ce_clear_mask = 0,
+ .ue_clear_mask = 0,
+#ifdef CONFIG_EDAC_DEBUG
+ .eccmgr_sysfs_attr = altr_l2_sysfs_attributes,
+ .alloc_mem = l2_alloc_mem,
+ .free_mem = l2_free_mem,
+ .ecc_enable_mask = ALTR_L2_ECC_EN_MASK,
+ .ce_set_mask = (ALTR_L2_ECC_EN_MASK | ALTR_L2_ECC_INJS_MASK),
+ .ue_set_mask = (ALTR_L2_ECC_EN_MASK | ALTR_L2_ECC_INJD_MASK),
+ .trig_alloc_sz = 4 * 1024,
+#endif
+};
+
+#endif /* #ifdef CONFIG_EDAC_ALTERA_L2C */
+
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Thor Thayer");
-MODULE_DESCRIPTION("EDAC Driver for Altera SDRAM Controller");
+MODULE_DESCRIPTION("EDAC Driver for Altera Memories");
--
1.7.9.5
--
Regards/Gruss,
Boris.
Sent from a fat crate under my desk. Formatting is fine.
--