[PATCH v9 07/11] arm64: kexec_file: add crash dump support
From: james.morse@arm.com (James Morse)
Date: 2018-05-15 17:14:25
Also in:
kexec, lkml
Hi Akashi, On 25/04/18 07:26, AKASHI Takahiro wrote:
Enabling crash dump (kdump) includes * prepare contents of ELF header of a core dump file, /proc/vmcore, using crash_prepare_elf64_headers(), and * add two device tree properties, "linux,usable-memory-range" and "linux,elfcorehdr", which represent repsectively a memory range
(Nit: respectively)
to be used by crash dump kernel and the header's location
arch/arm64/include/asm/kexec.h | 4 + arch/arm64/kernel/kexec_image.c | 9 +- arch/arm64/kernel/machine_kexec_file.c | 202 +++++++++++++++++++++++++
In this patch, machine_kexec_file.c gains its own private fdt array encoder.
quoted hunk ↗ jump to hunk
diff --git a/arch/arm64/kernel/machine_kexec_file.c b/arch/arm64/kernel/machine_kexec_file.c index 37c0a9dc2e47..ec674f4d267c 100644 --- a/arch/arm64/kernel/machine_kexec_file.c +++ b/arch/arm64/kernel/machine_kexec_file.c@@ -76,6 +81,78 @@ int arch_kexec_walk_mem(struct kexec_buf *kbuf, return ret; } +static int __init arch_kexec_file_init(void) +{ + /* Those values are used later on loading the kernel */ + __dt_root_addr_cells = dt_root_addr_cells; + __dt_root_size_cells = dt_root_size_cells; + + return 0; +} +late_initcall(arch_kexec_file_init);
If we need these, is it worth taking them out of __initdata? I note they've been 'temporary' for quite a long time.
+
+#define FDT_ALIGN(x, a) (((x) + (a) - 1) & ~((a) - 1))
+#define FDT_TAGALIGN(x) (FDT_ALIGN((x), FDT_TAGSIZE))
+
+static int fdt_prop_len(const char *prop_name, int len)
+{
+ return (strlen(prop_name) + 1) +
+ sizeof(struct fdt_property) +
+ FDT_TAGALIGN(len);
+}
This stuff should really be in libfdt.h. Those macros come from libfdt_internal.h, so we're probably doing something wrong here.
+static bool cells_size_fitted(unsigned long base, unsigned long size)
+{
+ /* if *_cells >= 2, cells can hold 64-bit values anyway */
+ if ((__dt_root_addr_cells == 1) && (base >= (1ULL << 32)))
+ return false;
+
+ if ((__dt_root_size_cells == 1) && (size >= (1ULL << 32)))
+ return false;
Using '> U32_MAX' here may be more readable.
+ return true;
+}
+
+static void fill_property(void *buf, u64 val64, int cells)
+{
+ u32 val32;
+
+ if (cells == 1) {
+ val32 = cpu_to_fdt32((u32)val64);
+ memcpy(buf, &val32, sizeof(val32));
+ } else {+ memset(buf, 0, cells * sizeof(u32) - sizeof(u64)); + buf += cells * sizeof(u32) - sizeof(u64);
Is this trying to clear the 'top' cells and shuffle the pointer to point at the 'bottom' 2? I'm pretty sure this isn't endian safe. Do we really expect a system to have #address-cells > 2?
+ val64 = cpu_to_fdt64(val64); + memcpy(buf, &val64, sizeof(val64)); + } +} + +static int fdt_setprop_range(void *fdt, int nodeoffset, const char *name, + unsigned long addr, unsigned long size)
(the device-tree spec describes a 'ranges' property, which had me confused. This is encoding a prop-encoded-array)
+{
+ void *buf, *prop;
+ size_t buf_size;
+ int result;
+
+ buf_size = (__dt_root_addr_cells + __dt_root_size_cells) * sizeof(u32);
+ prop = buf = vmalloc(buf_size);
Virtual memory allocation for something less than PAGE_SIZE?
+ if (!buf) + return -ENOMEM; + + fill_property(prop, addr, __dt_root_addr_cells); + prop += __dt_root_addr_cells * sizeof(u32); + + fill_property(prop, size, __dt_root_size_cells); + + result = fdt_setprop(fdt, nodeoffset, name, buf, buf_size); + + vfree(buf); + + return result; +}
Doesn't this stuff belong in libfdt? I guess there is no 'add array element' API because this is the first time we've wanted to create a node with more than key=fixed-size-value. I don't think this belongs in arch C code. Do we have a plan for getting libfdt to support encoding prop-arrays? Can we put it somewhere anyone else duplicating this will find it, until we can (re)move it? I have no idea how that happens... it looks like the devicetree list is the place to ask.
quoted hunk ↗ jump to hunk
static int setup_dtb(struct kimage *image, unsigned long initrd_load_addr, unsigned long initrd_len, char *cmdline, unsigned long cmdline_len,@@ -88,10 +165,26 @@ static int setup_dtb(struct kimage *image, int range_len; int ret; + /* check ranges against root's #address-cells and #size-cells */ + if (image->type == KEXEC_TYPE_CRASH && + (!cells_size_fitted(image->arch.elf_load_addr, + image->arch.elf_headers_sz) || + !cells_size_fitted(crashk_res.start, + crashk_res.end - crashk_res.start + 1))) { + pr_err("Crash memory region doesn't fit into DT's root cell sizes.\n"); + ret = -EINVAL; + goto out_err; + }
To check I've understood this properly: This can happen if the firmware provided a DTB with 32bit address/size cells, but at least some of the memory requires 64 bit address/size cells. This could only happen on a UEFI system where the firmware-DTB doesn't describe memory. ACPI-only systems would have the EFIstub DT.
quoted hunk ↗ jump to hunk
/* duplicate dt blob */ buf_size = fdt_totalsize(initial_boot_params); range_len = (__dt_root_addr_cells + __dt_root_size_cells) * sizeof(u32); + if (image->type == KEXEC_TYPE_CRASH) + buf_size += fdt_prop_len("linux,elfcorehdr", range_len) + + fdt_prop_len("linux,usable-memory-range", + range_len); + if (initrd_load_addr) buf_size += fdt_prop_len("linux,initrd-start", sizeof(u64)) + fdt_prop_len("linux,initrd-end", sizeof(u64));@@ -113,6 +206,23 @@ static int setup_dtb(struct kimage *image, if (nodeoffset < 0) goto out_err; + if (image->type == KEXEC_TYPE_CRASH) { + /* add linux,elfcorehdr */ + ret = fdt_setprop_range(buf, nodeoffset, "linux,elfcorehdr", + image->arch.elf_load_addr, + image->arch.elf_headers_sz); + if (ret) + goto out_err; + + /* add linux,usable-memory-range */ + ret = fdt_setprop_range(buf, nodeoffset, + "linux,usable-memory-range", + crashk_res.start, + crashk_res.end - crashk_res.start + 1);
Don't you need to add "linux,usable-memory-range" to the buf_size estimate?
+ if (ret) + goto out_err; + }
quoted hunk ↗ jump to hunk
@@ -148,17 +258,109 @@ static int setup_dtb(struct kimage *image,
+static struct crash_mem *get_crash_memory_ranges(void)
+{
+ unsigned int nr_ranges;
+ struct crash_mem *cmem;
+
+ nr_ranges = 1; /* for exclusion of crashkernel region */
+ walk_system_ram_res(0, -1, &nr_ranges, get_nr_ranges_callback);
+
+ cmem = vmalloc(sizeof(struct crash_mem) +
+ sizeof(struct crash_mem_range) * nr_ranges);
+ if (!cmem)
+ return NULL;
+
+ cmem->max_nr_ranges = nr_ranges;
+ cmem->nr_ranges = 0;
+ walk_system_ram_res(0, -1, cmem, add_mem_range_callback);
+
+ /* Exclude crashkernel region */
+ if (crash_exclude_mem_range(cmem, crashk_res.start, crashk_res.end)) {
+ vfree(cmem);
+ return NULL;
+ }
+
+ return cmem;
+}
Could this function be included in prepare_elf_headers() so that the alloc() and free() occur together?
+static int prepare_elf_headers(void **addr, unsigned long *sz)
+{
+ struct crash_mem *cmem;
+ int ret = 0;
+
+ cmem = get_crash_memory_ranges();
+ if (!cmem)
+ return -ENOMEM;
+
+ ret = crash_prepare_elf64_headers(cmem, true, addr, sz);
+
+ vfree(cmem);+ return ret; +}
All this is moving memory-range information from core-code's walk_system_ram_res() into core-code's struct crash_mem, and excluding crashk_res, which again is accessible to the core code. It looks like this is duplicated in arch/x86 and arch/arm64 because arm64 doesn't have a second 'crashk_low_res' region, and always wants elf64, instead of when IS_ENABLED(CONFIG_X86_64). If we can abstract just those two, more of this could be moved to core code where powerpc can make use of it if they want to support kdump with kexec_file_load(). But it's getting late for cross-architecture dependencies; let's put that on the for-later list. (assuming there isn't a powerpc-kdump series out there adding a third copy of this) Thanks, James