[PATCH v3 2/4] firmware: use acpi to detect QEMU fw_cfg device for sysfs fw_cfg driver
From: somlo@cmu.edu (Gabriel L. Somlo)
Date: 2015-10-04 20:24:31
Also in:
linux-api, lkml, qemu-devel
On Sun, Oct 04, 2015 at 10:54:57AM +0300, Michael S. Tsirkin wrote:
On Sat, Oct 03, 2015 at 07:28:07PM -0400, Gabriel L. Somlo wrote:quoted
From: Gabriel Somlo <somlo@cmu.edu> Instead of blindly probing fw_cfg registers at known IOport and MMIO locations, use the ACPI subsystem to determine whether a QEMU fw_cfg device is present, and, if found, to initialize it. This limits portability to architectures which support ACPI (x86 and UEFI-enabled aarch64), but avoids touching hardware registers before being certain that our device is present. NOTE: The standard way to verify the presence of fw_cfg on arm VMs would have been to use the device tree, but that would have left out x86, which is the primary architecture targeted by this patch. Signed-off-by: Gabriel Somlo <somlo@cmu.edu>IMHO it's not a good idea to probe registers provided by CRS like this. It seems quite reasonable that we'd want to add some extra registers in the future, and this probing will break. Further, accessing registers directly means that there's no way to have ACPI code access them as that would cause race conditions. Maybe we should provide access methods in ACPI instead?
OK, I think I understand what you meant by "don't poke CRS" in the
other thread...
So, you're proposing I move the follwing bits:
/* atomic access to fw_cfg device (potentially slow i/o, so using
* mutex) */
static DEFINE_MUTEX(fw_cfg_dev_lock);
/* pick appropriate endianness for selector key */
static inline u16 fw_cfg_sel_endianness(u16 key)
{
return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key);
}
/* type for fw_cfg "directory scan" visitor/callback function */
typedef int (*fw_cfg_file_callback)(const struct fw_cfg_file *f);
/* run a given callback on each fw_cfg directory entry */
static int fw_cfg_scan_dir(fw_cfg_file_callback callback)
{
int ret = 0;
u32 count, i;
struct fw_cfg_file f;
mutex_lock(&fw_cfg_dev_lock);
iowrite16(fw_cfg_sel_endianness(FW_CFG_FILE_DIR), fw_cfg_reg_ctrl);
ioread8_rep(fw_cfg_reg_data, &count, sizeof(count));
for (i = 0; i < be32_to_cpu(count); i++) {
ioread8_rep(fw_cfg_reg_data, &f, sizeof(f));
ret = callback(&f);
if (ret)
break;
}
mutex_unlock(&fw_cfg_dev_lock);
return ret;
}
/* read chunk of given fw_cfg blob (caller responsible for
* sanity-check) */
static inline void fw_cfg_read_blob(u16 key,
void *buf, loff_t pos, size_t count)
{
mutex_lock(&fw_cfg_dev_lock);
iowrite16(fw_cfg_sel_endianness(key), fw_cfg_reg_ctrl);
while (pos-- > 0)
ioread8(fw_cfg_reg_data);
ioread8_rep(fw_cfg_reg_data, buf, count);
mutex_unlock(&fw_cfg_dev_lock);
}
into the FWCF, "QEMU0002" node as an AML method ? Have ACPI provide
mutual exclusion against competing readers, and somehow figure out how
to call the ACPI/AML code from the guest-side kernel driver whenever
I need to call fw_cfg_read_blob() ?
I guess I could implement fw_cfg_scan_dir() using fw_cfg_read_blob():
u32 count;
size_t bufsize;
void *buf;
fw_cfg_read_blob(FW_CFG_FILE_DIR, &count, 0, sizeof(u32));
bufsize = sizeof(u32) + count * sizeof(struct fw_cfg_file);
buf = kalloc(bufsize);
fw_cfg_read_blob(FW_CFG_FILE_DIR, buf, 0, bufsize);
...
/* now read all the blob meta-data from buf ... */
It would be 100% atomic, but since we can safely assume the fw_cfg
contents never change, it'd be OK.
The atomicity of the ACPI version of fw_cfg_read_blob(), picking the
right endianness for the selector, etc. would have to be done in AML
within the QEMU host-side patch.
If you know of anything I can look at for a good ASL example, please
point it out to me. I'm going to go away now and spend some quality
time with the ACPI spec :)
Thanks,
--Gabriel
quoted
--- .../ABI/testing/sysfs-firmware-qemu_fw_cfg | 4 + drivers/firmware/Kconfig | 2 +- drivers/firmware/qemu_fw_cfg.c | 201 +++++++++++---------- 3 files changed, 113 insertions(+), 94 deletions(-)diff --git a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg index f1ef44e..e9761bf 100644 --- a/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg +++ b/Documentation/ABI/testing/sysfs-firmware-qemu_fw_cfg@@ -76,6 +76,10 @@ Description: the port number of the control register. I.e., the two ports are overlapping, and can not be mapped separately. + NOTE 2. QEMU publishes the register details in the device tree + on arm guests, and in ACPI (under _HID "QEMU0002") on x86 and + select arm (aarch64) VM types. + === Firmware Configuration Items of Interest === Originally, the index key, size, and formatting of blobs indiff --git a/drivers/firmware/Kconfig b/drivers/firmware/Kconfig index 0466e80..bc12d31 100644 --- a/drivers/firmware/Kconfig +++ b/drivers/firmware/Kconfig@@ -137,7 +137,7 @@ config ISCSI_IBFT config FW_CFG_SYSFS tristate "QEMU fw_cfg device support in sysfs" - depends on SYSFS + depends on SYSFS && ACPI default n help Say Y or M here to enable the exporting of the QEMU firmwarediff --git a/drivers/firmware/qemu_fw_cfg.c b/drivers/firmware/qemu_fw_cfg.c index 3a67a16..f935afb 100644 --- a/drivers/firmware/qemu_fw_cfg.c +++ b/drivers/firmware/qemu_fw_cfg.c@@ -8,6 +8,7 @@ */ #include <linux/module.h> +#include <linux/acpi.h> #include <linux/slab.h> #include <linux/io.h> #include <linux/ioport.h>@@ -35,53 +36,10 @@ struct fw_cfg_file { char name[FW_CFG_MAX_FILE_PATH]; }; -/* fw_cfg device i/o access options type */ -struct fw_cfg_access { - const char *name; - phys_addr_t base; - u8 size; - u8 ctrl_offset; - u8 data_offset; - bool is_mmio; -}; - -/* table of fw_cfg device i/o access options for known architectures */ -static struct fw_cfg_access fw_cfg_modes[] = { - { - .name = "fw_cfg IOport on i386, sun4u", - .base = 0x510, - .size = 0x02, - .ctrl_offset = 0x00, - .data_offset = 0x01, - .is_mmio = false, - }, { - .name = "fw_cfg MMIO on arm", - .base = 0x9020000, - .size = 0x0a, - .ctrl_offset = 0x08, - .data_offset = 0x00, - .is_mmio = true, - }, { - .name = "fw_cfg MMIO on sun4m", - .base = 0xd00000510, - .size = 0x03, - .ctrl_offset = 0x00, - .data_offset = 0x02, - .is_mmio = true, - }, { - .name = "fw_cfg MMIO on ppc/mac", - .base = 0xf0000510, - .size = 0x03, - .ctrl_offset = 0x00, - .data_offset = 0x02, - .is_mmio = true, - }, { } /* END */ -}; - -/* fw_cfg device i/o currently selected option set */ -static struct fw_cfg_access *fw_cfg_mode; - /* fw_cfg device i/o register addresses */ +static bool fw_cfg_is_mmio; +static phys_addr_t fw_cfg_phys_base; +static u32 fw_cfg_phys_size; static void __iomem *fw_cfg_dev_base; static void __iomem *fw_cfg_reg_ctrl; static void __iomem *fw_cfg_reg_data;@@ -92,7 +50,7 @@ static DEFINE_MUTEX(fw_cfg_dev_lock); /* pick appropriate endianness for selector key */ static inline u16 fw_cfg_sel_endianness(u16 key) { - return fw_cfg_mode->is_mmio ? cpu_to_be16(key) : cpu_to_le16(key); + return fw_cfg_is_mmio ? cpu_to_be16(key) : cpu_to_le16(key); } /* type for fw_cfg "directory scan" visitor/callback function */@@ -133,60 +91,100 @@ static inline void fw_cfg_read_blob(u16 key, /* clean up fw_cfg device i/o */ static void fw_cfg_io_cleanup(void) { - if (fw_cfg_mode->is_mmio) { + if (fw_cfg_is_mmio) { iounmap(fw_cfg_dev_base); - release_mem_region(fw_cfg_mode->base, fw_cfg_mode->size); + release_mem_region(fw_cfg_phys_base, fw_cfg_phys_size); } else { ioport_unmap(fw_cfg_dev_base); - release_region(fw_cfg_mode->base, fw_cfg_mode->size); + release_region(fw_cfg_phys_base, fw_cfg_phys_size); } } -/* probe and map fw_cfg device */ -static int __init fw_cfg_io_probe(void) +/* configure fw_cfg device i/o from ACPI _CRS method */ +static acpi_status fw_cfg_walk_crs(struct acpi_resource *r, void *context) +{ + struct acpi_resource_io *io; + struct acpi_resource_fixed_memory32 *mmio; + + switch (r->type) { + case ACPI_RESOURCE_TYPE_END_TAG: + return AE_OK; + case ACPI_RESOURCE_TYPE_IO: + io = &r->data.io; + /* physical base addr should NOT be already set */ + if (fw_cfg_phys_base) + return AE_ERROR; + if (!request_region(io->minimum, + io->address_length, "fw_cfg_io")) + return AE_ERROR; + fw_cfg_dev_base = ioport_map(io->minimum, io->address_length); + if (!fw_cfg_dev_base) { + release_region(io->minimum, io->address_length); + return AE_ERROR; + } + fw_cfg_phys_base = io->minimum; + fw_cfg_phys_size = io->address_length; + fw_cfg_is_mmio = false; + /* set register addresses (pc/i386 offsets) */ + fw_cfg_reg_ctrl = fw_cfg_dev_base + 0x00; + fw_cfg_reg_data = fw_cfg_dev_base + 0x01; + return AE_OK; + case ACPI_RESOURCE_TYPE_FIXED_MEMORY32: + mmio = &r->data.fixed_memory32; + /* physical base addr should NOT be already set */ + if (fw_cfg_phys_base) + return AE_ERROR; + /* MMIO and ACPI, but not on ARM ?!?! */ + if (mmio->address_length < 0x0a) + return AE_ERROR; + if (!request_mem_region(mmio->address, + mmio->address_length, "fw_cfg_mem")) + return AE_ERROR; + fw_cfg_dev_base = ioremap(mmio->address, mmio->address_length); + if (!fw_cfg_dev_base) { + release_mem_region(mmio->address, mmio->address_length); + return AE_ERROR; + } + fw_cfg_phys_base = mmio->address; + fw_cfg_phys_size = mmio->address_length; + fw_cfg_is_mmio = true; + /* set register addresses (arm offsets) */ + fw_cfg_reg_ctrl = fw_cfg_dev_base + 0x08; + fw_cfg_reg_data = fw_cfg_dev_base + 0x00; + return AE_OK; + default: + return AE_ERROR; + } +} + +/* initialize fw_cfg device i/o from ACPI data */ +static int fw_cfg_acpi_init(struct acpi_device *dev) { char sig[FW_CFG_SIG_SIZE]; + acpi_status status; + int err; - for (fw_cfg_mode = &fw_cfg_modes[0]; - fw_cfg_mode->base; fw_cfg_mode++) { - - phys_addr_t base = fw_cfg_mode->base; - u8 size = fw_cfg_mode->size; - - /* reserve and map mmio or ioport region */ - if (fw_cfg_mode->is_mmio) { - if (!request_mem_region(base, size, fw_cfg_mode->name)) - continue; - fw_cfg_dev_base = ioremap(base, size); - if (!fw_cfg_dev_base) { - release_mem_region(base, size); - continue; - } - } else { - if (!request_region(base, size, fw_cfg_mode->name)) - continue; - fw_cfg_dev_base = ioport_map(base, size); - if (!fw_cfg_dev_base) { - release_region(base, size); - continue; - } - } + err = acpi_bus_get_status(dev); + if (err < 0) + return err; - /* set control and data register addresses */ - fw_cfg_reg_ctrl = fw_cfg_dev_base + fw_cfg_mode->ctrl_offset; - fw_cfg_reg_data = fw_cfg_dev_base + fw_cfg_mode->data_offset; + if (!(dev->status.enabled && dev->status.functional)) + return -ENODEV; - /* verify fw_cfg device signature */ - fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE); - if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) == 0) - /* success, we're done */ - return 0; + /* extract device i/o details from _CRS */ + status = acpi_walk_resources(dev->handle, METHOD_NAME__CRS, + fw_cfg_walk_crs, NULL); + if (status != AE_OK || !fw_cfg_phys_base) + return -ENODEV; - /* clean up before probing next access mode */ + /* verify fw_cfg device signature */ + fw_cfg_read_blob(FW_CFG_SIGNATURE, sig, 0, FW_CFG_SIG_SIZE); + if (memcmp(sig, "QEMU", FW_CFG_SIG_SIZE) != 0) { fw_cfg_io_cleanup(); + return -ENODEV; } - return -ENODEV; + return 0; } /* fw_cfg revision attribute, in /sys/firmware/qemu_fw_cfg top-level dir. */@@ -353,7 +351,7 @@ static struct kobject *fw_cfg_top_ko; static struct kobject *fw_cfg_sel_ko; /* callback function to register an individual fw_cfg file */ -static int __init fw_cfg_register_file(const struct fw_cfg_file *f) +static int fw_cfg_register_file(const struct fw_cfg_file *f) { int err; struct fw_cfg_sysfs_entry *entry;@@ -397,12 +395,12 @@ static inline void fw_cfg_kobj_cleanup(struct kobject *kobj) kobject_put(kobj); } -static int __init fw_cfg_sysfs_init(void) +static int fw_cfg_sysfs_add(struct acpi_device *dev) { int err; - /* probe for the fw_cfg "hardware" */ - err = fw_cfg_io_probe(); + /* initialize fw_cfg device i/o from ACPI data */ + err = fw_cfg_acpi_init(dev); if (err) return err;@@ -443,14 +441,31 @@ err_top: return err; } -static void __exit fw_cfg_sysfs_exit(void) +static int fw_cfg_sysfs_remove(struct acpi_device *dev) { pr_debug("fw_cfg: unloading.\n"); fw_cfg_sysfs_cache_cleanup(); fw_cfg_kobj_cleanup(fw_cfg_sel_ko); fw_cfg_kobj_cleanup(fw_cfg_top_ko); fw_cfg_io_cleanup(); + return 0; } -module_init(fw_cfg_sysfs_init); -module_exit(fw_cfg_sysfs_exit); +static const struct acpi_device_id fw_cfg_sysfs_device_ids[] = { + { "QEMU0002", 0 }, + { "", 0 }, +}; +MODULE_DEVICE_TABLE(acpi, fw_cfg_sysfs_device_ids); + +static struct acpi_driver fw_cfg_sysfs_driver = { + .name = "fw_cfg", + .class = "QEMU", + .ids = fw_cfg_sysfs_device_ids, + .ops = { + .add = fw_cfg_sysfs_add, + .remove = fw_cfg_sysfs_remove, + }, + .owner = THIS_MODULE, +}; + +module_acpi_driver(fw_cfg_sysfs_driver);-- 2.4.3