first commit

This commit is contained in:
Fabio Belavenuto
2022-07-01 13:57:55 -03:00
commit a08662802d
1085 changed files with 341425 additions and 0 deletions

14
addons/virtio/install.sh Normal file
View File

@@ -0,0 +1,14 @@
# Addon hook: acts only when the first argument is "rd". If any file under
# the DMI id directory mentions QEMU or VirtualBox, the VirtIO modules are
# inserted in the fixed order listed below.
if [ "${1}" = "rd" ]; then
  echo "Checking for VirtIO..."
  if grep -r -q -E "(QEMU|VirtualBox)" /sys/devices/virtual/dmi/id/; then
    echo "VirtIO hypervisor detected!"
    for virtio_mod in virtio virtio_ring virtio_mmio virtio_pci virtio_net virtio_scsi; do
      ${INSMOD} "/modules/${virtio_mod}.ko" ${PARAMS}
    done
  else
    echo "No VirtIO hypervisor detected!"
  fi
fi

View File

@@ -0,0 +1,28 @@
# Addon manifest for the "virtio" addon.
# Each entry under available-for names a platform/kernel target; all of them
# reuse the same install script through the &script anchor / *script alias
# and declare that kernel modules are shipped (modules: true).
version: 1
name: virtio
description: "Drivers for QEMU/Virtualbox hypervisor"
available-for:
bromolow-3.10.108:
# The anchor is defined here and referenced by every later target.
install-script: &script "install.sh"
modules: true
apollolake-4.4.180:
install-script: *script
modules: true
broadwell-4.4.180:
install-script: *script
modules: true
broadwellnk-4.4.180:
install-script: *script
modules: true
denverton-4.4.180:
install-script: *script
modules: true
geminilake-4.4.180:
install-script: *script
modules: true
v1000-4.4.180:
install-script: *script
modules: true
purley-4.4.180:
install-script: *script
modules: true

View File

@@ -0,0 +1,5 @@
# Kbuild object list: every entry is appended to obj-m, i.e. built as a
# loadable module. The list matches the modules loaded by install.sh.
obj-m += virtio.o virtio_ring.o
obj-m += virtio_mmio.o
obj-m += virtio_pci.o
obj-m += virtio_net.o
obj-m += virtio_scsi.o

View File

@@ -0,0 +1,12 @@
/* Configuration space parsing helpers for virtio.
*
* The configuration is [type][len][... len bytes ...] fields.
*
* Copyright 2007 Rusty Russell, IBM Corporation.
* GPL v2 or later.
*/
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/bug.h>

View File

@@ -0,0 +1,246 @@
#include <linux/virtio.h>
#include <linux/spinlock.h>
#include <linux/virtio_config.h>
#include <linux/module.h>
#include <linux/idr.h>
/* Unique numbering for virtio devices. */
static DEFINE_IDA(virtio_index_ida);
/* "device" attribute: prints the virtio device ID as 0x-prefixed hex. */
static ssize_t device_show(struct device *_d,
			   struct device_attribute *attr, char *buf)
{
	struct virtio_device *vdev = dev_to_virtio(_d);

	return sprintf(buf, "0x%04x\n", vdev->id.device);
}
/* "vendor" attribute: prints the virtio vendor ID as 0x-prefixed hex. */
static ssize_t vendor_show(struct device *_d,
			   struct device_attribute *attr, char *buf)
{
	struct virtio_device *vdev = dev_to_virtio(_d);

	return sprintf(buf, "0x%04x\n", vdev->id.vendor);
}
static ssize_t status_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
return sprintf(buf, "0x%08x\n", dev->config->get_status(dev));
}
/* "modalias" attribute: prints "virtio:d<device>v<vendor>", the same
 * alias string that virtio_uevent() exports as MODALIAS. */
static ssize_t modalias_show(struct device *_d,
			     struct device_attribute *attr, char *buf)
{
	struct virtio_device *vdev = dev_to_virtio(_d);

	return sprintf(buf, "virtio:d%08Xv%08X\n",
		       vdev->id.device, vdev->id.vendor);
}
/* "features" attribute: prints one '0'/'1' character per bit of
 * dev->features, lowest bit first, followed by a newline. */
static ssize_t features_show(struct device *_d,
			     struct device_attribute *attr, char *buf)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	const unsigned int nbits = ARRAY_SIZE(vdev->features) * BITS_PER_LONG;
	char *cursor = buf;
	unsigned int bit;

	/* Emitted as a bitstring because the feature set could grow to an
	 * arbitrary length in future. */
	for (bit = 0; bit < nbits; bit++) {
		char digit = test_bit(bit, vdev->features) ? '1' : '0';

		cursor += sprintf(cursor, "%c", digit);
	}
	cursor += sprintf(cursor, "\n");

	return cursor - buf;
}
/* Read-only device attributes installed on every device of the virtio bus
 * (referenced by virtio_bus.dev_attrs). Each __ATTR_RO(x) entry is backed
 * by the x_show() function above; __ATTR_NULL terminates the table. */
static struct device_attribute virtio_dev_attrs[] = {
__ATTR_RO(device),
__ATTR_RO(vendor),
__ATTR_RO(status),
__ATTR_RO(modalias),
__ATTR_RO(features),
__ATTR_NULL
};
/* Returns 1 when @id matches @dev: both the device and the vendor field
 * must either equal the device's value or be VIRTIO_DEV_ANY_ID. */
static inline int virtio_id_match(const struct virtio_device *dev,
				  const struct virtio_device_id *id)
{
	int device_ok = id->device == dev->id.device ||
			id->device == VIRTIO_DEV_ANY_ID;

	if (!device_ok)
		return 0;

	return id->vendor == VIRTIO_DEV_ANY_ID || id->vendor == dev->id.vendor;
}
/* This looks through all the IDs a driver claims to support. If any of them
* match, we return 1 and the kernel will call virtio_dev_probe(). */
static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
{
unsigned int i;
struct virtio_device *dev = dev_to_virtio(_dv);
const struct virtio_device_id *ids;
ids = drv_to_virtio(_dr)->id_table;
for (i = 0; ids[i].device; i++)
if (virtio_id_match(dev, &ids[i]))
return 1;
return 0;
}
/* Bus uevent callback: adds MODALIAS=virtio:d<device>v<vendor> to the
 * event environment and returns add_uevent_var()'s result. */
static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
{
	struct virtio_device *vdev = dev_to_virtio(_dv);

	return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
			      vdev->id.device, vdev->id.vendor);
}
static void add_status(struct virtio_device *dev, unsigned status)
{
dev->config->set_status(dev, dev->config->get_status(dev) | status);
}
/* Asserts that @fbit appears in the feature_table of the driver bound to
 * @vdev; hits BUG() if the driver never listed that feature bit. */
void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
					 unsigned int fbit)
{
	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
	unsigned int idx = 0;

	while (idx < drv->feature_table_size) {
		if (drv->feature_table[idx] == fbit)
			return;
		idx++;
	}

	BUG();
}
EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
/*
 * Bus probe callback.
 *
 * Marks the device as having a driver, computes the intersection of the
 * device's feature bits with the driver's feature_table (transport feature
 * bits are always carried over), lets the transport finalize the result,
 * then calls the driver's probe(). On success DRIVER_OK is set and the
 * optional scan() hook is run; on failure FAILED is set.
 *
 * Returns the driver's probe() result.
 */
static int virtio_dev_probe(struct device *_d)
{
	int err;
	unsigned int i;
	struct virtio_device *dev = dev_to_virtio(_d);
	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
	u32 device_features;

	/* We have a driver! */
	add_status(dev, VIRTIO_CONFIG_S_DRIVER);

	/* Figure out what features the device supports. */
	device_features = dev->config->get_features(dev);

	/* Features supported by both device and driver into dev->features. */
	memset(dev->features, 0, sizeof(dev->features));
	for (i = 0; i < drv->feature_table_size; i++) {
		unsigned int f = drv->feature_table[i];

		BUG_ON(f >= 32);
		/* 1U: bit 31 must not be shifted into a signed int's sign bit. */
		if (device_features & (1U << f))
			set_bit(f, dev->features);
	}

	/* Transport features always preserved to pass to finalize_features. */
	for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
		if (device_features & (1U << i))
			set_bit(i, dev->features);

	dev->config->finalize_features(dev);

	err = drv->probe(dev);
	if (err)
		add_status(dev, VIRTIO_CONFIG_S_FAILED);
	else {
		add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
		if (drv->scan)
			drv->scan(dev);
	}

	return err;
}
/* Bus remove callback: runs the driver's remove(), warns once if the
 * device status is still non-zero afterwards, then re-acknowledges the
 * device. Always returns 0. */
static int virtio_dev_remove(struct device *_d)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	struct virtio_driver *vdrv = drv_to_virtio(vdev->dev.driver);

	vdrv->remove(vdev);

	/* The driver is expected to have reset the device by now. */
	WARN_ON_ONCE(vdev->config->get_status(vdev));

	/* Acknowledge the device's existence again. */
	add_status(vdev, VIRTIO_CONFIG_S_ACKNOWLEDGE);

	return 0;
}
/* The "virtio" bus type. Wires the match/uevent/probe/remove callbacks and
 * the per-device attribute table defined above into the driver core. */
static struct bus_type virtio_bus = {
.name = "virtio",
.match = virtio_dev_match,
.dev_attrs = virtio_dev_attrs,
.uevent = virtio_uevent,
.probe = virtio_dev_probe,
.remove = virtio_dev_remove,
};
/* Attaches @driver to the virtio bus and registers it with the driver
 * core. Returns driver_register()'s result. */
int register_virtio_driver(struct virtio_driver *driver)
{
	/* Catch a non-empty feature table without storage early. */
	BUG_ON(!driver->feature_table && driver->feature_table_size);

	driver->driver.bus = &virtio_bus;

	return driver_register(&driver->driver);
}
EXPORT_SYMBOL_GPL(register_virtio_driver);
/* Counterpart of register_virtio_driver(): removes @driver from the
 * driver core. */
void unregister_virtio_driver(struct virtio_driver *driver)
{
driver_unregister(&driver->driver);
}
EXPORT_SYMBOL_GPL(unregister_virtio_driver);
/*
 * Registers @dev on the virtio bus.
 *
 * Allocates a unique index from virtio_index_ida (used for the
 * "virtio<N>" device name), resets and acknowledges the device, then
 * registers it with the driver core, which triggers driver matching.
 *
 * Returns 0 on success or a negative errno. On any failure the FAILED
 * status bit is set; if device_register() fails the allocated index is
 * returned to the IDA so it is not leaked.
 */
int register_virtio_device(struct virtio_device *dev)
{
	int err;

	dev->dev.bus = &virtio_bus;

	/* Assign a unique device index and hence name. */
	err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL);
	if (err < 0)
		goto out;

	dev->index = err;
	dev_set_name(&dev->dev, "virtio%u", dev->index);

	/* We always start by resetting the device, in case a previous
	 * driver messed it up. This also tests that code path a little. */
	dev->config->reset(dev);

	/* Acknowledge that we've seen the device. */
	add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);

	INIT_LIST_HEAD(&dev->vqs);

	/* device_register() causes the bus infrastructure to look for a
	 * matching driver. */
	err = device_register(&dev->dev);
	if (err)
		/* Registration failed: give the index back to the IDA. */
		ida_simple_remove(&virtio_index_ida, dev->index);
out:
	if (err)
		add_status(dev, VIRTIO_CONFIG_S_FAILED);
	return err;
}
EXPORT_SYMBOL_GPL(register_virtio_device);
/* Unregisters @dev from the driver core and returns its index to the IDA.
 * The index is copied first because @dev may no longer be usable once
 * device_unregister() has run. */
void unregister_virtio_device(struct virtio_device *dev)
{
	const int saved_index = dev->index;

	device_unregister(&dev->dev);
	ida_simple_remove(&virtio_index_ida, saved_index);
}
EXPORT_SYMBOL_GPL(unregister_virtio_device);
/* Registers the virtio bus; a registration failure is treated as fatal
 * (panic). Always returns 0 otherwise. */
static int virtio_init(void)
{
	int err = bus_register(&virtio_bus);

	if (err != 0)
		panic("virtio bus registration failed");

	return 0;
}
/* Module teardown: unregisters the virtio bus and releases the memory
 * held by the device-index IDA. */
static void __exit virtio_exit(void)
{
bus_unregister(&virtio_bus);
ida_destroy(&virtio_index_ida);
}
/* virtio_init() is hooked in as a core initcall; virtio_exit() is the
 * module exit handler. */
core_initcall(virtio_init);
module_exit(virtio_exit);
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,665 @@
/*
* Virtio memory mapped device driver
*
* Copyright 2011, ARM Ltd.
*
* This module allows virtio devices to be used over a virtual, memory mapped
* platform device.
*
* The guest device(s) may be instantiated in one of three equivalent ways:
*
* 1. Static platform device in board's code, eg.:
*
* static struct platform_device v2m_virtio_device = {
* .name = "virtio-mmio",
* .id = -1,
* .num_resources = 2,
* .resource = (struct resource []) {
* {
* .start = 0x1001e000,
* .end = 0x1001e0ff,
* .flags = IORESOURCE_MEM,
* }, {
* .start = 42 + 32,
* .end = 42 + 32,
* .flags = IORESOURCE_IRQ,
* },
* }
* };
*
* 2. Device Tree node, eg.:
*
* virtio_block@1e000 {
* compatible = "virtio,mmio";
* reg = <0x1e000 0x100>;
* interrupts = <42>;
* }
*
* 3. Kernel module (or command line) parameter. Can be used more than once -
* one device will be created for each one. Syntax:
*
* [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>]
* where:
* <size> := size (can use standard suffixes like K, M or G)
* <baseaddr> := physical base address
* <irq> := interrupt number (as passed to request_irq())
* <id> := (optional) platform device id
* eg.:
* virtio_mmio.device=0x100@0x100b0000:48 \
* virtio_mmio.device=1K@0x1001e000:74
*
*
*
* Registers layout (all 32-bit wide):
*
* offset d. name description
* ------ -- ---------------- -----------------
*
* 0x000 R MagicValue Magic value "virt"
* 0x004 R Version Device version (current max. 1)
* 0x008 R DeviceID Virtio device ID
* 0x00c R VendorID Virtio vendor ID
*
* 0x010 R HostFeatures Features supported by the host
* 0x014 W HostFeaturesSel Set of host features to access via HostFeatures
*
* 0x020 W GuestFeatures Features activated by the guest
* 0x024 W GuestFeaturesSel Set of activated features to set via GuestFeatures
* 0x028 W GuestPageSize Size of guest's memory page in bytes
*
* 0x030 W QueueSel Queue selector
* 0x034 R QueueNumMax Maximum size of the currently selected queue
* 0x038 W QueueNum Queue size for the currently selected queue
* 0x03c W QueueAlign Used Ring alignment for the current queue
* 0x040 RW QueuePFN PFN for the currently selected queue
*
* 0x050 W QueueNotify Queue notifier
* 0x060 R InterruptStatus Interrupt status register
* 0x064 W InterruptACK Interrupt acknowledge register
* 0x070 RW Status Device status register
*
* 0x100+ RW Device-specific configuration space
*
* Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#define pr_fmt(fmt) "virtio-mmio: " fmt
#include <linux/highmem.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_mmio.h>
#include <linux/virtio_ring.h>
/* The alignment to use between consumer and producer parts of vring.
* Currently hardcoded to the page size. */
#define VIRTIO_MMIO_VRING_ALIGN PAGE_SIZE
/* Maps a pointer to the embedded "vdev" member back to its containing
 * struct virtio_mmio_device. Note: despite the parameter name, callers in
 * this file pass a struct virtio_device pointer, not a platform device. */
#define to_virtio_mmio_device(_plat_dev) \
container_of(_plat_dev, struct virtio_mmio_device, vdev)
/* Per-device state of one memory-mapped virtio device. */
struct virtio_mmio_device {
/* the generic virtio device embedded in this transport device */
struct virtio_device vdev;
/* the platform device this state was probed from */
struct platform_device *pdev;
/* ioremapped base of the device's register window */
void __iomem *base;
/* value read from the VIRTIO_MMIO_VERSION register at probe time */
unsigned long version;
/* a list of queues so we can dispatch IRQs */
/* "lock" protects "virtqueues" (taken around every list access) */
spinlock_t lock;
struct list_head virtqueues;
};
/* Transport bookkeeping for one virtqueue; stored in vq->priv and linked
 * into virtio_mmio_device.virtqueues. */
struct virtio_mmio_vq_info {
/* the actual virtqueue */
struct virtqueue *vq;
/* the number of entries in the queue */
unsigned int num;
/* the virtual address of the ring queue */
void *queue;
/* the list node for the virtqueues list */
struct list_head node;
};
/* Configuration interface */
/* config->get_features(): selects host feature word 0 and returns it. */
static u32 vm_get_features(struct virtio_device *vdev)
{
	struct virtio_mmio_device *mmio = to_virtio_mmio_device(vdev);

	/* TODO: Features > 32 bits */
	writel(0, mmio->base + VIRTIO_MMIO_HOST_FEATURES_SEL);

	return readl(mmio->base + VIRTIO_MMIO_HOST_FEATURES);
}
/* config->finalize_features(): lets the vring code adjust the transport
 * feature bits, then writes every word of vdev->features to the device
 * via the GuestFeaturesSel/GuestFeatures register pair. */
static void vm_finalize_features(struct virtio_device *vdev)
{
	struct virtio_mmio_device *mmio = to_virtio_mmio_device(vdev);
	int word;

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	for (word = 0; word < ARRAY_SIZE(vdev->features); word++) {
		writel(word, mmio->base + VIRTIO_MMIO_GUEST_FEATURES_SEL);
		writel(vdev->features[word],
		       mmio->base + VIRTIO_MMIO_GUEST_FEATURES);
	}
}
static void vm_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
ptr[i] = readb(vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
}
static void vm_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
const u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
writeb(ptr[i], vm_dev->base + VIRTIO_MMIO_CONFIG + offset + i);
}
static u8 vm_get_status(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
return readl(vm_dev->base + VIRTIO_MMIO_STATUS) & 0xff;
}
/* config->set_status(): writes @status to the Status register. Writing 0
 * would reset the device, so that is rejected with BUG_ON(); use
 * vm_reset() for resets. */
static void vm_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_mmio_device *mmio = to_virtio_mmio_device(vdev);

	/* We should never be setting status to 0. */
	BUG_ON(status == 0);

	writel(status, mmio->base + VIRTIO_MMIO_STATUS);
}
/* config->reset(): resets the device by writing 0 to the Status register. */
static void vm_reset(struct virtio_device *vdev)
{
	struct virtio_mmio_device *mmio = to_virtio_mmio_device(vdev);

	/* 0 status means a reset. */
	writel(0, mmio->base + VIRTIO_MMIO_STATUS);
}
/* Transport interface */
/* Notify callback handed to vring_new_virtqueue(): signals the other end
 * by writing the queue's index into the QueueNotify register. */
static void vm_notify(struct virtqueue *vq)
{
	struct virtio_mmio_device *mmio = to_virtio_mmio_device(vq->vdev);

	writel(vq->index, mmio->base + VIRTIO_MMIO_QUEUE_NOTIFY);
}
/* Notify all virtqueues on an interrupt. */
static irqreturn_t vm_interrupt(int irq, void *opaque)
{
struct virtio_mmio_device *vm_dev = opaque;
struct virtio_mmio_vq_info *info;
struct virtio_driver *vdrv = container_of(vm_dev->vdev.dev.driver,
struct virtio_driver, driver);
unsigned long status;
unsigned long flags;
irqreturn_t ret = IRQ_NONE;
/* Read and acknowledge interrupts */
status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)
&& vdrv && vdrv->config_changed) {
vdrv->config_changed(&vm_dev->vdev);
ret = IRQ_HANDLED;
}
if (likely(status & VIRTIO_MMIO_INT_VRING)) {
spin_lock_irqsave(&vm_dev->lock, flags);
list_for_each_entry(info, &vm_dev->virtqueues, node)
ret |= vring_interrupt(irq, info->vq);
spin_unlock_irqrestore(&vm_dev->lock, flags);
}
return ret;
}
/* Tears down one virtqueue: unlinks its info from the device list,
 * destroys the vring, deactivates the queue in the device (QueuePFN = 0)
 * and frees the ring pages and the info structure. */
static void vm_del_vq(struct virtqueue *vq)
{
	struct virtio_mmio_device *mmio = to_virtio_mmio_device(vq->vdev);
	struct virtio_mmio_vq_info *qinfo = vq->priv;
	/* Captured before vring_del_virtqueue() is called on vq. */
	unsigned int qindex = vq->index;
	unsigned long irqflags;
	unsigned long ring_bytes;

	spin_lock_irqsave(&mmio->lock, irqflags);
	list_del(&qinfo->node);
	spin_unlock_irqrestore(&mmio->lock, irqflags);

	vring_del_virtqueue(vq);

	/* Select and deactivate the queue */
	writel(qindex, mmio->base + VIRTIO_MMIO_QUEUE_SEL);
	writel(0, mmio->base + VIRTIO_MMIO_QUEUE_PFN);

	ring_bytes = PAGE_ALIGN(vring_size(qinfo->num, VIRTIO_MMIO_VRING_ALIGN));
	free_pages_exact(qinfo->queue, ring_bytes);
	kfree(qinfo);
}
static void vm_del_vqs(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtqueue *vq, *n;
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
vm_del_vq(vq);
free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
}
/*
 * Creates and activates virtqueue number @index of @vdev.
 *
 * Returns NULL when @name is NULL (the queue is skipped), the new
 * virtqueue on success, or an ERR_PTR():
 *   -ENOENT  the queue is already active (QueuePFN != 0) or the device
 *            reports a maximum size of 0;
 *   -ENOMEM  an allocation failed or vring_new_virtqueue() returned NULL.
 *
 * On success the per-queue info is stored in vq->priv and linked into
 * vm_dev->virtqueues under vm_dev->lock.
 */
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info;
struct virtqueue *vq;
unsigned long flags, size;
int err;
if (!name)
return NULL;
/* Select the queue we're interested in */
writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
/* Queue shouldn't already be set up. */
if (readl(vm_dev->base + VIRTIO_MMIO_QUEUE_PFN)) {
err = -ENOENT;
goto error_available;
}
/* Allocate and fill out our active queue description */
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
err = -ENOMEM;
goto error_kmalloc;
}
/* Allocate pages for the queue - start with a queue as big as
* possible (limited by maximum size allowed by device), drop down
* to a minimal size, just big enough to fit descriptor table
* and two rings (which makes it "alignment_size * 2")
*/
info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
/* If the device reports a 0 entry queue, we won't be able to
* use it to perform I/O, and vring_new_virtqueue() can't create
* empty queues anyway, so don't bother to set up the device.
*/
if (info->num == 0) {
err = -ENOENT;
goto error_alloc_pages;
}
/* Halve the entry count after every failed allocation until one
 * succeeds or the ring would drop below the minimum size. */
while (1) {
size = PAGE_ALIGN(vring_size(info->num,
VIRTIO_MMIO_VRING_ALIGN));
/* Did the last iter shrink the queue below minimum size? */
if (size < VIRTIO_MMIO_VRING_ALIGN * 2) {
err = -ENOMEM;
goto error_alloc_pages;
}
info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (info->queue)
break;
info->num /= 2;
}
/* Activate the queue */
/* Tell the device the chosen size, the ring alignment and the page
 * frame number of the ring memory. */
writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
writel(VIRTIO_MMIO_VRING_ALIGN,
vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
writel(virt_to_phys(info->queue) >> PAGE_SHIFT,
vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
/* Create the vring */
vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, info->queue, vm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto error_new_virtqueue;
}
vq->priv = info;
info->vq = vq;
spin_lock_irqsave(&vm_dev->lock, flags);
list_add(&info->node, &vm_dev->virtqueues);
spin_unlock_irqrestore(&vm_dev->lock, flags);
return vq;
/* Error unwinding: each label undoes the steps completed before the
 * corresponding failure point. */
error_new_virtqueue:
/* Deactivate the queue again before freeing its memory. */
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
free_pages_exact(info->queue, size);
error_alloc_pages:
kfree(info);
error_kmalloc:
error_available:
return ERR_PTR(err);
}
/*
 * config->find_vqs(): requests the device interrupt and sets up @nvqs
 * virtqueues, storing them in @vqs.
 *
 * @callbacks and @names are parallel arrays indexed by queue number.
 *
 * Returns 0 on success or a negative errno: the error from
 * platform_get_irq() if no interrupt is available, the error from
 * request_irq(), or the error of the first queue that failed to set up
 * (in which case every queue created so far is deleted and the interrupt
 * is released by vm_del_vqs()).
 */
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[],
		       vq_callback_t *callbacks[],
		       const char *names[])
{
	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
	int irq = platform_get_irq(vm_dev->pdev, 0);
	unsigned int i;
	int err;

	/* platform_get_irq() reports failure as a negative errno; do not
	 * hand that to request_irq() as if it were an interrupt number. */
	if (irq < 0)
		return irq;

	err = request_irq(irq, vm_interrupt, IRQF_SHARED,
			  dev_name(&vdev->dev), vm_dev);
	if (err)
		return err;

	for (i = 0; i < nvqs; ++i) {
		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
		if (IS_ERR(vqs[i])) {
			vm_del_vqs(vdev);
			return PTR_ERR(vqs[i]);
		}
	}

	return 0;
}
static const char *vm_bus_name(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
return vm_dev->pdev->name;
}
/* Transport operations table installed as vdev.config in
 * virtio_mmio_probe(); the virtio core calls the device through it. */
static const struct virtio_config_ops virtio_mmio_config_ops = {
.get = vm_get,
.set = vm_set,
.get_status = vm_get_status,
.set_status = vm_set_status,
.reset = vm_reset,
.find_vqs = vm_find_vqs,
.del_vqs = vm_del_vqs,
.get_features = vm_get_features,
.finalize_features = vm_finalize_features,
.bus_name = vm_bus_name,
};
/* Platform device */
static int virtio_mmio_probe(struct platform_device *pdev)
{
struct virtio_mmio_device *vm_dev;
struct resource *mem;
unsigned long magic;
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!mem)
return -EINVAL;
if (!devm_request_mem_region(&pdev->dev, mem->start,
resource_size(mem), pdev->name))
return -EBUSY;
vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
if (!vm_dev)
return -ENOMEM;
vm_dev->vdev.dev.parent = &pdev->dev;
vm_dev->vdev.config = &virtio_mmio_config_ops;
vm_dev->pdev = pdev;
INIT_LIST_HEAD(&vm_dev->virtqueues);
spin_lock_init(&vm_dev->lock);
vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
if (vm_dev->base == NULL)
return -EFAULT;
/* Check magic value */
magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
if (memcmp(&magic, "virt", 4) != 0) {
dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
return -ENODEV;
}
/* Check device version */
vm_dev->version = readl(vm_dev->base + VIRTIO_MMIO_VERSION);
if (vm_dev->version != 1) {
dev_err(&pdev->dev, "Version %ld not supported!\n",
vm_dev->version);
return -ENXIO;
}
vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID);
vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
platform_set_drvdata(pdev, vm_dev);
return register_virtio_device(&vm_dev->vdev);
}
/* Platform driver remove: unregisters the virtio device stored as the
 * platform device's driver data. Always returns 0. */
static int virtio_mmio_remove(struct platform_device *pdev)
{
	struct virtio_mmio_device *mmio = platform_get_drvdata(pdev);

	unregister_virtio_device(&mmio->vdev);

	return 0;
}
/* Devices list parameter */
#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)
/* Parent device under which all devices created from the "device="
 * parameter are registered. */
static struct device vm_cmdline_parent = {
.init_name = "virtio-mmio-cmdline",
};
/* Non-zero once vm_cmdline_parent has been registered. */
static int vm_cmdline_parent_registered;
/* Platform device id used for the next command-line device; may be
 * overwritten by an explicit ":<id>" and is incremented after each use. */
static int vm_cmdline_id;
/*
 * Parameter "set" handler: parses one
 * "<size>@<baseaddr>:<irq>[:<id>]" description and registers a
 * "virtio-mmio" platform device for it below vm_cmdline_parent
 * (registering the parent first if needed).
 *
 * Returns 0 on success, -EINVAL on a malformed string, or the error from
 * registering the parent or the platform device.
 */
static int vm_cmdline_set(const char *device,
			  const struct kernel_param *kp)
{
	struct resource resources[2] = {};
	struct platform_device *pdev;
	long long int base, size;
	unsigned int irq;
	char *rest;
	int fields, used = 0;
	int err;

	/* Consume "size" part of the command line parameter */
	size = memparse(device, &rest);

	/* Get "@<base>:<irq>[:<id>]" chunks */
	fields = sscanf(rest, "@%lli:%u%n:%d%n",
			&base, &irq, &used,
			&vm_cmdline_id, &used);

	/*
	 * sscanf() must process at least 2 chunks; also there
	 * must be no extra characters after the last chunk, so
	 * rest[used] must be '\0'
	 */
	if (fields < 2 || rest[used])
		return -EINVAL;

	resources[0].flags = IORESOURCE_MEM;
	resources[0].start = base;
	resources[0].end = base + size - 1;

	resources[1].flags = IORESOURCE_IRQ;
	resources[1].start = resources[1].end = irq;

	if (!vm_cmdline_parent_registered) {
		err = device_register(&vm_cmdline_parent);
		if (err) {
			pr_err("Failed to register parent device!\n");
			return err;
		}
		vm_cmdline_parent_registered = 1;
	}

	pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
		vm_cmdline_id,
		(unsigned long long)resources[0].start,
		(unsigned long long)resources[0].end,
		(int)resources[1].start);

	pdev = platform_device_register_resndata(&vm_cmdline_parent,
			"virtio-mmio", vm_cmdline_id++,
			resources, ARRAY_SIZE(resources), NULL, 0);

	return IS_ERR(pdev) ? PTR_ERR(pdev) : 0;
}
static int vm_cmdline_get_device(struct device *dev, void *data)
{
char *buffer = data;
unsigned int len = strlen(buffer);
struct platform_device *pdev = to_platform_device(dev);
snprintf(buffer + len, PAGE_SIZE - len, "0x%llx@0x%llx:%llu:%d\n",
pdev->resource[0].end - pdev->resource[0].start + 1ULL,
(unsigned long long)pdev->resource[0].start,
(unsigned long long)pdev->resource[1].start,
pdev->id);
return 0;
}
/* Parameter "get" handler: fills @buffer with one line per registered
 * command-line device and returns the string length plus one. */
static int vm_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	*buffer = '\0';

	device_for_each_child(&vm_cmdline_parent, buffer,
			      vm_cmdline_get_device);

	return strlen(buffer) + 1;
}
/* Handlers for the "device" parameter: set parses and registers a device,
 * get lists the devices registered so far. */
static struct kernel_param_ops vm_cmdline_param_ops = {
.set = vm_cmdline_set,
.get = vm_cmdline_get,
};
/* Exposes the parameter as "device", readable by the owner only (S_IRUSR). */
device_param_cb(device, &vm_cmdline_param_ops, NULL, S_IRUSR);
/* device_for_each_child() callback: unregisters the platform device that
 * wraps @dev. @data is unused. Always returns 0 so iteration continues. */
static int vm_unregister_cmdline_device(struct device *dev,
void *data)
{
platform_device_unregister(to_platform_device(dev));
return 0;
}
/* Unregisters every command-line device and then the parent device.
 * Does nothing if the parent was never registered. */
static void vm_unregister_cmdline_devices(void)
{
	if (!vm_cmdline_parent_registered)
		return;

	device_for_each_child(&vm_cmdline_parent, NULL,
			      vm_unregister_cmdline_device);
	device_unregister(&vm_cmdline_parent);
	vm_cmdline_parent_registered = 0;
}
#else
/* No-op variant used when CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not
 * defined, so virtio_mmio_exit() can call it unconditionally. */
static void vm_unregister_cmdline_devices(void)
{
}
#endif
/* Platform driver */
/* Device Tree match table: binds to nodes with compatible "virtio,mmio".
 * The empty entry terminates the table. */
static struct of_device_id virtio_mmio_match[] = {
{ .compatible = "virtio,mmio", },
{},
};
MODULE_DEVICE_TABLE(of, virtio_mmio_match);
/* Platform driver definition: matches platform devices named
 * "virtio-mmio" as well as Device Tree nodes listed in virtio_mmio_match. */
static struct platform_driver virtio_mmio_driver = {
.probe = virtio_mmio_probe,
.remove = virtio_mmio_remove,
.driver = {
.name = "virtio-mmio",
.owner = THIS_MODULE,
.of_match_table = virtio_mmio_match,
},
};
/* Module init: registers the platform driver and returns the result of
 * platform_driver_register(). */
static int __init virtio_mmio_init(void)
{
return platform_driver_register(&virtio_mmio_driver);
}
/* Module exit: unregisters the platform driver first, then removes any
 * devices that were created from the "device=" parameter. */
static void __exit virtio_mmio_exit(void)
{
platform_driver_unregister(&virtio_mmio_driver);
vm_unregister_cmdline_devices();
}
/* Module entry points and metadata. */
module_init(virtio_mmio_init);
module_exit(virtio_mmio_exit);
MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
MODULE_DESCRIPTION("Platform bus driver for memory mapped virtio devices");
MODULE_LICENSE("GPL");

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,858 @@
/*
* Virtio PCI driver
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <linux/module.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");
/* Our device structure: per-device state of one virtio PCI device. */
struct virtio_pci_device
{
/* the generic virtio device embedded in this transport device */
struct virtio_device vdev;
/* the underlying PCI device */
struct pci_dev *pci_dev;
/* the IO mapping for the PCI config space */
void __iomem *ioaddr;
/* a list of queues so we can dispatch IRQs */
/* "lock" is held while "virtqueues" is walked in the interrupt path */
spinlock_t lock;
struct list_head virtqueues;
/* MSI-X support */
/* non-zero while MSI-X is enabled on the device */
int msix_enabled;
/* non-zero while the legacy INTx interrupt is requested */
int intx_enabled;
/* arrays below are allocated per MSI-X vector */
struct msix_entry *msix_entries;
cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
char (*msix_names)[256];
/* Number of available vectors */
unsigned msix_vectors;
/* Vectors allocated, excluding per-vq vectors if any */
unsigned msix_used_vectors;
/* Status saved during hibernate/restore */
u8 saved_status;
/* Whether we have vector per vq */
bool per_vq_vectors;
};
/* Constants for MSI-X */
/* Use the first vector for configuration changes, the second and the rest
* for virtqueues. Thus, we need at least 2 vectors for MSI-X. */
enum {
VP_MSIX_CONFIG_VECTOR = 0,
VP_MSIX_VQ_VECTOR = 1,
};
/* Transport bookkeeping for one virtqueue; linked into
 * virtio_pci_device.virtqueues. */
struct virtio_pci_vq_info
{
/* the actual virtqueue */
struct virtqueue *vq;
/* the number of entries in the queue */
int num;
/* the virtual address of the ring queue */
void *queue;
/* the list node for the virtqueues list */
struct list_head node;
/* MSI-X vector (or none) */
unsigned msix_vector;
};
/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
/* The table itself matches every device ID of vendor 0x1af4
 * (PCI_ANY_ID); the all-zero entry terminates it. */
static DEFINE_PCI_DEVICE_TABLE(virtio_pci_id_table) = {
{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
{ 0 }
};
MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
/* Convert a generic virtio device to our structure: returns the
 * struct virtio_pci_device that embeds @vdev as its "vdev" member. */
static struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{
return container_of(vdev, struct virtio_pci_device, vdev);
}
/* virtio config->get_features() implementation */
static u32 vp_get_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* When someone needs more than 32 feature bits, we'll need to
* steal a bit to indicate that the rest are somewhere else. */
return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
/* config->finalize_features(): lets the vring code adjust the transport
 * feature bits, then writes the single feature word to the device. */
static void vp_finalize_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *pci = to_vp_device(vdev);

	/* We only support 32 feature bits (compile-time check). */
	BUILD_BUG_ON(ARRAY_SIZE(vdev->features) != 1);

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	iowrite32(vdev->features[0], pci->ioaddr + VIRTIO_PCI_GUEST_FEATURES);
}
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
VIRTIO_PCI_CONFIG(vp_dev) + offset;
u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
ptr[i] = ioread8(ioaddr + i);
}
/* the config->set() implementation. it's symmetric to the config->get()
* implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
VIRTIO_PCI_CONFIG(vp_dev) + offset;
const u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
iowrite8(ptr[i], ioaddr + i);
}
/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
/* config->set_status(): writes @status to the device status register.
 * Writing 0 would reset the device, so that is rejected with BUG_ON();
 * use vp_reset() for resets. */
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_device *pci = to_vp_device(vdev);

	/* We should never be setting status to 0. */
	BUG_ON(status == 0);

	iowrite8(status, pci->ioaddr + VIRTIO_PCI_STATUS);
}
/* Waits for pending interrupt handlers: the INTx line if it is in use,
 * and every available MSI-X vector. */
static void vp_synchronize_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *pci = to_vp_device(vdev);
	unsigned vec;

	if (pci->intx_enabled)
		synchronize_irq(pci->pci_dev->irq);

	for (vec = 0; vec < pci->msix_vectors; ++vec)
		synchronize_irq(pci->msix_entries[vec].vector);
}
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* 0 status means a reset. */
iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
/* Flush out the status write, and flush in device writes,
* including MSi-X interrupts, if any. */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
}
/* Notify callback used when creating a virtqueue: signals the other end
 * by writing the queue's index into the notification register. */
static void vp_notify(struct virtqueue *vq)
{
	struct virtio_pci_device *pci = to_vp_device(vq->vdev);

	iowrite16(vq->index, pci->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY);
}
/* Handle a configuration change: Tell driver if it wants to know. */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
struct virtio_pci_device *vp_dev = opaque;
struct virtio_driver *drv;
drv = container_of(vp_dev->vdev.dev.driver,
struct virtio_driver, driver);
if (drv && drv->config_changed)
drv->config_changed(&vp_dev->vdev);
return IRQ_HANDLED;
}
/* Notify all virtqueues on an interrupt. */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
struct virtio_pci_device *vp_dev = opaque;
struct virtio_pci_vq_info *info;
irqreturn_t ret = IRQ_NONE;
unsigned long flags;
spin_lock_irqsave(&vp_dev->lock, flags);
list_for_each_entry(info, &vp_dev->virtqueues, node) {
if (vring_interrupt(irq, info->vq) == IRQ_HANDLED)
ret = IRQ_HANDLED;
}
spin_unlock_irqrestore(&vp_dev->lock, flags);
return ret;
}
/* A small wrapper to also acknowledge the interrupt when it's handled.
 * I really need an EIO hook for the vring so I can ack the interrupt once we
 * know that we'll be handling the IRQ but before we invoke the callback since
 * the callback may notify the host which results in the host attempting to
 * raise an interrupt that we would then mask once we acknowledged the
 * interrupt. */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *pci = opaque;
	/* Reading the ISR also clears it, so the value must be kept. */
	u8 isr = ioread8(pci->ioaddr + VIRTIO_PCI_ISR);

	/* It's definitely not us if the ISR was not high */
	if (isr == 0)
		return IRQ_NONE;

	/* Configuration change? Tell driver if it wants to know. */
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);

	return vp_vring_interrupt(irq, opaque);
}
/* Releases all interrupt resources of @vdev: the INTx handler if
 * requested, every used MSI-X handler, the affinity masks, MSI-X itself,
 * and finally the per-vector arrays (whose pointers are reset to NULL). */
static void vp_free_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *pci = to_vp_device(vdev);
	int vec;

	if (pci->intx_enabled) {
		free_irq(pci->pci_dev->irq, pci);
		pci->intx_enabled = 0;
	}

	for (vec = 0; vec < pci->msix_used_vectors; ++vec)
		free_irq(pci->msix_entries[vec].vector, pci);

	for (vec = 0; vec < pci->msix_vectors; vec++) {
		if (pci->msix_affinity_masks[vec])
			free_cpumask_var(pci->msix_affinity_masks[vec]);
	}

	if (pci->msix_enabled) {
		/* Disable the vector used for configuration */
		iowrite16(VIRTIO_MSI_NO_VECTOR,
			  pci->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
		/* Flush the write out to device */
		ioread16(pci->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);

		pci_disable_msix(pci->pci_dev);
		pci->msix_enabled = 0;
		pci->msix_vectors = 0;
	}

	pci->msix_used_vectors = 0;

	kfree(pci->msix_names);
	pci->msix_names = NULL;

	kfree(pci->msix_entries);
	pci->msix_entries = NULL;

	kfree(pci->msix_affinity_masks);
	pci->msix_affinity_masks = NULL;
}
/*
 * Allocate MSI-X bookkeeping, enable @nvectors MSI-X vectors and register
 * the device-level handlers.
 *
 * @vdev:           the virtio device.
 * @nvectors:       number of MSI-X vectors to enable.
 * @per_vq_vectors: when false, a second vector shared by all virtqueues is
 *                  requested here; when true, per-queue handlers are left
 *                  to the caller.
 *
 * Vector 0 (the first used vector) is always bound to vp_config_changed.
 *
 * Returns 0 on success.  On failure everything acquired here is released
 * through vp_free_vectors() and a negative errno is returned: -ENOMEM for
 * allocation failures, -ENOSPC when pci_enable_msix() reports that fewer
 * vectors are available, -EBUSY when the device refuses the config vector,
 * or the error from pci_enable_msix()/request_irq().
 */
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_entries = kmalloc(nvectors * sizeof *vp_dev->msix_entries,
				       GFP_KERNEL);
	if (!vp_dev->msix_entries)
		goto error;
	vp_dev->msix_names = kmalloc(nvectors * sizeof *vp_dev->msix_names,
				     GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	vp_dev->msix_affinity_masks
		= kzalloc(nvectors * sizeof *vp_dev->msix_affinity_masks,
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;

	/*
	 * Record the table size as soon as the (zeroed) mask table exists.
	 * vp_free_vectors() walks msix_vectors entries when freeing the
	 * masks; if the count were only set after pci_enable_msix()
	 * succeeded, any earlier failure would leak the masks allocated
	 * below.
	 */
	vp_dev->msix_vectors = nvectors;

	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
					GFP_KERNEL))
			goto error;

	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;

	/* pci_enable_msix returns positive if we can't get this many. */
	err = pci_enable_msix(vp_dev->pci_dev, vp_dev->msix_entries, nvectors);
	if (err > 0)
		err = -ENOSPC;
	if (err)
		goto error;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	iowrite16(v, vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
	/* Verify we had enough resources to assign the vector */
	v = ioread16(vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR);
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	vp_free_vectors(vdev);
	/*
	 * vp_free_vectors() only resets msix_vectors when MSI-X had been
	 * enabled.  Make sure the count never outlives the mask table it
	 * describes, whichever step failed.
	 */
	vp_dev->msix_vectors = 0;
	return err;
}
/*
 * Register vp_interrupt as a shared handler on the PCI device's legacy
 * interrupt line.  On success the device is marked as using INTx.
 *
 * Returns 0 or the error reported by request_irq().
 */
static int vp_request_intx(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int rc = request_irq(vp_dev->pci_dev->irq, vp_interrupt, IRQF_SHARED,
			     dev_name(&vdev->dev), vp_dev);

	if (rc)
		return rc;

	vp_dev->intx_enabled = 1;
	return 0;
}
/*
 * Bring up virtqueue @index of @vdev and return it.
 *
 * Reads the queue size from the device, allocates the per-queue info and
 * the ring pages, writes the ring address to the device, creates the vring
 * and, when @msix_vec is not VIRTIO_MSI_NO_VECTOR, binds that MSI-X vector
 * to the queue.  Queues with a @callback are linked onto
 * vp_dev->virtqueues under vp_dev->lock.
 *
 * Returns the new virtqueue, or an ERR_PTR():
 *   -ENOENT  the queue size reads as 0 or the queue PFN is already set;
 *   -ENOMEM  an allocation or vring_new_virtqueue() failed;
 *   -EBUSY   the device read back VIRTIO_MSI_NO_VECTOR for the vector.
 */
static struct virtqueue *setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
u16 msix_vec)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
struct virtio_pci_vq_info *info;
struct virtqueue *vq;
unsigned long flags, size;
u16 num;
int err;
/* Select the queue we're interested in */
iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
/* Check if queue is either not available or already active. */
num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
return ERR_PTR(-ENOENT);
/* allocate and fill out our structure that represents an active
* queue */
info = kmalloc(sizeof(struct virtio_pci_vq_info), GFP_KERNEL);
if (!info)
return ERR_PTR(-ENOMEM);
info->num = num;
info->msix_vector = msix_vec;
/* Ring memory: whole pages, zero-filled. */
size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
if (info->queue == NULL) {
err = -ENOMEM;
goto out_info;
}
/* activate the queue */
iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
/* create the vring */
vq = vring_new_virtqueue(index, info->num, VIRTIO_PCI_VRING_ALIGN, vdev,
true, info->queue, vp_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto out_activate_queue;
}
vq->priv = info;
info->vq = vq;
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
/* Write the vector, then read it back to see what the device kept. */
iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto out_assign;
}
}
if (callback) {
spin_lock_irqsave(&vp_dev->lock, flags);
list_add(&info->node, &vp_dev->virtqueues);
spin_unlock_irqrestore(&vp_dev->lock, flags);
} else {
/* Not on the list; self-link the node so a later list_del() is harmless. */
INIT_LIST_HEAD(&info->node);
}
return vq;
/* Error unwinding, in reverse order of acquisition. */
out_assign:
vring_del_virtqueue(vq);
out_activate_queue:
/* Writing 0 to the PFN register deactivates the queue. */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
free_pages_exact(info->queue, size);
out_info:
kfree(info);
return ERR_PTR(err);
}
/*
 * Tear down a single virtqueue created by setup_vq(): unlink it from the
 * device's list, detach its MSI-X vector (if MSI-X is enabled), delete the
 * vring, deactivate the queue in the device and free the ring pages and the
 * per-queue info.
 *
 * Any per-queue IRQ handler is not released here.
 */
static void vp_del_vq(struct virtqueue *vq)
{
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
struct virtio_pci_vq_info *info = vq->priv;
unsigned long flags, size;
spin_lock_irqsave(&vp_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vp_dev->lock, flags);
/* Select the queue; the register writes below act on the selected queue. */
iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
}
vring_del_virtqueue(vq);
/* Deactivate the (already selected) queue */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
/* Recompute the allocation size exactly as setup_vq() did. */
size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
free_pages_exact(info->queue, size);
kfree(info);
}
/*
 * The config->del_vqs() implementation: tear down every virtqueue of
 * @vdev, then release the device's interrupt resources.
 */
static void vp_del_vqs(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *cur, *tmp;

	list_for_each_entry_safe(cur, tmp, &vdev->vqs, list) {
		struct virtio_pci_vq_info *vq_info = cur->priv;
		bool has_own_irq = vp_dev->per_vq_vectors &&
			vq_info->msix_vector != VIRTIO_MSI_NO_VECTOR;

		/* Per-queue handlers use the virtqueue itself as cookie. */
		if (has_own_irq)
			free_irq(vp_dev->msix_entries[vq_info->msix_vector].vector,
				 cur);

		vp_del_vq(cur);
	}

	vp_dev->per_vq_vectors = false;
	vp_free_vectors(vdev);
}
/*
 * Try to set up @nvqs virtqueues using one particular interrupt scheme.
 *
 * @vdev:           the virtio device.
 * @nvqs:           number of entries in @vqs, @callbacks and @names.
 * @vqs:            output array; entries whose name is NULL are set to NULL.
 * @callbacks:      per-queue callbacks; a NULL callback means the queue gets
 *                  no interrupt vector.
 * @names:          per-queue names; a NULL name skips that queue.
 * @use_msix:       false selects the legacy INTx line.
 * @per_vq_vectors: with MSI-X, give each queue that has a callback its own
 *                  vector instead of one vector shared by all queues.
 *
 * Returns 0 on success.  On failure everything set up so far is torn down
 * and a negative errno is returned.
 */
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char *names[],
bool use_msix,
bool per_vq_vectors)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u16 msix_vec;
int i, err, nvectors, allocated_vectors;
if (!use_msix) {
/* Old style: one normal interrupt for change and all vqs. */
err = vp_request_intx(vdev);
if (err)
goto error_request;
} else {
if (per_vq_vectors) {
/* Best option: one for change interrupt, one per vq. */
nvectors = 1;
for (i = 0; i < nvqs; ++i)
if (callbacks[i])
++nvectors;
} else {
/* Second best: one for change, shared for all vqs. */
nvectors = 2;
}
err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
if (err)
goto error_request;
}
vp_dev->per_vq_vectors = per_vq_vectors;
/* Per-queue vectors are handed out after the ones already in use. */
allocated_vectors = vp_dev->msix_used_vectors;
for (i = 0; i < nvqs; ++i) {
/* Pick the vector for this queue (or none). */
if (!names[i]) {
vqs[i] = NULL;
continue;
} else if (!callbacks[i] || !vp_dev->msix_enabled)
msix_vec = VIRTIO_MSI_NO_VECTOR;
else if (vp_dev->per_vq_vectors)
msix_vec = allocated_vectors++;
else
msix_vec = VP_MSIX_VQ_VECTOR;
vqs[i] = setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
if (IS_ERR(vqs[i])) {
err = PTR_ERR(vqs[i]);
goto error_find;
}
if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
continue;
/* allocate per-vq irq if available and necessary */
snprintf(vp_dev->msix_names[msix_vec],
sizeof *vp_dev->msix_names,
"%s-%s",
dev_name(&vp_dev->vdev.dev), names[i]);
err = request_irq(vp_dev->msix_entries[msix_vec].vector,
vring_interrupt, 0,
vp_dev->msix_names[msix_vec],
vqs[i]);
if (err) {
/*
 * This queue has no IRQ registered, so delete it here;
 * vp_del_vqs() would try to free_irq() a per-vq vector for
 * every queue still on the list.
 */
vp_del_vq(vqs[i]);
goto error_find;
}
}
return 0;
error_find:
vp_del_vqs(vdev);
error_request:
return err;
}
/*
 * The config->find_vqs() implementation.
 *
 * Interrupt schemes are attempted from most to least preferred; the first
 * one that succeeds wins and the result of the last attempt is returned
 * otherwise.
 */
static int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[],
		       vq_callback_t *callbacks[],
		       const char *names[])
{
	int err;

	/* Try MSI-X with one vector per queue. */
	err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true);

	/* Fallback: MSI-X with one vector for config, one shared for queues. */
	if (err)
		err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
					 true, false);

	/* Finally fall back to regular interrupts. */
	if (err)
		err = vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
					 false, false);

	return err;
}
static const char *vp_bus_name(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return pci_name(vp_dev->pci_dev);
}
/*
 * Set the interrupt affinity hint for a virtqueue.
 *
 * @vq:  the virtqueue; it must have a callback.
 * @cpu: the CPU to steer the queue's interrupt to, or -1 to drop the hint.
 *
 * With MSI-X enabled, the hint of the vector serving @vq is set to exactly
 * @cpu.  When a vector is shared by several queues the most recent request
 * wins.  The request is ignored when MSI-X is not in use (INTx).
 *
 * Returns 0, or -EINVAL if @vq has no callback.
 */
static int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = vq->priv;
	struct cpumask *mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (vp_dev->msix_enabled) {
		mask = vp_dev->msix_affinity_masks[info->msix_vector];
		irq = vp_dev->msix_entries[info->msix_vector].vector;
		if (cpu == -1)
			irq_set_affinity_hint(irq, NULL);
		else {
			/*
			 * The masks are obtained with alloc_cpumask_var(),
			 * which does not zero them, and earlier calls leave
			 * their CPU set.  Start from an empty mask so the
			 * hint names @cpu only.
			 */
			cpumask_clear(mask);
			cpumask_set_cpu(cpu, mask);
			irq_set_affinity_hint(irq, mask);
		}
	}
	return 0;
}
/* Transport operations handed to the virtio core for every virtio-pci device. */
static const struct virtio_config_ops virtio_pci_config_ops = {
	/* configuration space and status access */
	.get			= vp_get,
	.set			= vp_set,
	.get_status		= vp_get_status,
	.set_status		= vp_set_status,
	.reset			= vp_reset,
	/* virtqueue lifetime */
	.find_vqs		= vp_find_vqs,
	.del_vqs		= vp_del_vqs,
	/* feature negotiation */
	.get_features		= vp_get_features,
	.finalize_features	= vp_finalize_features,
	/* miscellaneous */
	.bus_name		= vp_bus_name,
	.set_vq_affinity	= vp_set_vq_affinity,
};
/*
 * Deliberately empty device release callback.
 *
 * The virtio device is allocated and freed together with the PCI device,
 * so there is nothing to release here; the callback exists only so the
 * driver core does not warn about a missing release method.
 */
static void virtio_pci_release_dev(struct device *_d)
{
}
/*
 * The PCI probing function.
 *
 * Accepts devices with IDs 0x1000..0x103f whose revision equals
 * VIRTIO_PCI_ABI_VERSION, allocates the virtio_pci_device, enables the PCI
 * device, claims its regions, maps BAR 0 and registers the virtio device.
 *
 * Returns 0 on success, -ENODEV for devices this driver does not own,
 * -ENOMEM on allocation or mapping failure, or the error of the failing
 * PCI/virtio call.  Everything acquired is released on failure.
 */
static int virtio_pci_probe(struct pci_dev *pci_dev,
const struct pci_device_id *id)
{
struct virtio_pci_device *vp_dev;
int err;
/* We only own devices >= 0x1000 and <= 0x103f: leave the rest. */
if (pci_dev->device < 0x1000 || pci_dev->device > 0x103f)
return -ENODEV;
if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
return -ENODEV;
}
/* allocate our structure and fill it out */
vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
if (vp_dev == NULL)
return -ENOMEM;
vp_dev->vdev.dev.parent = &pci_dev->dev;
vp_dev->vdev.dev.release = virtio_pci_release_dev;
vp_dev->vdev.config = &virtio_pci_config_ops;
vp_dev->pci_dev = pci_dev;
INIT_LIST_HEAD(&vp_dev->virtqueues);
spin_lock_init(&vp_dev->lock);
/* Disable MSI/MSIX to bring device to a known good state. */
pci_msi_off(pci_dev);
/* enable the device */
err = pci_enable_device(pci_dev);
if (err)
goto out;
err = pci_request_regions(pci_dev, "virtio-pci");
if (err)
goto out_enable_device;
/* Map BAR 0 in full (a length of 0 is passed). */
vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
if (vp_dev->ioaddr == NULL) {
err = -ENOMEM;
goto out_req_regions;
}
pci_set_drvdata(pci_dev, vp_dev);
pci_set_master(pci_dev);
/* we use the subsystem vendor/device id as the virtio vendor/device
* id. this allows us to use the same PCI vendor/device id for all
* virtio devices and to identify the particular virtio driver by
* the subsystem ids */
vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
vp_dev->vdev.id.device = pci_dev->subsystem_device;
/* finally register the virtio device */
err = register_virtio_device(&vp_dev->vdev);
if (err)
goto out_set_drvdata;
return 0;
/* Error unwinding, in reverse order of acquisition. */
out_set_drvdata:
pci_set_drvdata(pci_dev, NULL);
pci_iounmap(pci_dev, vp_dev->ioaddr);
out_req_regions:
pci_release_regions(pci_dev);
out_enable_device:
pci_disable_device(pci_dev);
out:
kfree(vp_dev);
return err;
}
/*
 * PCI remove callback: unregister the virtio device, delete its
 * virtqueues, then undo what virtio_pci_probe() set up and free the
 * device structure.
 */
static void virtio_pci_remove(struct pci_dev *pci_dev)
{
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	struct virtio_device *vdev = &vp_dev->vdev;

	unregister_virtio_device(vdev);
	vp_del_vqs(vdev);

	pci_set_drvdata(pci_dev, NULL);
	pci_iounmap(pci_dev, vp_dev->ioaddr);
	pci_release_regions(pci_dev);
	pci_disable_device(pci_dev);

	kfree(vp_dev);
}
#ifdef CONFIG_PM
static int virtio_pci_freeze(struct device *dev)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
struct virtio_driver *drv;
int ret;
drv = container_of(vp_dev->vdev.dev.driver,
struct virtio_driver, driver);
ret = 0;
vp_dev->saved_status = vp_get_status(&vp_dev->vdev);
if (drv && drv->freeze)
ret = drv->freeze(&vp_dev->vdev);
if (!ret)
pci_disable_device(pci_dev);
return ret;
}
static int virtio_pci_restore(struct device *dev)
{
struct pci_dev *pci_dev = to_pci_dev(dev);
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
struct virtio_driver *drv;
unsigned status = 0;
int ret;
drv = container_of(vp_dev->vdev.dev.driver,
struct virtio_driver, driver);
ret = pci_enable_device(pci_dev);
if (ret)
return ret;
pci_set_master(pci_dev);
/* We always start by resetting the device, in case a previous
* driver messed it up. */
vp_reset(&vp_dev->vdev);
/* Acknowledge that we've seen the device. */
status |= VIRTIO_CONFIG_S_ACKNOWLEDGE;
vp_set_status(&vp_dev->vdev, status);
/* Maybe driver failed before freeze.
* Restore the failed status, for debugging. */
status |= vp_dev->saved_status & VIRTIO_CONFIG_S_FAILED;
vp_set_status(&vp_dev->vdev, status);
if (!drv)
return 0;
/* We have a driver! */
status |= VIRTIO_CONFIG_S_DRIVER;
vp_set_status(&vp_dev->vdev, status);
vp_finalize_features(&vp_dev->vdev);
if (drv->restore) {
ret = drv->restore(&vp_dev->vdev);
if (ret) {
status |= VIRTIO_CONFIG_S_FAILED;
vp_set_status(&vp_dev->vdev, status);
return ret;
}
}
/* Finally, tell the device we're all set */
status |= VIRTIO_CONFIG_S_DRIVER_OK;
vp_set_status(&vp_dev->vdev, status);
return ret;
}
/* System-sleep hooks: freeze before sleeping, restore on the way back. */
static const struct dev_pm_ops virtio_pci_pm_ops = {
	SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
#endif
/* PCI driver glue; the PM operations are only wired up under CONFIG_PM. */
static struct pci_driver virtio_pci_driver = {
	.name		= "virtio-pci",
	.id_table	= virtio_pci_id_table,
	.probe		= virtio_pci_probe,
	.remove		= virtio_pci_remove,
#ifdef CONFIG_PM
	.driver.pm	= &virtio_pci_pm_ops,
#endif
};

module_pci_driver(virtio_pci_driver);

View File

@@ -0,0 +1,871 @@
/* Virtio ring implementation.
*
* Copyright 2007 Rusty Russell IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
/*
 * Ring sanity helpers.
 *
 * BAD_RING(vq, fmt, ...) reports a corrupted ring through dev_err().  With
 *   DEBUG it then BUG()s; otherwise it marks the queue as broken.
 * START_USE(vq) / END_USE(vq) bracket a queue operation.  With DEBUG they
 *   detect reentry (START_USE records __LINE__ in vq->in_use and panics if
 *   it was already set); otherwise they expand to nothing.
 *
 * The queue argument is parenthesized in every expansion so that any
 * pointer expression can be passed.
 */
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#else
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
#define START_USE(vq)
#define END_USE(vq)
#endif
/*
 * Private state of a vring-backed virtqueue.  The public struct virtqueue
 * is embedded as the 'vq' member; to_vvq() converts a pointer to it back
 * into the containing structure.
 */
struct vring_virtqueue
{
/* The generic virtqueue handed out to drivers. */
struct virtqueue vq;
/* Actual memory layout for this queue */
struct vring vring;
/* Can we use weak barriers? */
bool weak_barriers;
/* Other side has made a mess, don't try any more. */
bool broken;
/* Host supports indirect buffers */
bool indirect;
/* Host publishes avail event idx */
bool event;
/* Head of free buffer list. */
unsigned int free_head;
/* Number we've added since last sync. */
unsigned int num_added;
/* Last used index we've seen. */
u16 last_used_idx;
/* How to notify other side. FIXME: commonalize hcalls! */
void (*notify)(struct virtqueue *vq);
#ifdef DEBUG
/* They're supposed to lock for us. */
unsigned int in_use;
/* Figure out if their kicks are too delayed. */
bool last_add_time_valid;
ktime_t last_add_time;
#endif
/* Tokens for callbacks, indexed by the head descriptor of each buffer. */
void *data[];
};
/* Convert a struct virtqueue pointer into its enclosing vring_virtqueue. */
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
 * Iterator step for terminated (possibly chained) scatterlists: defers to
 * sg_next().  @count is accepted only to match the iterator signature and
 * is left untouched.
 */
static inline struct scatterlist *sg_next_chained(struct scatterlist *sg,
						  unsigned int *count)
{
	struct scatterlist *following = sg_next(sg);

	return following;
}
/*
 * Iterator step for plain scatterlist arrays: @count holds the number of
 * entries left including the current one.  It is decremented, and the next
 * array element is returned unless that was the last entry (then NULL).
 */
static inline struct scatterlist *sg_next_arr(struct scatterlist *sg,
					      unsigned int *count)
{
	*count -= 1;

	return *count ? sg + 1 : NULL;
}
/*
 * Set up an indirect table of descriptors and add it to the queue.
 *
 * @vq:        the queue.
 * @sgs:       @out_sgs readable lists followed by @in_sgs writable lists.
 * @next:      iterator used to step through each list.
 * @total_sg:  total number of entries (size of the table to allocate).
 * @total_out: number of readable entries; passed to @next as its counter.
 * @total_in:  number of writable entries; passed to @next as its counter.
 * @gfp:       allocation flags for the table.
 *
 * One ring descriptor is taken from the free list and pointed at the
 * freshly allocated table.
 *
 * Returns the index of that ring descriptor, or -ENOMEM if the table could
 * not be allocated (the ring is left untouched in that case).
 */
static inline int vring_add_indirect(struct vring_virtqueue *vq,
struct scatterlist *sgs[],
struct scatterlist *(*next)
(struct scatterlist *, unsigned int *),
unsigned int total_sg,
unsigned int total_out,
unsigned int total_in,
unsigned int out_sgs,
unsigned int in_sgs,
gfp_t gfp)
{
struct vring_desc *desc;
unsigned head;
struct scatterlist *sg;
int i, n;
/*
* We require lowmem mappings for the descriptors because
* otherwise virt_to_phys will give us bogus addresses in the
* virtqueue.
*/
gfp &= ~(__GFP_HIGHMEM | __GFP_HIGH);
desc = kmalloc(total_sg * sizeof(struct vring_desc), gfp);
if (!desc)
return -ENOMEM;
/* Transfer entries from the sg lists into the indirect page */
i = 0;
/* Readable entries first ... */
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
desc[i].flags = VRING_DESC_F_NEXT;
desc[i].addr = sg_phys(sg);
desc[i].len = sg->length;
desc[i].next = i+1;
i++;
}
}
/* ... then the writable ones, flagged VRING_DESC_F_WRITE. */
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
desc[i].addr = sg_phys(sg);
desc[i].len = sg->length;
desc[i].next = i+1;
i++;
}
}
/* The walk must have produced exactly the number of entries allocated. */
BUG_ON(i != total_sg);
/* Last one doesn't continue. */
desc[i-1].flags &= ~VRING_DESC_F_NEXT;
desc[i-1].next = 0;
/* We're about to use a buffer */
vq->vq.num_free--;
/* Use a single buffer which doesn't continue */
head = vq->free_head;
vq->vring.desc[head].flags = VRING_DESC_F_INDIRECT;
vq->vring.desc[head].addr = virt_to_phys(desc);
vq->vring.desc[head].len = i * sizeof(struct vring_desc);
/* Update free pointer */
vq->free_head = vq->vring.desc[head].next;
return head;
}
/*
 * Common implementation behind the virtqueue_add_*() entry points.
 *
 * @_vq:       the virtqueue.
 * @sgs:       @out_sgs readable lists followed by @in_sgs writable lists.
 * @next:      iterator used to step through each list.
 * @total_out: number of readable entries; passed to @next as its counter.
 * @total_in:  number of writable entries; passed to @next as its counter.
 * @data:      non-NULL token identifying the buffer.
 * @gfp:       allocation flags, used only for an indirect table.
 *
 * If the host supports indirect descriptors, more than one entry is being
 * added and a ring descriptor is free, an indirect table is tried first;
 * if that allocation fails the entries are placed directly in the ring.
 *
 * Returns 0 on success, or -ENOSPC if the ring has fewer free descriptors
 * than entries to add.
 */
static inline int virtqueue_add(struct virtqueue *_vq,
struct scatterlist *sgs[],
struct scatterlist *(*next)
(struct scatterlist *, unsigned int *),
unsigned int total_out,
unsigned int total_in,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
unsigned int i, n, avail, uninitialized_var(prev), total_sg;
int head;
START_USE(vq);
BUG_ON(data == NULL);
#ifdef DEBUG
{
ktime_t now = ktime_get();
/* No kick or get, with .1 second between? Warn. */
if (vq->last_add_time_valid)
WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> 100);
vq->last_add_time = now;
vq->last_add_time_valid = true;
}
#endif
total_sg = total_in + total_out;
/* If the host supports indirect descriptor tables, and we have multiple
* buffers, then go indirect. FIXME: tune this threshold */
if (vq->indirect && total_sg > 1 && vq->vq.num_free) {
head = vring_add_indirect(vq, sgs, next, total_sg, total_out,
total_in,
out_sgs, in_sgs, gfp);
if (likely(head >= 0))
goto add_head;
/* Indirect table allocation failed: fall back to direct descriptors. */
}
BUG_ON(total_sg > vq->vring.num);
BUG_ON(total_sg == 0);
if (vq->vq.num_free < total_sg) {
pr_debug("Can't add buf len %i - avail = %i\n",
total_sg, vq->vq.num_free);
/* FIXME: for historical reasons, we force a notify here if
* there are outgoing parts to the buffer. Presumably the
* host should service the ring ASAP. */
if (out_sgs)
vq->notify(&vq->vq);
END_USE(vq);
return -ENOSPC;
}
/* We're about to use some buffers from the free list. */
vq->vq.num_free -= total_sg;
head = i = vq->free_head;
/* Readable entries first; 'prev' tracks the last descriptor written. */
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = next(sg, &total_out)) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT;
vq->vring.desc[i].addr = sg_phys(sg);
vq->vring.desc[i].len = sg->length;
prev = i;
i = vq->vring.desc[i].next;
}
}
/* Then the writable entries, flagged VRING_DESC_F_WRITE. */
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = next(sg, &total_in)) {
vq->vring.desc[i].flags = VRING_DESC_F_NEXT|VRING_DESC_F_WRITE;
vq->vring.desc[i].addr = sg_phys(sg);
vq->vring.desc[i].len = sg->length;
prev = i;
i = vq->vring.desc[i].next;
}
}
/* Last one doesn't continue. */
vq->vring.desc[prev].flags &= ~VRING_DESC_F_NEXT;
/* Update free pointer */
vq->free_head = i;
add_head:
/* Set token. */
vq->data[head] = data;
/* Put entry in available array (but don't update avail->idx until they
* do sync). */
/* The mask assumes vring.num is a power of two. */
avail = (vq->vring.avail->idx & (vq->vring.num-1));
vq->vring.avail->ring[avail] = head;
/* Descriptors and available array need to be set before we expose the
* new available array entries. */
virtio_wmb(vq->weak_barriers);
vq->vring.avail->idx++;
vq->num_added++;
/* This is very unlikely, but theoretically possible. Kick
* just in case. */
if (unlikely(vq->num_added == (1 << 16) - 1))
virtqueue_kick(_vq);
pr_debug("Added buffer head %i to %p\n", head, vq);
END_USE(vq);
return 0;
}
/**
 * virtqueue_add_buf - expose buffer to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sg: the description of the buffer(s).
 * @out: the number of sg readable by other side
 * @in: the number of sg which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_buf(struct virtqueue *_vq,
		      struct scatterlist sg[],
		      unsigned int out,
		      unsigned int in,
		      void *data,
		      gfp_t gfp)
{
	/*
	 * Slot 0 is the readable part, slot 1 the writable part that
	 * follows it.  With no readable entries only one list is passed
	 * on, and slot 0 already points at the writable entries.
	 */
	struct scatterlist *parts[2] = { sg, sg + out };
	unsigned int n_out_lists = out ? 1 : 0;
	unsigned int n_in_lists = in ? 1 : 0;

	return virtqueue_add(_vq, parts, sg_next_arr,
			     out, in, n_out_lists, n_in_lists, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_buf);
/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int idx = 0, total_out = 0, total_in = 0;
	struct scatterlist *sg;

	/* Count the readable entries, list by list ... */
	while (idx < out_sgs) {
		for (sg = sgs[idx]; sg; sg = sg_next(sg))
			total_out++;
		idx++;
	}
	/* ... then the writable entries in the lists that follow. */
	while (idx < out_sgs + in_sgs) {
		for (sg = sgs[idx]; sg; sg = sg_next(sg))
			total_in++;
		idx++;
	}

	return virtqueue_add(_vq, sgs, sg_next_chained,
			     total_out, total_in, out_sgs, in_sgs, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: array of scatterlist entries (need not be terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist sg[], unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	/* A single readable list and no writable ones. */
	struct scatterlist *lists[1] = { sg };

	return virtqueue_add(vq, lists, sg_next_arr, num, 0, 1, 0, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: array of scatterlist entries (need not be terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist sg[], unsigned int num,
			void *data,
			gfp_t gfp)
{
	/* No readable lists and a single writable one. */
	struct scatterlist *lists[1] = { sg };

	return virtqueue_add(vq, lists, sg_next_arr, 0, num, 0, 1, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
/**
* virtqueue_kick_prepare - first half of split virtqueue_kick call.
* @_vq: the struct virtqueue
*
* Instead of virtqueue_kick(), you can do:
* if (virtqueue_kick_prepare(vq))
* virtqueue_notify(vq);
*
* This is sometimes useful because the virtqueue_kick_prepare() needs
* to be serialized, but the actual virtqueue_notify() call does not.
*
* Resets the count of buffers added since the last kick.
*
* Returns true if the other side needs to be notified.
*/
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
u16 new, old;
bool needs_kick;
START_USE(vq);
/* We need to expose available array entries before checking avail
* event. */
virtio_mb(vq->weak_barriers);
/* avail->idx before and after the buffers added since the last kick. */
old = vq->vring.avail->idx - vq->num_added;
new = vq->vring.avail->idx;
vq->num_added = 0;
#ifdef DEBUG
if (vq->last_add_time_valid) {
WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
vq->last_add_time)) > 100);
}
vq->last_add_time_valid = false;
#endif
if (vq->event) {
/* Event index in use: kick only if the published avail event falls in (old, new]. */
needs_kick = vring_need_event(vring_avail_event(&vq->vring),
new, old);
} else {
/* Otherwise kick unless the other side set the no-notify flag. */
needs_kick = !(vq->vring.used->flags & VRING_USED_F_NO_NOTIFY);
}
END_USE(vq);
return needs_kick;
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
/**
* virtqueue_notify - second half of split virtqueue_kick call.
* @vq: the struct virtqueue
*
* This does not need to be serialized.
*/
void virtqueue_notify(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
/* Prod other side to tell it about changes. */
vq->notify(_vq);
}
EXPORT_SYMBOL_GPL(virtqueue_notify);
/**
 * virtqueue_kick - update after add_buf
 * @vq: the struct virtqueue
 *
 * After one or more virtqueue_add_buf calls, invoke this to kick
 * the other side.  The notification is skipped when
 * virtqueue_kick_prepare() reports that none is needed.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
void virtqueue_kick(struct virtqueue *vq)
{
	if (!virtqueue_kick_prepare(vq))
		return;

	virtqueue_notify(vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick);
static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
{
unsigned int i;
/* Clear data ptr. */
vq->data[head] = NULL;
/* Put back on free list: find end */
i = head;
/* Free the indirect table */
if (vq->vring.desc[i].flags & VRING_DESC_F_INDIRECT)
kfree(phys_to_virt(vq->vring.desc[i].addr));
while (vq->vring.desc[i].flags & VRING_DESC_F_NEXT) {
i = vq->vring.desc[i].next;
vq->vq.num_free++;
}
vq->vring.desc[i].next = vq->free_head;
vq->free_head = head;
/* Plus final descriptor */
vq->vq.num_free++;
}
/* True when the used ring index has moved past the last entry we consumed. */
static inline bool more_used(const struct vring_virtqueue *vq)
{
	return vq->vring.used->idx != vq->last_used_idx;
}
/**
* virtqueue_get_buf - get the next used buffer
* @_vq: the struct virtqueue we're talking about.
* @len: the length written into the buffer
*
* If the other side wrote data into the buffer, @len will be set to the
* amount written. This means you don't need to clear the buffer
* beforehand to ensure there's no data leakage in the case of short
* writes.
*
* Caller must ensure we don't call this with other virtqueue
* operations at the same time (except where noted).
*
* Returns NULL if there are no used buffers, if the queue is marked
* broken, or if the used entry is invalid; otherwise the "data" token
* handed to virtqueue_add_buf().
*/
void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
struct vring_virtqueue *vq = to_vvq(_vq);
void *ret;
unsigned int i;
u16 last_used;
START_USE(vq);
if (unlikely(vq->broken)) {
END_USE(vq);
return NULL;
}
if (!more_used(vq)) {
pr_debug("No more buffers in queue\n");
END_USE(vq);
return NULL;
}
/* Only get used array entries after they have been exposed by host. */
virtio_rmb(vq->weak_barriers);
/* The mask assumes vring.num is a power of two. */
last_used = (vq->last_used_idx & (vq->vring.num - 1));
i = vq->vring.used->ring[last_used].id;
*len = vq->vring.used->ring[last_used].len;
/* Validate the id supplied by the other side before using it as an index. */
if (unlikely(i >= vq->vring.num)) {
BAD_RING(vq, "id %u out of range\n", i);
return NULL;
}
if (unlikely(!vq->data[i])) {
BAD_RING(vq, "id %u is not a head!\n", i);
return NULL;
}
/* detach_buf clears data, so grab it now. */
ret = vq->data[i];
detach_buf(vq, i);
vq->last_used_idx++;
/* If we expect an interrupt for the next entry, tell host
* by writing event index and flush out the write before
* the read in the next get_buf call. */
if (!(vq->vring.avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
vring_used_event(&vq->vring) = vq->last_used_idx;
virtio_mb(vq->weak_barriers);
}
#ifdef DEBUG
vq->last_add_time_valid = false;
#endif
END_USE(vq);
return ret;
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);
/**
* virtqueue_disable_cb - disable callbacks
* @vq: the struct virtqueue we're talking about.
*
* Note that this is not necessarily synchronous, hence unreliable and only
* useful as an optimization.
*
* Unlike other operations, this need not be serialized.
*/
void virtqueue_disable_cb(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
/**
 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns current queue state
 * in an opaque unsigned value. This value should be later tested by
 * virtqueue_poll, to detect a possible race between the driver checking for
 * more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 snapshot;

	START_USE(vq);

	/*
	 * Interrupts are turned back on optimistically; the caller checks
	 * afterwards whether more work arrived.  Depending on the
	 * VIRTIO_RING_F_EVENT_IDX feature either the flags bit must be
	 * cleared or the event index pointed at the next entry; both are
	 * always done to keep the code simple.
	 */
	vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
	snapshot = vq->last_used_idx;
	vring_used_event(&vq->vring) = snapshot;

	END_USE(vq);
	return snapshot;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
/**
* virtqueue_poll - query pending used buffers
* @vq: the struct virtqueue we're talking about.
* @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
*
* Returns "true" if there are pending used buffers in the queue.
*
* This does not need to be serialized.
*/
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
virtio_mb(vq->weak_barriers);
return (u16)last_used_idx != vq->vring.used->idx;
}
EXPORT_SYMBOL_GPL(virtqueue_poll);
/**
 * virtqueue_enable_cb - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * Re-enables callbacks via virtqueue_enable_cb_prepare() and then polls
 * the queue once.  Returns "false" if there are pending buffers in the
 * queue, to detect a possible race between the driver checking for more
 * work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
	unsigned state = virtqueue_enable_cb_prepare(_vq);
	bool pending = virtqueue_poll(_vq, state);

	return !pending;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
/**
* virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
* @_vq: the struct virtqueue we're talking about.
*
* This re-enables callbacks but hints to the other side to delay
* interrupts until most of the available buffers have been processed;
* it returns "false" if there are many pending buffers in the queue,
* to detect a possible race between the driver checking for more work,
* and enabling callbacks.
*
* Caller must ensure we don't call this with other virtqueue
* operations at the same time (except where noted).
*/
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
u16 bufs;
START_USE(vq);
/* We optimistically turn back on interrupts, then check if there was
* more to do. */
/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
* either clear the flags bit or point the event index at the next
* entry. Always do both to keep code simple. */
vq->vring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
/* TODO: tune this threshold */
/* bufs is three quarters of the 16-bit distance between avail->idx and
* our last_used_idx; the used-event index is placed that far ahead. */
bufs = (u16)(vq->vring.avail->idx - vq->last_used_idx) * 3 / 4;
vring_used_event(&vq->vring) = vq->last_used_idx + bufs;
/* Barrier between the event-index store above and the used->idx read
* below. */
virtio_mb(vq->weak_barriers);
/* More than bufs entries are already used: report "false" so the
* caller processes them instead of waiting for a callback. */
if (unlikely((u16)(vq->vring.used->idx - vq->last_used_idx) > bufs)) {
END_USE(vq);
return false;
}
END_USE(vq);
return true;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Scans the per-descriptor token array for the first non-NULL entry,
 * detaches it and rolls the available index back by one.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_buf().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int head;
	void *token;

	START_USE(vq);

	for (head = 0; head < vq->vring.num; head++) {
		token = vq->data[head];
		if (token) {
			/* detach_buf clears data[head]; token was saved above. */
			detach_buf(vq, head);
			vq->vring.avail->idx--;
			END_USE(vq);
			return token;
		}
	}

	/* That should have freed everything. */
	BUG_ON(vq->vq.num_free != vq->vring.num);

	END_USE(vq);
	return NULL;
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
/*
 * Interrupt entry point for one virtqueue.
 *
 * Returns IRQ_NONE when more_used() reports nothing to process.
 * Otherwise returns IRQ_HANDLED, invoking the queue's callback (if one
 * is set) unless the queue is marked broken.
 */
irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	if (likely(!vq->broken)) {
		pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
		if (vq->vq.callback)
			vq->vq.callback(&vq->vq);
	}

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
/*
 * vring_new_virtqueue - allocate and initialise a virtqueue over @pages
 * @index: queue index stored in the returned virtqueue.
 * @num: number of ring entries; must be a non-zero power of 2.
 * @vring_align: alignment passed to vring_init().
 * @vdev: owning virtio device; the new queue is appended to vdev->vqs.
 * @weak_barriers: stored in the queue and later passed to virtio_mb().
 * @pages: memory backing the ring, handed to vring_init().
 * @notify: stored as the queue's notify hook.
 * @callback: per-queue callback, may be NULL.
 * @name: stored as the queue's name.
 *
 * Returns the new virtqueue, or NULL if @num is invalid or the
 * bookkeeping allocation fails.
 */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      void *pages,
				      void (*notify)(struct virtqueue *),
				      void (*callback)(struct virtqueue *),
				      const char *name)
{
	struct vring_virtqueue *vq;
	unsigned int i;

	/*
	 * We assume num is a power of 2.  Zero must be rejected explicitly:
	 * it passes the (num & (num - 1)) test, and the free-list loop
	 * below would then run with num - 1 wrapped to UINT_MAX.
	 */
	if (!num || (num & (num - 1))) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* One token slot per descriptor trails the structure. */
	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
	if (!vq)
		return NULL;

	vring_init(&vq->vring, num, pages, vring_align);
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = num;
	vq->vq.index = index;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->num_added = 0;
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	/* No callback?  Tell other side not to bother us. */
	if (!callback)
		vq->vring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;

	/* Put everything in free lists. */
	vq->free_head = 0;
	for (i = 0; i < num-1; i++) {
		vq->vring.desc[i].next = i+1;
		vq->data[i] = NULL;
	}
	/* The last descriptor gets no successor; only its token is cleared. */
	vq->data[i] = NULL;

	return &vq->vq;
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);
/*
 * Unlink @vq from its list and free the containing vring_virtqueue.
 * The ring pages themselves are not freed here.
 */
void vring_del_virtqueue(struct virtqueue *vq)
{
	struct vring_virtqueue *vvq = to_vvq(vq);

	list_del(&vq->list);
	kfree(vvq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
/*
 * Manipulates transport-specific feature bits: every bit in the
 * transport range other than INDIRECT_DESC and EVENT_IDX is cleared
 * from vdev->features.
 */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int bit;

	for (bit = VIRTIO_TRANSPORT_F_START; bit < VIRTIO_TRANSPORT_F_END; bit++) {
		/* The two ring features handled by this file are left alone. */
		if (bit == VIRTIO_RING_F_INDIRECT_DESC ||
		    bit == VIRTIO_RING_F_EVENT_IDX)
			continue;

		/* We don't understand this bit. */
		clear_bit(bit, vdev->features);
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);
/**
* virtqueue_get_vring_size - return the size of the virtqueue's vring
* @vq: the struct virtqueue containing the vring of interest.
*
* Returns the size of the vring. This is mainly used for boasting to
* userspace. Unlike other operations, this need not be serialized.
*/
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
return vq->vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
MODULE_LICENSE("GPL");

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,7 @@
obj-m += virtio.o virtio_ring.o
obj-m += virtio_mmio.o
obj-m += virtio_pci.o
virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
virtio_pci-m += virtio_pci_legacy.o
obj-m += virtio_net.o
obj-m += virtio_scsi.o

View File

@@ -0,0 +1,12 @@
/* Configuration space parsing helpers for virtio.
*
* The configuration is [type][len][... len bytes ...] fields.
*
* Copyright 2007 Rusty Russell, IBM Corporation.
* GPL v2 or later.
*/
#include <linux/err.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/bug.h>

View File

@@ -0,0 +1,422 @@
#include <linux/virtio.h>
#include <linux/spinlock.h>
#include <linux/virtio_config.h>
#include <linux/module.h>
#include <linux/idr.h>
#include <uapi/linux/virtio_ids.h>
/* Unique numbering for virtio devices. */
static DEFINE_IDA(virtio_index_ida);
static ssize_t device_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
return sprintf(buf, "0x%04x\n", dev->id.device);
}
static DEVICE_ATTR_RO(device);
static ssize_t vendor_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
return sprintf(buf, "0x%04x\n", dev->id.vendor);
}
static DEVICE_ATTR_RO(vendor);
/* sysfs "status" attribute: queries the transport for the status byte. */
static ssize_t status_show(struct device *_d,
			   struct device_attribute *attr, char *buf)
{
	struct virtio_device *vdev = dev_to_virtio(_d);
	unsigned int status = vdev->config->get_status(vdev);

	return sprintf(buf, "0x%08x\n", status);
}
static DEVICE_ATTR_RO(status);
static ssize_t modalias_show(struct device *_d,
struct device_attribute *attr, char *buf)
{
struct virtio_device *dev = dev_to_virtio(_d);
return sprintf(buf, "virtio:d%08Xv%08X\n",
dev->id.device, dev->id.vendor);
}
static DEVICE_ATTR_RO(modalias);
/*
 * sysfs "features" attribute: one '0'/'1' character per feature bit,
 * bit 0 first, followed by a newline.
 */
static ssize_t features_show(struct device *_d,
			     struct device_attribute *attr, char *buf)
{
	struct virtio_device *dev = dev_to_virtio(_d);
	const unsigned int nbits = sizeof(dev->features) * 8;
	unsigned int bit;
	ssize_t len = 0;

	/* We actually represent this as a bitstring, as it could be
	 * arbitrary length in future. */
	for (bit = 0; bit < nbits; bit++) {
		char c = __virtio_test_bit(dev, bit) ? '1' : '0';

		len += sprintf(buf + len, "%c", c);
	}
	len += sprintf(buf + len, "\n");

	return len;
}
static DEVICE_ATTR_RO(features);
/* NULL-terminated list of the per-device attributes defined above;
* ATTRIBUTE_GROUPS() derives virtio_dev_groups from it, which the bus
* uses as its dev_groups. */
static struct attribute *virtio_dev_attrs[] = {
&dev_attr_device.attr,
&dev_attr_vendor.attr,
&dev_attr_status.attr,
&dev_attr_modalias.attr,
&dev_attr_features.attr,
NULL,
};
ATTRIBUTE_GROUPS(virtio_dev);
/*
 * Returns non-zero when @id matches @dev: both the device and the vendor
 * field must either equal the device's value or be VIRTIO_DEV_ANY_ID.
 */
static inline int virtio_id_match(const struct virtio_device *dev,
				  const struct virtio_device_id *id)
{
	int device_ok = id->device == VIRTIO_DEV_ANY_ID ||
			id->device == dev->id.device;
	int vendor_ok = id->vendor == VIRTIO_DEV_ANY_ID ||
			id->vendor == dev->id.vendor;

	return device_ok && vendor_ok;
}
/* This looks through all the IDs a driver claims to support. If any of them
* match, we return 1 and the kernel will call virtio_dev_probe(). */
static int virtio_dev_match(struct device *_dv, struct device_driver *_dr)
{
unsigned int i;
struct virtio_device *dev = dev_to_virtio(_dv);
const struct virtio_device_id *ids;
ids = drv_to_virtio(_dr)->id_table;
for (i = 0; ids[i].device; i++)
if (virtio_id_match(dev, &ids[i]))
return 1;
return 0;
}
static int virtio_uevent(struct device *_dv, struct kobj_uevent_env *env)
{
struct virtio_device *dev = dev_to_virtio(_dv);
return add_uevent_var(env, "MODALIAS=virtio:d%08Xv%08X",
dev->id.device, dev->id.vendor);
}
/* OR @status into the device status: read the current value through the
 * transport, then write the combined value back. */
static void add_status(struct virtio_device *dev, unsigned status)
{
	unsigned updated = dev->config->get_status(dev) | status;

	dev->config->set_status(dev, updated);
}
/*
 * Sanity check: BUG() unless @fbit is listed in the bound driver's
 * feature table or, when one exists, its legacy feature table.
 */
void virtio_check_driver_offered_feature(const struct virtio_device *vdev,
					 unsigned int fbit)
{
	struct virtio_driver *drv = drv_to_virtio(vdev->dev.driver);
	unsigned int idx;

	for (idx = 0; idx < drv->feature_table_size; idx++) {
		if (drv->feature_table[idx] == fbit)
			return;
	}

	/* The legacy table is optional; skip the scan when it is absent. */
	for (idx = 0;
	     drv->feature_table_legacy && idx < drv->feature_table_size_legacy;
	     idx++) {
		if (drv->feature_table_legacy[idx] == fbit)
			return;
	}

	BUG();
}
EXPORT_SYMBOL_GPL(virtio_check_driver_offered_feature);
/*
 * Deliver a config-change event, or record it as pending while delivery
 * is disabled.  Both callers in this file hold dev->config_lock.
 */
static void __virtio_config_changed(struct virtio_device *dev)
{
	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);

	if (!dev->config_enabled) {
		dev->config_change_pending = true;
		return;
	}

	if (drv && drv->config_changed)
		drv->config_changed(dev);
}
/*
 * Exported entry point for reporting a configuration change; takes
 * dev->config_lock with interrupts saved/restored around the
 * notification.
 */
void virtio_config_changed(struct virtio_device *dev)
{
	unsigned long irqflags;

	spin_lock_irqsave(&dev->config_lock, irqflags);
	__virtio_config_changed(dev);
	spin_unlock_irqrestore(&dev->config_lock, irqflags);
}
EXPORT_SYMBOL_GPL(virtio_config_changed);
/* Stop delivering config-change events to the driver. While disabled,
* __virtio_config_changed() only records that a change is pending. */
static void virtio_config_disable(struct virtio_device *dev)
{
spin_lock_irq(&dev->config_lock);
dev->config_enabled = false;
spin_unlock_irq(&dev->config_lock);
}
/* Resume delivering config-change events. A change recorded while
* delivery was disabled is delivered now, then the pending flag is
* cleared. */
static void virtio_config_enable(struct virtio_device *dev)
{
spin_lock_irq(&dev->config_lock);
/* Must be set before the replay below, otherwise the event would just
* be marked pending again. */
dev->config_enabled = true;
if (dev->config_change_pending)
__virtio_config_changed(dev);
dev->config_change_pending = false;
spin_unlock_irq(&dev->config_lock);
}
/*
 * Hand the negotiated features to the transport.  When VIRTIO_F_VERSION_1
 * was negotiated, additionally set FEATURES_OK and verify that the device
 * kept the bit set.
 *
 * Returns 0 on success, the transport's error code, or -ENODEV if the
 * device refused the feature set.
 */
static int virtio_finalize_features(struct virtio_device *dev)
{
	unsigned status;
	int ret;

	ret = dev->config->finalize_features(dev);
	if (ret)
		return ret;

	if (virtio_has_feature(dev, VIRTIO_F_VERSION_1)) {
		add_status(dev, VIRTIO_CONFIG_S_FEATURES_OK);

		/* Read the status back: the bit stays set only on acceptance. */
		status = dev->config->get_status(dev);
		if (!(status & VIRTIO_CONFIG_S_FEATURES_OK)) {
			dev_err(&dev->dev, "virtio: device refuses features: %x\n",
				status);
			return -ENODEV;
		}
	}

	return 0;
}
/* Bus probe hook: negotiates features between device and driver, calls
* the driver's probe(), marks the device ready and enables config-change
* delivery. Returns 0 on success; on failure sets the FAILED status bit
* and returns the error. */
static int virtio_dev_probe(struct device *_d)
{
int err, i;
struct virtio_device *dev = dev_to_virtio(_d);
struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
u64 device_features;
u64 driver_features;
u64 driver_features_legacy;
/* We have a driver! */
add_status(dev, VIRTIO_CONFIG_S_DRIVER);
/* Figure out what features the device supports. */
device_features = dev->config->get_features(dev);
/* Figure out what features the driver supports. */
driver_features = 0;
for (i = 0; i < drv->feature_table_size; i++) {
unsigned int f = drv->feature_table[i];
BUG_ON(f >= 64);
driver_features |= (1ULL << f);
}
/* Some drivers have a separate feature table for devices that do not
* offer VIRTIO_F_VERSION_1; without one, the main table is used for
* both cases. */
if (drv->feature_table_legacy) {
driver_features_legacy = 0;
for (i = 0; i < drv->feature_table_size_legacy; i++) {
unsigned int f = drv->feature_table_legacy[i];
BUG_ON(f >= 64);
driver_features_legacy |= (1ULL << f);
}
} else {
driver_features_legacy = driver_features;
}
/* Pick the table according to whether the device offers VERSION_1,
* then keep only the bits the device offers too. */
if (device_features & (1ULL << VIRTIO_F_VERSION_1))
dev->features = driver_features & device_features;
else
dev->features = driver_features_legacy & device_features;
/* Transport features always preserved to pass to finalize_features. */
for (i = VIRTIO_TRANSPORT_F_START; i < VIRTIO_TRANSPORT_F_END; i++)
if (device_features & (1ULL << i))
__virtio_set_bit(dev, i);
err = virtio_finalize_features(dev);
if (err)
goto err;
err = drv->probe(dev);
if (err)
goto err;
/* If probe didn't do it, mark device DRIVER_OK ourselves. */
if (!(dev->config->get_status(dev) & VIRTIO_CONFIG_S_DRIVER_OK))
virtio_device_ready(dev);
if (drv->scan)
drv->scan(dev);
virtio_config_enable(dev);
return 0;
err:
add_status(dev, VIRTIO_CONFIG_S_FAILED);
return err;
}
/* Bus remove hook: disables config-change delivery, calls the driver's
* remove() and puts the status back to ACKNOWLEDGE. Always returns 0. */
static int virtio_dev_remove(struct device *_d)
{
struct virtio_device *dev = dev_to_virtio(_d);
struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
virtio_config_disable(dev);
drv->remove(dev);
/* Driver should have reset device. */
WARN_ON_ONCE(dev->config->get_status(dev));
/* Acknowledge the device's existence again. */
add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
return 0;
}
/* The "virtio" bus: ties the match/uevent/probe/remove hooks and the
* sysfs attribute groups defined in this file together. */
static struct bus_type virtio_bus = {
.name = "virtio",
.match = virtio_dev_match,
.dev_groups = virtio_dev_groups,
.uevent = virtio_uevent,
.probe = virtio_dev_probe,
.remove = virtio_dev_remove,
};
/* Register @driver on the virtio bus. Returns the result of
* driver_register(). */
int register_virtio_driver(struct virtio_driver *driver)
{
/* Catch this early. */
/* A non-zero feature_table_size with no table is a driver bug. */
BUG_ON(driver->feature_table_size && !driver->feature_table);
driver->driver.bus = &virtio_bus;
return driver_register(&driver->driver);
}
EXPORT_SYMBOL_GPL(register_virtio_driver);
/* Counterpart of register_virtio_driver(): unregisters the embedded
* struct device_driver. */
void unregister_virtio_driver(struct virtio_driver *driver)
{
driver_unregister(&driver->driver);
}
EXPORT_SYMBOL_GPL(unregister_virtio_driver);
/* Register @dev on the virtio bus: allocates a unique index, names the
* device "virtio<index>", resets it, sets ACKNOWLEDGE and calls
* device_register(). Returns 0 on success or a negative error; on any
* failure the FAILED status bit is set. */
int register_virtio_device(struct virtio_device *dev)
{
int err;
dev->dev.bus = &virtio_bus;
/* Assign a unique device index and hence name. */
err = ida_simple_get(&virtio_index_ida, 0, 0, GFP_KERNEL);
if (err < 0)
goto out;
dev->index = err;
dev_set_name(&dev->dev, "virtio%u", dev->index);
spin_lock_init(&dev->config_lock);
/* Config-change delivery starts disabled; virtio_dev_probe() enables
* it once a driver is bound. */
dev->config_enabled = false;
dev->config_change_pending = false;
/* We always start by resetting the device, in case a previous
* driver messed it up. This also tests that code path a little. */
dev->config->reset(dev);
/* Acknowledge that we've seen the device. */
add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
INIT_LIST_HEAD(&dev->vqs);
/* device_register() causes the bus infrastructure to look for a
* matching driver. */
err = device_register(&dev->dev);
/* Give the index back if registration failed. */
if (err)
ida_simple_remove(&virtio_index_ida, dev->index);
out:
if (err)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
return err;
}
EXPORT_SYMBOL_GPL(register_virtio_device);
/* Unregister @dev from the driver core and release its index. The index
* is copied to a local first so that @dev is not touched after
* device_unregister(). */
void unregister_virtio_device(struct virtio_device *dev)
{
int index = dev->index; /* save for after device release */
device_unregister(&dev->dev);
ida_simple_remove(&virtio_index_ida, index);
}
EXPORT_SYMBOL_GPL(unregister_virtio_device);
#ifdef CONFIG_PM_SLEEP
/*
 * Prepare @dev for suspend: disable config-change delivery, remember
 * whether the device had the FAILED status bit set, and call the
 * driver's freeze() hook if there is one.
 *
 * Returns the hook's result, or 0 when no driver or hook is present.
 */
int virtio_device_freeze(struct virtio_device *dev)
{
	struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);

	virtio_config_disable(dev);

	dev->failed = dev->config->get_status(dev) & VIRTIO_CONFIG_S_FAILED;

	if (!drv || !drv->freeze)
		return 0;

	return drv->freeze(dev);
}
EXPORT_SYMBOL_GPL(virtio_device_freeze);
/* Bring @dev back after resume: reset it, replay the status handshake
* (ACKNOWLEDGE, optionally FAILED, DRIVER), re-finalize features, call
* the driver's restore() hook and set DRIVER_OK. Returns 0 on success or
* when no driver is bound; on error sets FAILED and returns the error. */
int virtio_device_restore(struct virtio_device *dev)
{
struct virtio_driver *drv = drv_to_virtio(dev->dev.driver);
int ret;
/* We always start by resetting the device, in case a previous
* driver messed it up. */
dev->config->reset(dev);
/* Acknowledge that we've seen the device. */
add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE);
/* Maybe driver failed before freeze.
* Restore the failed status, for debugging. */
if (dev->failed)
add_status(dev, VIRTIO_CONFIG_S_FAILED);
if (!drv)
return 0;
/* We have a driver! */
add_status(dev, VIRTIO_CONFIG_S_DRIVER);
/* dev->features is not recomputed here; the value already stored in
* the device structure is handed to the transport again. */
ret = virtio_finalize_features(dev);
if (ret)
goto err;
if (drv->restore) {
ret = drv->restore(dev);
if (ret)
goto err;
}
/* Finally, tell the device we're all set */
add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK);
virtio_config_enable(dev);
return 0;
err:
add_status(dev, VIRTIO_CONFIG_S_FAILED);
return ret;
}
EXPORT_SYMBOL_GPL(virtio_device_restore);
#endif
/* Register the virtio bus; a registration failure is fatal (panic). */
static int virtio_init(void)
{
	int rc = bus_register(&virtio_bus);

	if (rc)
		panic("virtio bus registration failed");

	return 0;
}
/* Module exit: unregister the bus and release the index allocator. */
static void __exit virtio_exit(void)
{
bus_unregister(&virtio_bus);
ida_destroy(&virtio_index_ida);
}
core_initcall(virtio_init);
module_exit(virtio_exit);
MODULE_LICENSE("GPL");

View File

@@ -0,0 +1,770 @@
/*
* Virtio memory mapped device driver
*
* Copyright 2011-2014, ARM Ltd.
*
* This module allows virtio devices to be used over a virtual, memory mapped
* platform device.
*
* The guest device(s) may be instantiated in one of three equivalent ways:
*
* 1. Static platform device in board's code, eg.:
*
* static struct platform_device v2m_virtio_device = {
* .name = "virtio-mmio",
* .id = -1,
* .num_resources = 2,
* .resource = (struct resource []) {
* {
* .start = 0x1001e000,
* .end = 0x1001e0ff,
* .flags = IORESOURCE_MEM,
* }, {
* .start = 42 + 32,
* .end = 42 + 32,
* .flags = IORESOURCE_IRQ,
* },
* }
* };
*
* 2. Device Tree node, eg.:
*
* virtio_block@1e000 {
* compatible = "virtio,mmio";
* reg = <0x1e000 0x100>;
* interrupts = <42>;
* }
*
* 3. Kernel module (or command line) parameter. Can be used more than once -
* one device will be created for each one. Syntax:
*
* [virtio_mmio.]device=<size>@<baseaddr>:<irq>[:<id>]
* where:
* <size> := size (can use standard suffixes like K, M or G)
* <baseaddr> := physical base address
* <irq> := interrupt number (as passed to request_irq())
* <id> := (optional) platform device id
* eg.:
* virtio_mmio.device=0x100@0x100b0000:48 \
* virtio_mmio.device=1K@0x1001e000:74
*
*
*
* Based on Virtio PCI driver by Anthony Liguori, copyright IBM Corp. 2007
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*/
#define pr_fmt(fmt) "virtio-mmio: " fmt
#include <linux/acpi.h>
#include <linux/highmem.h>
#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/platform_device.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_mmio.h>
#include <linux/virtio_ring.h>
/* The alignment to use between consumer and producer parts of vring.
* Currently hardcoded to the page size. */
#define VIRTIO_MMIO_VRING_ALIGN PAGE_SIZE
/* Map a struct virtio_device pointer back to its containing
* struct virtio_mmio_device. Despite the parameter name, the argument is
* the embedded vdev member, not a platform device. */
#define to_virtio_mmio_device(_plat_dev) \
container_of(_plat_dev, struct virtio_mmio_device, vdev)
/* Per-device state of one virtio-mmio transport instance. */
struct virtio_mmio_device {
/* embedded generic virtio device (see to_virtio_mmio_device()) */
struct virtio_device vdev;
/* the platform device this transport was probed from */
struct platform_device *pdev;
/* ioremapped base of the device's register window */
void __iomem *base;
/* value read from the VIRTIO_MMIO_VERSION register (1 or 2) */
unsigned long version;
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
};
/* Bookkeeping for one active virtqueue; stored in vq->priv and linked
* into virtio_mmio_device.virtqueues. */
struct virtio_mmio_vq_info {
/* the actual virtqueue */
struct virtqueue *vq;
/* the number of entries in the queue */
unsigned int num;
/* the virtual address of the ring queue */
void *queue;
/* the list node for the virtqueues list */
struct list_head node;
};
/* Configuration interface */
static u64 vm_get_features(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
u64 features;
writel(1, vm_dev->base + VIRTIO_MMIO_DEVICE_FEATURES_SEL);
features = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_FEATURES);
features <<= 32;
writel(0, vm_dev->base + VIRTIO_MMIO_DEVICE_FEATURES_SEL);
features |= readl(vm_dev->base + VIRTIO_MMIO_DEVICE_FEATURES);
return features;
}
static int vm_finalize_features(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
/* Make sure there is are no mixed devices */
if (vm_dev->version == 2 &&
!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
dev_err(&vdev->dev, "New virtio-mmio devices (version 2) must provide VIRTIO_F_VERSION_1 feature!\n");
return -EINVAL;
}
writel(1, vm_dev->base + VIRTIO_MMIO_DRIVER_FEATURES_SEL);
writel((u32)(vdev->features >> 32),
vm_dev->base + VIRTIO_MMIO_DRIVER_FEATURES);
writel(0, vm_dev->base + VIRTIO_MMIO_DRIVER_FEATURES_SEL);
writel((u32)vdev->features,
vm_dev->base + VIRTIO_MMIO_DRIVER_FEATURES);
return 0;
}
/*
 * Read @len bytes of device configuration space at @offset into @buf.
 *
 * Version 1 devices are read one byte at a time for any length.  For
 * other versions only lengths 1, 2, 4 and 8 are supported (anything else
 * is a BUG()); each value read is passed through cpu_to_leXX() before
 * being copied out, and an 8-byte field is read as two 32-bit accesses,
 * lower offset first.
 */
static void vm_get(struct virtio_device *vdev, unsigned offset,
		   void *buf, unsigned len)
{
	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
	void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG;
	u8 b;
	__le16 w;
	__le32 l;

	if (vm_dev->version == 1) {
		u8 *ptr = buf;
		unsigned i;

		for (i = 0; i < len; i++)
			ptr[i] = readb(base + offset + i);
		return;
	}

	switch (len) {
	case 1:
		b = readb(base + offset);
		memcpy(buf, &b, sizeof b);
		break;
	case 2:
		w = cpu_to_le16(readw(base + offset));
		memcpy(buf, &w, sizeof w);
		break;
	case 4:
		l = cpu_to_le32(readl(base + offset));
		memcpy(buf, &l, sizeof l);
		break;
	case 8:
		l = cpu_to_le32(readl(base + offset));
		memcpy(buf, &l, sizeof l);
		/* Use readl() for the second half too, matching vm_set(). */
		l = cpu_to_le32(readl(base + offset + sizeof l));
		memcpy(buf + sizeof l, &l, sizeof l);
		break;
	default:
		BUG();
	}
}
static void vm_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
void __iomem *base = vm_dev->base + VIRTIO_MMIO_CONFIG;
u8 b;
__le16 w;
__le32 l;
if (vm_dev->version == 1) {
const u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
writeb(ptr[i], base + offset + i);
return;
}
switch (len) {
case 1:
memcpy(&b, buf, sizeof b);
writeb(b, base + offset);
break;
case 2:
memcpy(&w, buf, sizeof w);
writew(le16_to_cpu(w), base + offset);
break;
case 4:
memcpy(&l, buf, sizeof l);
writel(le32_to_cpu(l), base + offset);
break;
case 8:
memcpy(&l, buf, sizeof l);
writel(le32_to_cpu(l), base + offset);
memcpy(&l, buf + sizeof l, sizeof l);
writel(le32_to_cpu(l), base + offset + sizeof l);
break;
default:
BUG();
}
}
/*
 * Return the config generation counter.  Version 1 devices always
 * report 0; otherwise the CONFIG_GENERATION register is read.
 */
static u32 vm_generation(struct virtio_device *vdev)
{
	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);

	if (vm_dev->version == 1)
		return 0;

	return readl(vm_dev->base + VIRTIO_MMIO_CONFIG_GENERATION);
}
static u8 vm_get_status(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
return readl(vm_dev->base + VIRTIO_MMIO_STATUS) & 0xff;
}
/* Write @status to the STATUS register. Writing 0 is reserved for
* vm_reset(), hence the BUG_ON. */
static void vm_set_status(struct virtio_device *vdev, u8 status)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
/* We should never be setting status to 0. */
BUG_ON(status == 0);
writel(status, vm_dev->base + VIRTIO_MMIO_STATUS);
}
/* Reset the device by writing 0 to the STATUS register. */
static void vm_reset(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
/* 0 status means a reset. */
writel(0, vm_dev->base + VIRTIO_MMIO_STATUS);
}
/* Transport interface */
/* the notify function used when creating a virt queue */
/* Kick the device for @vq by writing the queue index to QUEUE_NOTIFY.
* Always returns true. */
static bool vm_notify(struct virtqueue *vq)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
/* We write the queue's selector into the notification register to
* signal the other end */
writel(vq->index, vm_dev->base + VIRTIO_MMIO_QUEUE_NOTIFY);
return true;
}
/* Notify all virtqueues on an interrupt. */
/* IRQ handler for the device: acknowledges the pending interrupt bits,
* forwards config-change events and calls vring_interrupt() on every
* registered queue. Returns IRQ_HANDLED if anything claimed the
* interrupt, IRQ_NONE otherwise. */
static irqreturn_t vm_interrupt(int irq, void *opaque)
{
struct virtio_mmio_device *vm_dev = opaque;
struct virtio_mmio_vq_info *info;
unsigned long status;
unsigned long flags;
irqreturn_t ret = IRQ_NONE;
/* Read and acknowledge interrupts */
status = readl(vm_dev->base + VIRTIO_MMIO_INTERRUPT_STATUS);
writel(status, vm_dev->base + VIRTIO_MMIO_INTERRUPT_ACK);
if (unlikely(status & VIRTIO_MMIO_INT_CONFIG)) {
virtio_config_changed(&vm_dev->vdev);
ret = IRQ_HANDLED;
}
if (likely(status & VIRTIO_MMIO_INT_VRING)) {
/* The lock protects the virtqueues list against concurrent
* add/remove in vm_setup_vq()/vm_del_vq(). */
spin_lock_irqsave(&vm_dev->lock, flags);
list_for_each_entry(info, &vm_dev->virtqueues, node)
ret |= vring_interrupt(irq, info->vq);
spin_unlock_irqrestore(&vm_dev->lock, flags);
}
return ret;
}
/* Tear down one virtqueue: unlink it from the IRQ dispatch list, delete
* the vring bookkeeping, deactivate the queue in the device, then free
* the ring pages and the info structure. */
static void vm_del_vq(struct virtqueue *vq)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vq->vdev);
struct virtio_mmio_vq_info *info = vq->priv;
unsigned long flags, size;
/* Saved up front: vq is freed by vring_del_virtqueue() below. */
unsigned int index = vq->index;
spin_lock_irqsave(&vm_dev->lock, flags);
list_del(&info->node);
spin_unlock_irqrestore(&vm_dev->lock, flags);
vring_del_virtqueue(vq);
/* Select and deactivate the queue */
writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
if (vm_dev->version == 1) {
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
} else {
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
}
/* Same size computation as the allocation in vm_setup_vq(). */
size = PAGE_ALIGN(vring_size(info->num, VIRTIO_MMIO_VRING_ALIGN));
free_pages_exact(info->queue, size);
kfree(info);
}
/* Delete every virtqueue on the device's vqs list and release the IRQ
* that vm_find_vqs() requested. */
static void vm_del_vqs(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtqueue *vq, *n;
/* _safe variant: vm_del_vq() removes and frees the current entry. */
list_for_each_entry_safe(vq, n, &vdev->vqs, list)
vm_del_vq(vq);
free_irq(platform_get_irq(vm_dev->pdev, 0), vm_dev);
}
/* Set up queue @index: allocate the ring memory, create the vring,
* program the queue registers and add the queue to the IRQ dispatch
* list.
*
* Returns the new virtqueue, NULL when @name is NULL, or an ERR_PTR():
* -ENOENT if the queue is already active or the device reports a maximum
* size of 0, -ENOMEM on allocation or vring creation failure. */
static struct virtqueue *vm_setup_vq(struct virtio_device *vdev, unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
struct virtio_mmio_vq_info *info;
struct virtqueue *vq;
unsigned long flags, size;
int err;
if (!name)
return NULL;
/* Select the queue we're interested in */
writel(index, vm_dev->base + VIRTIO_MMIO_QUEUE_SEL);
/* Queue shouldn't already be set up. */
if (readl(vm_dev->base + (vm_dev->version == 1 ?
VIRTIO_MMIO_QUEUE_PFN : VIRTIO_MMIO_QUEUE_READY))) {
err = -ENOENT;
goto error_available;
}
/* Allocate and fill out our active queue description */
info = kmalloc(sizeof(*info), GFP_KERNEL);
if (!info) {
err = -ENOMEM;
goto error_kmalloc;
}
/* Allocate pages for the queue - start with a queue as big as
* possible (limited by maximum size allowed by device), drop down
* to a minimal size, just big enough to fit descriptor table
* and two rings (which makes it "alignment_size * 2")
*/
info->num = readl(vm_dev->base + VIRTIO_MMIO_QUEUE_NUM_MAX);
/* If the device reports a 0 entry queue, we won't be able to
* use it to perform I/O, and vring_new_virtqueue() can't create
* empty queues anyway, so don't bother to set up the device.
*/
if (info->num == 0) {
err = -ENOENT;
goto error_alloc_pages;
}
/* Halve the queue size after each failed allocation. */
while (1) {
size = PAGE_ALIGN(vring_size(info->num,
VIRTIO_MMIO_VRING_ALIGN));
/* Did the last iter shrink the queue below minimum size? */
if (size < VIRTIO_MMIO_VRING_ALIGN * 2) {
err = -ENOMEM;
goto error_alloc_pages;
}
info->queue = alloc_pages_exact(size, GFP_KERNEL | __GFP_ZERO);
if (info->queue)
break;
info->num /= 2;
}
/* Create the vring */
vq = vring_new_virtqueue(index, info->num, VIRTIO_MMIO_VRING_ALIGN, vdev,
true, info->queue, vm_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto error_new_virtqueue;
}
/* Activate the queue */
writel(info->num, vm_dev->base + VIRTIO_MMIO_QUEUE_NUM);
if (vm_dev->version == 1) {
/* Version 1: one page frame number describes the whole ring. */
writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_QUEUE_ALIGN);
writel(virt_to_phys(info->queue) >> PAGE_SHIFT,
vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
} else {
/* Otherwise: separate 64-bit physical addresses for the descriptor
* table, the avail ring and the used ring, then mark it ready. */
u64 addr;
addr = virt_to_phys(info->queue);
writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_LOW);
writel((u32)(addr >> 32),
vm_dev->base + VIRTIO_MMIO_QUEUE_DESC_HIGH);
addr = virt_to_phys(virtqueue_get_avail(vq));
writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_LOW);
writel((u32)(addr >> 32),
vm_dev->base + VIRTIO_MMIO_QUEUE_AVAIL_HIGH);
addr = virt_to_phys(virtqueue_get_used(vq));
writel((u32)addr, vm_dev->base + VIRTIO_MMIO_QUEUE_USED_LOW);
writel((u32)(addr >> 32),
vm_dev->base + VIRTIO_MMIO_QUEUE_USED_HIGH);
writel(1, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
}
vq->priv = info;
info->vq = vq;
/* Make the queue visible to vm_interrupt(). */
spin_lock_irqsave(&vm_dev->lock, flags);
list_add(&info->node, &vm_dev->virtqueues);
spin_unlock_irqrestore(&vm_dev->lock, flags);
return vq;
/* Error unwinding: each label undoes the steps completed before the
* corresponding failure point. */
error_new_virtqueue:
if (vm_dev->version == 1) {
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_PFN);
} else {
writel(0, vm_dev->base + VIRTIO_MMIO_QUEUE_READY);
WARN_ON(readl(vm_dev->base + VIRTIO_MMIO_QUEUE_READY));
}
free_pages_exact(info->queue, size);
error_alloc_pages:
kfree(info);
error_kmalloc:
error_available:
return ERR_PTR(err);
}
/*
 * Request the device interrupt and set up @nvqs virtqueues.
 *
 * @vqs receives one entry per queue; an entry is NULL when the matching
 * @names entry is NULL.  Returns 0 on success.  On failure returns a
 * negative error: the platform_get_irq() or request_irq() error, or the
 * error from vm_setup_vq(), in which case all queues created so far are
 * deleted and the interrupt is released.
 */
static int vm_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		       struct virtqueue *vqs[],
		       vq_callback_t *callbacks[],
		       const char *names[])
{
	struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
	int irq = platform_get_irq(vm_dev->pdev, 0);
	unsigned int i;
	int err;

	/*
	 * platform_get_irq() reports failure as a negative errno; do not
	 * hand that to request_irq() as if it were an IRQ number.
	 */
	if (irq < 0)
		return irq;

	err = request_irq(irq, vm_interrupt, IRQF_SHARED,
			  dev_name(&vdev->dev), vm_dev);
	if (err)
		return err;

	for (i = 0; i < nvqs; ++i) {
		vqs[i] = vm_setup_vq(vdev, i, callbacks[i], names[i]);
		if (IS_ERR(vqs[i])) {
			/* Also frees the IRQ requested above. */
			vm_del_vqs(vdev);
			return PTR_ERR(vqs[i]);
		}
	}

	return 0;
}
static const char *vm_bus_name(struct virtio_device *vdev)
{
struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
return vm_dev->pdev->name;
}
/* Transport operations handed to the virtio core; virtio_mmio_probe()
* installs this table as vdev.config. */
static const struct virtio_config_ops virtio_mmio_config_ops = {
.get = vm_get,
.set = vm_set,
.generation = vm_generation,
.get_status = vm_get_status,
.set_status = vm_set_status,
.reset = vm_reset,
.find_vqs = vm_find_vqs,
.del_vqs = vm_del_vqs,
.get_features = vm_get_features,
.finalize_features = vm_finalize_features,
.bus_name = vm_bus_name,
};
/* Platform device */
/* Platform probe: claims and maps the first memory resource, validates
* the magic value and version, reads the device and vendor IDs and
* registers the virtio device. Allocations and mappings use devm_*
* helpers. Returns 0 or a negative error. */
static int virtio_mmio_probe(struct platform_device *pdev)
{
struct virtio_mmio_device *vm_dev;
struct resource *mem;
unsigned long magic;
mem = platform_get_resource(pdev, IORESOURCE_MEM, 0);
if (!mem)
return -EINVAL;
if (!devm_request_mem_region(&pdev->dev, mem->start,
resource_size(mem), pdev->name))
return -EBUSY;
vm_dev = devm_kzalloc(&pdev->dev, sizeof(*vm_dev), GFP_KERNEL);
if (!vm_dev)
return -ENOMEM;
vm_dev->vdev.dev.parent = &pdev->dev;
vm_dev->vdev.config = &virtio_mmio_config_ops;
vm_dev->pdev = pdev;
INIT_LIST_HEAD(&vm_dev->virtqueues);
spin_lock_init(&vm_dev->lock);
vm_dev->base = devm_ioremap(&pdev->dev, mem->start, resource_size(mem));
if (vm_dev->base == NULL)
return -EFAULT;
/* Check magic value */
/* The expected value is the ASCII string "virt", 'v' in the low byte. */
magic = readl(vm_dev->base + VIRTIO_MMIO_MAGIC_VALUE);
if (magic != ('v' | 'i' << 8 | 'r' << 16 | 't' << 24)) {
dev_warn(&pdev->dev, "Wrong magic value 0x%08lx!\n", magic);
return -ENODEV;
}
/* Check device version */
vm_dev->version = readl(vm_dev->base + VIRTIO_MMIO_VERSION);
if (vm_dev->version < 1 || vm_dev->version > 2) {
dev_err(&pdev->dev, "Version %ld not supported!\n",
vm_dev->version);
return -ENXIO;
}
vm_dev->vdev.id.device = readl(vm_dev->base + VIRTIO_MMIO_DEVICE_ID);
if (vm_dev->vdev.id.device == 0) {
/*
* virtio-mmio device with an ID 0 is a (dummy) placeholder
* with no function. End probing now with no error reported.
* NOTE(review): "no error reported" presumably means no message is
* logged here; the function still returns -ENODEV - confirm.
*/
return -ENODEV;
}
vm_dev->vdev.id.vendor = readl(vm_dev->base + VIRTIO_MMIO_VENDOR_ID);
/* Only version 1 devices are told the guest page size. */
if (vm_dev->version == 1)
writel(PAGE_SIZE, vm_dev->base + VIRTIO_MMIO_GUEST_PAGE_SIZE);
platform_set_drvdata(pdev, vm_dev);
return register_virtio_device(&vm_dev->vdev);
}
/* Platform remove: unregisters the virtio device stored as driver data
* by virtio_mmio_probe(). Always returns 0. */
static int virtio_mmio_remove(struct platform_device *pdev)
{
struct virtio_mmio_device *vm_dev = platform_get_drvdata(pdev);
unregister_virtio_device(&vm_dev->vdev);
return 0;
}
/* Devices list parameter */
#if defined(CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES)
/* Parent device under which all command-line-defined platform devices
* are registered. */
static struct device vm_cmdline_parent = {
.init_name = "virtio-mmio-cmdline",
};
/* Non-zero once vm_cmdline_parent has been registered. */
static int vm_cmdline_parent_registered;
/* Platform device id used for the next command-line device. */
static int vm_cmdline_id;
/* Parameter "set" hook: parses one "<size>@<baseaddr>:<irq>[:<id>]"
* description and registers a matching "virtio-mmio" platform device
* under vm_cmdline_parent (registering the parent on first use).
* Returns 0 on success, -EINVAL on a malformed string, or the
* registration error. */
static int vm_cmdline_set(const char *device,
const struct kernel_param *kp)
{
int err;
struct resource resources[2] = {};
char *str;
long long int base, size;
unsigned int irq;
int processed, consumed = 0;
struct platform_device *pdev;
/* Consume "size" part of the command line parameter */
size = memparse(device, &str);
/* Get "@<base>:<irq>[:<id>]" chunks */
/* When the optional id is present it is stored straight into
* vm_cmdline_id, so later devices continue counting from it. */
processed = sscanf(str, "@%lli:%u%n:%d%n",
&base, &irq, &consumed,
&vm_cmdline_id, &consumed);
/*
* sscanf() must process at least 2 chunks; also there
* must be no extra characters after the last chunk, so
* str[consumed] must be '\0'
*/
if (processed < 2 || str[consumed])
return -EINVAL;
resources[0].flags = IORESOURCE_MEM;
resources[0].start = base;
resources[0].end = base + size - 1;
resources[1].flags = IORESOURCE_IRQ;
resources[1].start = resources[1].end = irq;
if (!vm_cmdline_parent_registered) {
err = device_register(&vm_cmdline_parent);
if (err) {
pr_err("Failed to register parent device!\n");
return err;
}
vm_cmdline_parent_registered = 1;
}
pr_info("Registering device virtio-mmio.%d at 0x%llx-0x%llx, IRQ %d.\n",
vm_cmdline_id,
(unsigned long long)resources[0].start,
(unsigned long long)resources[0].end,
(int)resources[1].start);
/* The id is post-incremented so the next device gets the next id. */
pdev = platform_device_register_resndata(&vm_cmdline_parent,
"virtio-mmio", vm_cmdline_id++,
resources, ARRAY_SIZE(resources), NULL, 0);
if (IS_ERR(pdev))
return PTR_ERR(pdev);
return 0;
}
/*
 * device_for_each_child() callback: append one line describing @dev, in the
 * form "<size>@<base>:<irq>:<id>", to the string already held in @data.
 * The write is bounded so the string never grows past PAGE_SIZE bytes.
 * Always returns 0 so that iteration continues.
 */
static int vm_cmdline_get_device(struct device *dev, void *data)
{
	struct platform_device *pdev = to_platform_device(dev);
	char *out = data;
	unsigned int used = strlen(out);

	snprintf(out + used, PAGE_SIZE - used, "0x%llx@0x%llx:%llu:%d\n",
		 pdev->resource[0].end - pdev->resource[0].start + 1ULL,
		 (unsigned long long)pdev->resource[0].start,
		 (unsigned long long)pdev->resource[1].start,
		 pdev->id);

	return 0;
}
/*
 * "get" handler of the "device" parameter: fill @buffer with one line per
 * child of vm_cmdline_parent and return the string length plus one.
 */
static int vm_cmdline_get(char *buffer, const struct kernel_param *kp)
{
	/* Start from an empty string; each child appends its own line. */
	*buffer = '\0';
	device_for_each_child(&vm_cmdline_parent, buffer, vm_cmdline_get_device);

	return strlen(buffer) + 1;
}
/* Handlers backing the "device" parameter (readable by owner only). */
static const struct kernel_param_ops vm_cmdline_param_ops = {
.set = vm_cmdline_set,
.get = vm_cmdline_get,
};
device_param_cb(device, &vm_cmdline_param_ops, NULL, S_IRUSR);
/*
 * device_for_each_child() callback: unregister the platform device behind
 * @dev. @data is unused. Always returns 0 so that iteration continues.
 */
static int vm_unregister_cmdline_device(struct device *dev,
					void *data)
{
	struct platform_device *pdev = to_platform_device(dev);

	platform_device_unregister(pdev);

	return 0;
}
/*
 * Tear down everything created through the "device" parameter: all child
 * platform devices first, then the parent device itself. Does nothing when
 * the parent was never registered.
 */
static void vm_unregister_cmdline_devices(void)
{
	if (!vm_cmdline_parent_registered)
		return;

	device_for_each_child(&vm_cmdline_parent, NULL,
			      vm_unregister_cmdline_device);
	device_unregister(&vm_cmdline_parent);
	vm_cmdline_parent_registered = 0;
}
#else
/*
 * No-op variant used when CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES is not set, so
 * the module exit path can call it unconditionally.
 */
static void vm_unregister_cmdline_devices(void)
{
}
#endif
/* Platform driver */
/*
 * Device-tree match table: bind to nodes whose compatible string is
 * "virtio,mmio". The table is never modified, so it is declared const.
 */
static const struct of_device_id virtio_mmio_match[] = {
	{ .compatible = "virtio,mmio", },
	{},
};
MODULE_DEVICE_TABLE(of, virtio_mmio_match);
#ifdef CONFIG_ACPI
/* ACPI match table: bind to devices with hardware id "LNRO0005". */
static const struct acpi_device_id virtio_mmio_acpi_match[] = {
{ "LNRO0005", },
{ }
};
MODULE_DEVICE_TABLE(acpi, virtio_mmio_acpi_match);
#endif
/*
 * Platform driver description: probe/remove callbacks plus the name,
 * device-tree and ACPI tables used to match devices.
 */
static struct platform_driver virtio_mmio_driver = {
.probe = virtio_mmio_probe,
.remove = virtio_mmio_remove,
.driver = {
.name = "virtio-mmio",
.of_match_table = virtio_mmio_match,
.acpi_match_table = ACPI_PTR(virtio_mmio_acpi_match),
},
};
/* Module entry point: register the platform driver and report the result. */
static int __init virtio_mmio_init(void)
{
	int err = platform_driver_register(&virtio_mmio_driver);

	return err;
}
/*
 * Module exit point: unregister the platform driver first, then remove any
 * devices that were created from the command line.
 */
static void __exit virtio_mmio_exit(void)
{
platform_driver_unregister(&virtio_mmio_driver);
vm_unregister_cmdline_devices();
}
/* Module entry/exit hooks and metadata. */
module_init(virtio_mmio_init);
module_exit(virtio_mmio_exit);
MODULE_AUTHOR("Pawel Moll <pawel.moll@arm.com>");
MODULE_DESCRIPTION("Platform bus driver for memory mapped virtio devices");
MODULE_LICENSE("GPL");

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,576 @@
/*
* Virtio PCI driver - common functionality for all device versions
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
* Copyright Red Hat, Inc. 2014
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Rusty Russell <rusty@rustcorp.com.au>
* Michael S. Tsirkin <mst@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "virtio_pci_common.h"
/*
 * When true, the probe routine tries the legacy interface before the modern
 * one. Only exposed as a (read-only) module parameter when legacy support
 * is built in.
 */
static bool force_legacy = false;
#if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
module_param(force_legacy, bool, 0444);
MODULE_PARM_DESC(force_legacy,
"Force legacy mode for transitional virtio 1 devices");
#endif
/*
 * Wait for any in-flight interrupt handlers of this device to complete:
 * the INTx line when it is in use, and every MSI-X vector.
 */
void vp_synchronize_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int vec = 0;

	if (vp_dev->intx_enabled)
		synchronize_irq(vp_dev->pci_dev->irq);

	while (vec < vp_dev->msix_vectors) {
		synchronize_irq(vp_dev->msix_entries[vec].vector);
		++vec;
	}
}
/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq)
{
/* we write the queue's selector into the notification register to
* signal the other end */
iowrite16(vq->index, (void __iomem *)vq->priv);
return true;
}
/*
 * Interrupt handler for configuration changes: forward the event to the
 * virtio core for the device passed in @opaque.
 */
static irqreturn_t vp_config_changed(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_device *vdev = &vp_dev->vdev;

	virtio_config_changed(vdev);

	return IRQ_HANDLED;
}
/*
 * Shared virtqueue interrupt handler: offer the interrupt to every queue on
 * the device's virtqueues list. Returns IRQ_HANDLED if at least one queue
 * handled it, IRQ_NONE otherwise.
 */
static irqreturn_t vp_vring_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	struct virtio_pci_vq_info *cur;
	irqreturn_t result = IRQ_NONE;
	unsigned long irqflags;

	spin_lock_irqsave(&vp_dev->lock, irqflags);
	list_for_each_entry(cur, &vp_dev->virtqueues, node) {
		if (vring_interrupt(irq, cur->vq) != IRQ_HANDLED)
			continue;
		result = IRQ_HANDLED;
	}
	spin_unlock_irqrestore(&vp_dev->lock, irqflags);

	return result;
}
/*
 * INTx handler. Reading the ISR register both reports and acknowledges
 * (clears) the pending interrupt, so the value is read exactly once and
 * kept.
 *
 * Ideally the vring would offer a hook to acknowledge the interrupt after
 * we know we will handle it but before the callback runs: the callback may
 * notify the host, which could then raise an interrupt that we would mask
 * when acknowledging.
 */
static irqreturn_t vp_interrupt(int irq, void *opaque)
{
	struct virtio_pci_device *vp_dev = opaque;
	u8 isr = ioread8(vp_dev->isr);

	/* A zero ISR means the (shared) interrupt was not raised by us. */
	if (isr == 0)
		return IRQ_NONE;

	/* Report a configuration change if the device flagged one. */
	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vp_config_changed(irq, opaque);

	return vp_vring_interrupt(irq, opaque);
}
/*
 * Release every interrupt resource held on behalf of the device: the INTx
 * line, the MSI-X vectors requested with vp_dev as cookie, the affinity
 * masks, MSI-X itself and the bookkeeping arrays. Safe to call on a
 * partially set-up device (it is the error path of
 * vp_request_msix_vectors()).
 */
static void vp_free_vectors(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int i;

	if (vp_dev->intx_enabled) {
		free_irq(vp_dev->pci_dev->irq, vp_dev);
		vp_dev->intx_enabled = 0;
	}

	for (i = 0; i < vp_dev->msix_used_vectors; ++i)
		free_irq(vp_dev->msix_entries[i].vector, vp_dev);

	/*
	 * vp_request_msix_vectors() sets msix_vectors before it allocates
	 * anything, so when one of its earlier allocations fails we get here
	 * with a non-zero count but a NULL msix_affinity_masks array. Only
	 * walk the array when it actually exists.
	 */
	if (vp_dev->msix_affinity_masks) {
		for (i = 0; i < vp_dev->msix_vectors; i++)
			if (vp_dev->msix_affinity_masks[i])
				free_cpumask_var(vp_dev->msix_affinity_masks[i]);
	}

	if (vp_dev->msix_enabled) {
		/* Disable the vector used for configuration */
		vp_dev->config_vector(vp_dev, VIRTIO_MSI_NO_VECTOR);

		pci_disable_msix(vp_dev->pci_dev);
		vp_dev->msix_enabled = 0;
	}

	vp_dev->msix_vectors = 0;
	vp_dev->msix_used_vectors = 0;
	/* Pointers are reset so that a repeated call is harmless. */
	kfree(vp_dev->msix_names);
	vp_dev->msix_names = NULL;
	kfree(vp_dev->msix_entries);
	vp_dev->msix_entries = NULL;
	kfree(vp_dev->msix_affinity_masks);
	vp_dev->msix_affinity_masks = NULL;
}
/*
 * Enable MSI-X with exactly @nvectors vectors and request the vectors that
 * are not tied to a single virtqueue: the configuration-change vector and,
 * when @per_vq_vectors is false, one vector shared by all virtqueues.
 *
 * Returns 0 on success. On failure everything acquired so far is released
 * through vp_free_vectors() and a negative errno is returned (-ENOMEM for
 * allocation failures, -EBUSY when the device refuses the configuration
 * vector, otherwise the error from the PCI/IRQ core).
 */
static int vp_request_msix_vectors(struct virtio_device *vdev, int nvectors,
				   bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	const char *name = dev_name(&vp_dev->vdev.dev);
	unsigned i, v;
	int err = -ENOMEM;

	vp_dev->msix_vectors = nvectors;

	/*
	 * The array allocators check the count * size product for overflow
	 * instead of relying on open-coded multiplication.
	 */
	vp_dev->msix_entries = kmalloc_array(nvectors,
					     sizeof(*vp_dev->msix_entries),
					     GFP_KERNEL);
	if (!vp_dev->msix_entries)
		goto error;
	vp_dev->msix_names = kmalloc_array(nvectors,
					   sizeof(*vp_dev->msix_names),
					   GFP_KERNEL);
	if (!vp_dev->msix_names)
		goto error;
	/* Zeroed so the cleanup path can tell which masks were allocated. */
	vp_dev->msix_affinity_masks
		= kcalloc(nvectors, sizeof(*vp_dev->msix_affinity_masks),
			  GFP_KERNEL);
	if (!vp_dev->msix_affinity_masks)
		goto error;
	for (i = 0; i < nvectors; ++i)
		if (!alloc_cpumask_var(&vp_dev->msix_affinity_masks[i],
				       GFP_KERNEL))
			goto error;

	for (i = 0; i < nvectors; ++i)
		vp_dev->msix_entries[i].entry = i;

	err = pci_enable_msix_exact(vp_dev->pci_dev,
				    vp_dev->msix_entries, nvectors);
	if (err)
		goto error;
	vp_dev->msix_enabled = 1;

	/* Set the vector used for configuration */
	v = vp_dev->msix_used_vectors;
	snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
		 "%s-config", name);
	err = request_irq(vp_dev->msix_entries[v].vector,
			  vp_config_changed, 0, vp_dev->msix_names[v],
			  vp_dev);
	if (err)
		goto error;
	++vp_dev->msix_used_vectors;

	v = vp_dev->config_vector(vp_dev, v);
	/* Verify we had enough resources to assign the vector */
	if (v == VIRTIO_MSI_NO_VECTOR) {
		err = -EBUSY;
		goto error;
	}

	if (!per_vq_vectors) {
		/* Shared vector for all VQs */
		v = vp_dev->msix_used_vectors;
		snprintf(vp_dev->msix_names[v], sizeof *vp_dev->msix_names,
			 "%s-virtqueues", name);
		err = request_irq(vp_dev->msix_entries[v].vector,
				  vp_vring_interrupt, 0, vp_dev->msix_names[v],
				  vp_dev);
		if (err)
			goto error;
		++vp_dev->msix_used_vectors;
	}
	return 0;
error:
	vp_free_vectors(vdev);
	return err;
}
/*
 * Request the device's legacy (INTx) interrupt line as a shared interrupt
 * handled by vp_interrupt(). Marks INTx as enabled on success and returns
 * the request_irq() result.
 */
static int vp_request_intx(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	int err;

	err = request_irq(vp_dev->pci_dev->irq, vp_interrupt,
			  IRQF_SHARED, dev_name(&vdev->dev), vp_dev);
	if (err)
		return err;

	vp_dev->intx_enabled = 1;
	return 0;
}
/*
 * Create virtqueue @index through the transport-specific setup_vq hook and
 * record its bookkeeping structure in vp_dev->vqs[]. Queues that have a
 * callback are also linked into the list walked by the shared interrupt
 * handler.
 *
 * Returns the new virtqueue, or an ERR_PTR() (-ENOMEM when the bookkeeping
 * structure cannot be allocated, otherwise whatever the hook returned).
 */
static struct virtqueue *vp_setup_vq(struct virtio_device *vdev, unsigned index,
				     void (*callback)(struct virtqueue *vq),
				     const char *name,
				     u16 msix_vec)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info;
	struct virtqueue *vq;
	unsigned long irqflags;

	/* Bookkeeping structure that represents an active queue. */
	info = kmalloc(sizeof *info, GFP_KERNEL);
	if (!info)
		return ERR_PTR(-ENOMEM);

	vq = vp_dev->setup_vq(vp_dev, info, index, callback, name, msix_vec);
	if (IS_ERR(vq)) {
		kfree(info);
		return vq;
	}

	info->vq = vq;
	if (!callback) {
		/* Not on the dispatch list; keep the node self-linked. */
		INIT_LIST_HEAD(&info->node);
	} else {
		spin_lock_irqsave(&vp_dev->lock, irqflags);
		list_add(&info->node, &vp_dev->virtqueues);
		spin_unlock_irqrestore(&vp_dev->lock, irqflags);
	}

	vp_dev->vqs[index] = info;
	return vq;
}
/*
 * Destroy one virtqueue: unlink its bookkeeping structure from the dispatch
 * list, let the transport-specific del_vq hook tear the queue down, then
 * free the structure.
 */
static void vp_del_vq(struct virtqueue *vq)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
	struct virtio_pci_vq_info *vq_info = vp_dev->vqs[vq->index];
	unsigned long irqflags;

	spin_lock_irqsave(&vp_dev->lock, irqflags);
	list_del(&vq_info->node);
	spin_unlock_irqrestore(&vp_dev->lock, irqflags);

	vp_dev->del_vq(vq_info);
	kfree(vq_info);
}
/*
 * The config->del_vqs() implementation: free the per-queue interrupts (if
 * any), delete every virtqueue of the device, release the remaining
 * interrupt resources and drop the queue bookkeeping array.
 */
void vp_del_vqs(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *vq, *n;
	struct virtio_pci_vq_info *info;
	unsigned int irq;

	list_for_each_entry_safe(vq, n, &vdev->vqs, list) {
		info = vp_dev->vqs[vq->index];
		if (vp_dev->per_vq_vectors &&
		    info->msix_vector != VIRTIO_MSI_NO_VECTOR) {
			irq = vp_dev->msix_entries[info->msix_vector].vector;
			/*
			 * vp_set_vq_affinity() may have installed a hint that
			 * points into msix_affinity_masks, which is freed
			 * below by vp_free_vectors(). Drop the hint before
			 * releasing the interrupt so nothing keeps a stale
			 * pointer.
			 */
			irq_set_affinity_hint(irq, NULL);
			free_irq(irq, vq);
		}
		vp_del_vq(vq);
	}
	vp_dev->per_vq_vectors = false;

	vp_free_vectors(vdev);
	kfree(vp_dev->vqs);
	vp_dev->vqs = NULL;
}
/*
 * Try to set up all @nvqs virtqueues using one specific interrupt scheme:
 *   - !use_msix:                  a single INTx interrupt for everything;
 *   - use_msix && per_vq_vectors: one MSI-X vector for configuration
 *                                 changes plus one per queue with a callback;
 *   - use_msix && !per_vq_vectors: one configuration vector plus one vector
 *                                 shared by all queues.
 *
 * Entries of @vqs whose name is NULL are set to NULL and skipped.
 * Returns 0 on success; on failure everything is undone via vp_del_vqs()
 * and a negative errno is returned.
 */
static int vp_try_to_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[],
			      bool use_msix,
			      bool per_vq_vectors)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	u16 msix_vec;
	int i, err, nvectors, allocated_vectors;

	/*
	 * kcalloc() checks the nvqs * size product for overflow and zeroes
	 * the array, so the slots of skipped queues hold NULL instead of
	 * uninitialised pointers.
	 */
	vp_dev->vqs = kcalloc(nvqs, sizeof(*vp_dev->vqs), GFP_KERNEL);
	if (!vp_dev->vqs)
		return -ENOMEM;

	if (!use_msix) {
		/* Old style: one normal interrupt for change and all vqs. */
		err = vp_request_intx(vdev);
		if (err)
			goto error_find;
	} else {
		if (per_vq_vectors) {
			/* Best option: one for change interrupt, one per vq. */
			nvectors = 1;
			for (i = 0; i < nvqs; ++i)
				if (callbacks[i])
					++nvectors;
		} else {
			/* Second best: one for change, shared for all vqs. */
			nvectors = 2;
		}

		err = vp_request_msix_vectors(vdev, nvectors, per_vq_vectors);
		if (err)
			goto error_find;
	}

	vp_dev->per_vq_vectors = per_vq_vectors;
	/* Per-queue vectors are numbered after the ones already in use. */
	allocated_vectors = vp_dev->msix_used_vectors;
	for (i = 0; i < nvqs; ++i) {
		if (!names[i]) {
			vqs[i] = NULL;
			continue;
		} else if (!callbacks[i] || !vp_dev->msix_enabled)
			msix_vec = VIRTIO_MSI_NO_VECTOR;
		else if (vp_dev->per_vq_vectors)
			msix_vec = allocated_vectors++;
		else
			msix_vec = VP_MSIX_VQ_VECTOR;
		vqs[i] = vp_setup_vq(vdev, i, callbacks[i], names[i], msix_vec);
		if (IS_ERR(vqs[i])) {
			err = PTR_ERR(vqs[i]);
			goto error_find;
		}

		if (!vp_dev->per_vq_vectors || msix_vec == VIRTIO_MSI_NO_VECTOR)
			continue;

		/* allocate per-vq irq if available and necessary */
		snprintf(vp_dev->msix_names[msix_vec],
			 sizeof *vp_dev->msix_names,
			 "%s-%s",
			 dev_name(&vp_dev->vdev.dev), names[i]);
		err = request_irq(vp_dev->msix_entries[msix_vec].vector,
				  vring_interrupt, 0,
				  vp_dev->msix_names[msix_vec],
				  vqs[i]);
		if (err) {
			/*
			 * This queue has no interrupt to free, so delete it
			 * here; vp_del_vqs() handles the earlier ones.
			 */
			vp_del_vq(vqs[i]);
			goto error_find;
		}
	}
	return 0;

error_find:
	vp_del_vqs(vdev);
	return err;
}
/*
 * The config->find_vqs() implementation. Interrupt schemes are attempted
 * from most to least capable; the first that succeeds wins and the result
 * of the last attempt is returned if none does.
 */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
		struct virtqueue *vqs[],
		vq_callback_t *callbacks[],
		const char *names[])
{
	/* 1. MSI-X with one vector per queue. */
	if (!vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, true))
		return 0;

	/* 2. MSI-X with one vector for config, one shared by all queues. */
	if (!vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names, true, false))
		return 0;

	/* 3. Regular (INTx) interrupts. */
	return vp_try_to_find_vqs(vdev, nvqs, vqs, callbacks, names,
				  false, false);
}
const char *vp_bus_name(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return pci_name(vp_dev->pci_dev);
}
/*
 * Set the interrupt affinity hint for a virtqueue.
 *  - Queues without a callback are rejected with -EINVAL.
 *  - Without MSI-X (i.e. INTx) the request is ignored and 0 is returned.
 *  - With MSI-X, @cpu == -1 clears the hint of the queue's vector; any
 *    other value makes the vector's mask contain only @cpu and installs it
 *    as the hint.
 */
int vp_set_vq_affinity(struct virtqueue *vq, int cpu)
{
	struct virtio_device *vdev = vq->vdev;
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtio_pci_vq_info *info = vp_dev->vqs[vq->index];
	struct cpumask *hint_mask;
	unsigned int irq;

	if (!vq->callback)
		return -EINVAL;

	if (!vp_dev->msix_enabled)
		return 0;

	hint_mask = vp_dev->msix_affinity_masks[info->msix_vector];
	irq = vp_dev->msix_entries[info->msix_vector].vector;

	if (cpu != -1) {
		cpumask_clear(hint_mask);
		cpumask_set_cpu(cpu, hint_mask);
		irq_set_affinity_hint(irq, hint_mask);
	} else {
		irq_set_affinity_hint(irq, NULL);
	}

	return 0;
}
#ifdef CONFIG_PM_SLEEP
/*
 * System-sleep "freeze" hook: freeze the virtio device and, only if that
 * succeeded, disable the PCI device. Returns the freeze result.
 */
static int virtio_pci_freeze(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int err;

	err = virtio_device_freeze(&vp_dev->vdev);
	if (err)
		return err;

	pci_disable_device(pci_dev);
	return 0;
}
/*
 * System-sleep "restore" hook: re-enable the PCI device, turn bus mastering
 * back on and restore the virtio device. Returns the first error seen.
 */
static int virtio_pci_restore(struct device *dev)
{
	struct pci_dev *pci_dev = to_pci_dev(dev);
	struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
	int err = pci_enable_device(pci_dev);

	if (err)
		return err;

	pci_set_master(pci_dev);

	return virtio_device_restore(&vp_dev->vdev);
}
/* System-sleep callbacks: freeze on suspend, restore on resume. */
static const struct dev_pm_ops virtio_pci_pm_ops = {
SET_SYSTEM_SLEEP_PM_OPS(virtio_pci_freeze, virtio_pci_restore)
};
#endif
/* Qumranet donated their vendor ID for devices 0x1000 thru 0x10FF. */
/*
 * Match every device id of vendor 0x1af4; the probe routines narrow this
 * down further.
 */
static const struct pci_device_id virtio_pci_id_table[] = {
{ PCI_DEVICE(0x1af4, PCI_ANY_ID) },
{ 0 }
};
MODULE_DEVICE_TABLE(pci, virtio_pci_id_table);
/*
 * Release callback of the embedded struct device: frees the containing
 * virtio_pci_device.
 */
static void virtio_pci_release_dev(struct device *_d)
{
	/*
	 * struct device is a kobject, so the memory (which includes the
	 * reference counter itself) must not be freed before its release
	 * callback runs, i.e. here.
	 */
	kfree(to_vp_device(dev_to_virtio(_d)));
}
/*
 * PCI probe callback: allocate the per-device structure, enable the PCI
 * device, initialise the transport (modern first unless force_legacy is
 * set, with the other flavour as fallback) and register the virtio device.
 *
 * Returns 0 on success or a negative errno; on failure everything acquired
 * so far is released.
 */
static int virtio_pci_probe(struct pci_dev *pci_dev,
const struct pci_device_id *id)
{
struct virtio_pci_device *vp_dev;
int rc;
/* allocate our structure and fill it out */
vp_dev = kzalloc(sizeof(struct virtio_pci_device), GFP_KERNEL);
if (!vp_dev)
return -ENOMEM;
pci_set_drvdata(pci_dev, vp_dev);
vp_dev->vdev.dev.parent = &pci_dev->dev;
vp_dev->vdev.dev.release = virtio_pci_release_dev;
vp_dev->pci_dev = pci_dev;
INIT_LIST_HEAD(&vp_dev->virtqueues);
spin_lock_init(&vp_dev->lock);
/* enable the device */
rc = pci_enable_device(pci_dev);
if (rc)
goto err_enable_device;
if (force_legacy) {
rc = virtio_pci_legacy_probe(vp_dev);
/* Also try modern mode if we can't map BAR0 (no IO space). */
if (rc == -ENODEV || rc == -ENOMEM)
rc = virtio_pci_modern_probe(vp_dev);
if (rc)
goto err_probe;
} else {
/* Modern first; fall back to legacy only if it is not a modern device. */
rc = virtio_pci_modern_probe(vp_dev);
if (rc == -ENODEV)
rc = virtio_pci_legacy_probe(vp_dev);
if (rc)
goto err_probe;
}
pci_set_master(pci_dev);
rc = register_virtio_device(&vp_dev->vdev);
if (rc)
goto err_register;
return 0;
err_register:
/* ioaddr is only set by the legacy probe, so it tells the flavours apart. */
if (vp_dev->ioaddr)
virtio_pci_legacy_remove(vp_dev);
else
virtio_pci_modern_remove(vp_dev);
err_probe:
pci_disable_device(pci_dev);
err_enable_device:
/*
 * NOTE(review): register_virtio_device() is not visible here. If it can
 * fail after the embedded struct device has been initialised, this kfree()
 * would bypass the release callback -- confirm against its implementation.
 */
kfree(vp_dev);
return rc;
}
/*
 * PCI remove callback: unregister the virtio device, undo the transport
 * setup and disable the PCI device.
 */
static void virtio_pci_remove(struct pci_dev *pci_dev)
{
struct virtio_pci_device *vp_dev = pci_get_drvdata(pci_dev);
/*
 * Hold a reference across the teardown: the release callback frees vp_dev,
 * and vp_dev is still used after unregister_virtio_device() below.
 */
struct device *dev = get_device(&vp_dev->vdev.dev);
unregister_virtio_device(&vp_dev->vdev);
/* ioaddr is only set by the legacy probe, so it tells the flavours apart. */
if (vp_dev->ioaddr)
virtio_pci_legacy_remove(vp_dev);
else
virtio_pci_modern_remove(vp_dev);
pci_disable_device(pci_dev);
/* Drop our reference; this may invoke the release callback. */
put_device(dev);
}
/*
 * PCI driver description; the power-management callbacks are only wired up
 * when system sleep support is configured.
 */
static struct pci_driver virtio_pci_driver = {
.name = "virtio-pci",
.id_table = virtio_pci_id_table,
.probe = virtio_pci_probe,
.remove = virtio_pci_remove,
#ifdef CONFIG_PM_SLEEP
.driver.pm = &virtio_pci_pm_ops,
#endif
};
/* Generates the module init/exit functions that (un)register the driver. */
module_pci_driver(virtio_pci_driver);
MODULE_AUTHOR("Anthony Liguori <aliguori@us.ibm.com>");
MODULE_DESCRIPTION("virtio-pci");
MODULE_LICENSE("GPL");
MODULE_VERSION("1");

View File

@@ -0,0 +1,167 @@
#ifndef _DRIVERS_VIRTIO_VIRTIO_PCI_COMMON_H
#define _DRIVERS_VIRTIO_VIRTIO_PCI_COMMON_H
/*
* Virtio PCI driver - APIs for common functionality for all device versions
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
* Copyright Red Hat, Inc. 2014
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Rusty Russell <rusty@rustcorp.com.au>
* Michael S. Tsirkin <mst@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <linux/module.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/interrupt.h>
#include <linux/virtio.h>
#include <linux/virtio_config.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_pci.h>
#include <linux/highmem.h>
#include <linux/spinlock.h>
/* Per-virtqueue bookkeeping kept by the PCI transport. */
struct virtio_pci_vq_info {
/* the actual virtqueue */
struct virtqueue *vq;
/* the number of entries in the queue */
int num;
/* the virtual address of the ring queue */
void *queue;
/* the list node for the virtqueues list */
struct list_head node;
/* MSI-X vector (or none) */
unsigned msix_vector;
};
/* Our device structure */
struct virtio_pci_device {
/* Embedded generic virtio device; to_vp_device() maps back from it. */
struct virtio_device vdev;
/* The PCI device this virtio device sits on. */
struct pci_dev *pci_dev;
/* In legacy mode, these two point to within ->legacy. */
/* NOTE(review): no ->legacy member exists in this structure; the remark above looks stale. */
/* Where to read and clear interrupt */
u8 __iomem *isr;
/* Modern only fields */
/* The IO mapping for the PCI config space (non-legacy mode) */
struct virtio_pci_common_cfg __iomem *common;
/* Device-specific data (non-legacy mode) */
void __iomem *device;
/* Base of vq notifications (non-legacy mode). */
void __iomem *notify_base;
/* So we can sanity-check accesses. */
size_t notify_len;
size_t device_len;
/* Capability for when we need to map notifications per-vq. */
int notify_map_cap;
/* Multiply queue_notify_off by this value. (non-legacy mode). */
u32 notify_offset_multiplier;
/* presumably describes the BARs used in modern mode -- TODO confirm */
int modern_bars;
/* Legacy only field */
/* the IO mapping for the PCI config space */
void __iomem *ioaddr;
/* a list of queues so we can dispatch IRQs */
spinlock_t lock;
struct list_head virtqueues;
/* array of all queues for house-keeping */
struct virtio_pci_vq_info **vqs;
/* MSI-X support */
int msix_enabled;
int intx_enabled;
struct msix_entry *msix_entries;
cpumask_var_t *msix_affinity_masks;
/* Name strings for interrupts. This size should be enough,
* and I'm too lazy to allocate each name separately. */
char (*msix_names)[256];
/* Number of available vectors */
unsigned msix_vectors;
/* Vectors allocated, excluding per-vq vectors if any */
unsigned msix_used_vectors;
/* Whether we have vector per vq */
bool per_vq_vectors;
/* Transport-specific hook (legacy or modern) that creates one virtqueue. */
struct virtqueue *(*setup_vq)(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned idx,
void (*callback)(struct virtqueue *vq),
const char *name,
u16 msix_vec);
/* Transport-specific hook that tears one virtqueue down. */
void (*del_vq)(struct virtio_pci_vq_info *info);
/*
 * Transport-specific hook that programs the configuration-change vector
 * and returns the value the device reports back.
 */
u16 (*config_vector)(struct virtio_pci_device *vp_dev, u16 vector);
};
/* Constants for MSI-X */
/* Use the first vector for configuration changes, the second and the rest
* for virtqueues. Thus, we need at least 2 vectors for MSI-X. */
enum {
VP_MSIX_CONFIG_VECTOR = 0,
VP_MSIX_VQ_VECTOR = 1,
};
/*
 * Convert a generic virtio device to our structure.
 *
 * @vdev must be the vdev member embedded in a struct virtio_pci_device.
 * Declared inline because this header is included by several source files;
 * a plain static definition would emit an unused copy (and a warning) in
 * every file that does not call it.
 */
static inline struct virtio_pci_device *to_vp_device(struct virtio_device *vdev)
{
	return container_of(vdev, struct virtio_pci_device, vdev);
}
/* wait for pending irq handlers */
void vp_synchronize_vectors(struct virtio_device *vdev);
/* the notify function used when creating a virt queue */
bool vp_notify(struct virtqueue *vq);
/* the config->del_vqs() implementation */
void vp_del_vqs(struct virtio_device *vdev);
/* the config->find_vqs() implementation; returns 0 or a negative errno */
int vp_find_vqs(struct virtio_device *vdev, unsigned nvqs,
struct virtqueue *vqs[],
vq_callback_t *callbacks[],
const char *names[]);
/* returns the PCI name of the underlying device */
const char *vp_bus_name(struct virtio_device *vdev);
/* Setup the affinity for a virtqueue:
* - force the affinity for per vq vector
* - OR over all affinities for shared MSI
* - ignore the affinity request if we're using INTX
*/
int vp_set_vq_affinity(struct virtqueue *vq, int cpu);
/*
 * Legacy transport entry points. When legacy support is not built, the
 * stubs below report "no such device" so callers fall through cleanly.
 */
#if IS_ENABLED(CONFIG_VIRTIO_PCI_LEGACY)
int virtio_pci_legacy_probe(struct virtio_pci_device *);
void virtio_pci_legacy_remove(struct virtio_pci_device *);
#else
static inline int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
{
return -ENODEV;
}
static inline void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev)
{
}
#endif
/* Modern (virtio 1.0) transport entry points. */
int virtio_pci_modern_probe(struct virtio_pci_device *);
void virtio_pci_modern_remove(struct virtio_pci_device *);
#endif

View File

@@ -0,0 +1,267 @@
/*
* Virtio PCI driver - legacy device support
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
* Copyright Red Hat, Inc. 2014
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Rusty Russell <rusty@rustcorp.com.au>
* Michael S. Tsirkin <mst@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include "virtio_pci_common.h"
/* virtio config->get_features() implementation */
static u64 vp_get_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* When someone needs more than 32 feature bits, we'll need to
* steal a bit to indicate that the rest are somewhere else. */
return ioread32(vp_dev->ioaddr + VIRTIO_PCI_HOST_FEATURES);
}
/*
 * virtio config->finalize_features() implementation (legacy): let the ring
 * code adjust the feature set, then write the low 32 bits to the device.
 * Always returns 0.
 */
static int vp_finalize_features(struct virtio_device *vdev)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* Give virtio_ring a chance to accept features. */
	vring_transport_features(vdev);

	/* The legacy interface carries 32 bits only; higher bits are a bug. */
	BUG_ON((vdev->features >> 32) != 0);

	iowrite32(vdev->features, vp_dev->ioaddr + VIRTIO_PCI_GUEST_FEATURES);

	return 0;
}
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
VIRTIO_PCI_CONFIG(vp_dev) + offset;
u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
ptr[i] = ioread8(ioaddr + i);
}
/* the config->set() implementation. it's symmetric to the config->get()
* implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
void __iomem *ioaddr = vp_dev->ioaddr +
VIRTIO_PCI_CONFIG(vp_dev) + offset;
const u8 *ptr = buf;
int i;
for (i = 0; i < len; i++)
iowrite8(ptr[i], ioaddr + i);
}
/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
/*
 * Write the device status byte. A zero status would reset the device, which
 * must go through vp_reset() instead, so it is treated as a bug here.
 */
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);

	/* We should never be setting status to 0. */
	BUG_ON(!status);

	iowrite8(status, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
}
/*
 * virtio config->reset() implementation (legacy): write a zero status, read
 * it back to flush, then wait for interrupt handlers to finish.
 */
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* 0 status means a reset. */
iowrite8(0, vp_dev->ioaddr + VIRTIO_PCI_STATUS);
/* Flush out the status write, and flush in device writes,
* including MSI-X interrupts, if any. */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_STATUS);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
}
/*
 * Program the MSI-X vector used for configuration events and return the
 * value the device reports back, which lets the caller verify that the
 * device had enough resources to assign the vector.
 */
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
	void __iomem *vector_reg = vp_dev->ioaddr + VIRTIO_MSI_CONFIG_VECTOR;

	/* Setup the vector used for configuration events */
	iowrite16(vector, vector_reg);

	/* The read-back also flushes the write out to the device. */
	return ioread16(vector_reg);
}
/*
 * Legacy setup_vq hook: select queue @index, allocate zeroed ring memory
 * for it, hand the ring's address to the device, create the vring and, if
 * requested, bind the queue to MSI-X vector @msix_vec.
 *
 * Fills in info->num, info->msix_vector and info->queue. Returns the new
 * virtqueue or an ERR_PTR(): -ENOENT when the queue does not exist or is
 * already active, -ENOMEM on allocation failure, -EBUSY when the device
 * refuses the vector.
 */
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
struct virtio_pci_vq_info *info,
unsigned index,
void (*callback)(struct virtqueue *vq),
const char *name,
u16 msix_vec)
{
struct virtqueue *vq;
unsigned long size;
u16 num;
int err;
/* Select the queue we're interested in */
iowrite16(index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
/* Check if queue is either not available or already active. */
num = ioread16(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NUM);
if (!num || ioread32(vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN))
return ERR_PTR(-ENOENT);
info->num = num;
info->msix_vector = msix_vec;
/* Ring memory is allocated in whole pages and zero-filled. */
size = PAGE_ALIGN(vring_size(num, VIRTIO_PCI_VRING_ALIGN));
info->queue = alloc_pages_exact(size, GFP_KERNEL|__GFP_ZERO);
if (info->queue == NULL)
return ERR_PTR(-ENOMEM);
/* activate the queue */
iowrite32(virt_to_phys(info->queue) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT,
vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
/* create the vring */
vq = vring_new_virtqueue(index, info->num,
VIRTIO_PCI_VRING_ALIGN, &vp_dev->vdev,
true, info->queue, vp_notify, callback, name);
if (!vq) {
err = -ENOMEM;
goto out_activate_queue;
}
/* vp_notify() writes the queue index to this register. */
vq->priv = (void __force *)vp_dev->ioaddr + VIRTIO_PCI_QUEUE_NOTIFY;
if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
/* Program the vector and read it back to see whether the device accepted it. */
iowrite16(msix_vec, vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
msix_vec = ioread16(vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
err = -EBUSY;
goto out_assign;
}
}
return vq;
out_assign:
vring_del_virtqueue(vq);
out_activate_queue:
/* Deactivate the queue before its memory is returned. */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
free_pages_exact(info->queue, size);
return ERR_PTR(err);
}
/*
 * Legacy del_vq hook: detach the queue from its MSI-X vector (when MSI-X is
 * enabled), delete the vring, deactivate the queue in the device and free
 * the ring memory.
 */
static void del_vq(struct virtio_pci_vq_info *info)
{
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
unsigned long size;
/* Select the queue; the register writes below apply to it. */
iowrite16(vq->index, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_SEL);
if (vp_dev->msix_enabled) {
iowrite16(VIRTIO_MSI_NO_VECTOR,
vp_dev->ioaddr + VIRTIO_MSI_QUEUE_VECTOR);
/* Flush the write out to device */
ioread8(vp_dev->ioaddr + VIRTIO_PCI_ISR);
}
vring_del_virtqueue(vq);
/* Deactivate the (already selected) queue */
iowrite32(0, vp_dev->ioaddr + VIRTIO_PCI_QUEUE_PFN);
/* Same size computation as at allocation time in setup_vq(). */
size = PAGE_ALIGN(vring_size(info->num, VIRTIO_PCI_VRING_ALIGN));
free_pages_exact(info->queue, size);
}
/*
 * virtio config operations for legacy devices: register accessors from this
 * file plus the queue/interrupt helpers shared with the modern transport.
 */
static const struct virtio_config_ops virtio_pci_config_ops = {
.get = vp_get,
.set = vp_set,
.get_status = vp_get_status,
.set_status = vp_set_status,
.reset = vp_reset,
.find_vqs = vp_find_vqs,
.del_vqs = vp_del_vqs,
.get_features = vp_get_features,
.finalize_features = vp_finalize_features,
.bus_name = vp_bus_name,
.set_vq_affinity = vp_set_vq_affinity,
};
/*
 * Legacy probe: verify the device id range and ABI revision, claim and map
 * BAR 0, and install the legacy operations on @vp_dev.
 *
 * Returns 0 on success, -ENODEV when the device is not a supported legacy
 * device, -ENOMEM when BAR 0 cannot be mapped, or the error from
 * pci_request_region().
 */
int virtio_pci_legacy_probe(struct virtio_pci_device *vp_dev)
{
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int rc;

	/* We only own devices in 0x1000..0x103f: leave the rest. */
	if (!(pci_dev->device >= 0x1000 && pci_dev->device <= 0x103f))
		return -ENODEV;

	if (pci_dev->revision != VIRTIO_PCI_ABI_VERSION) {
		printk(KERN_ERR "virtio_pci: expected ABI version %d, got %d\n",
		       VIRTIO_PCI_ABI_VERSION, pci_dev->revision);
		return -ENODEV;
	}

	rc = pci_request_region(pci_dev, 0, "virtio-pci-legacy");
	if (rc)
		return rc;

	vp_dev->ioaddr = pci_iomap(pci_dev, 0, 0);
	if (!vp_dev->ioaddr) {
		pci_release_region(pci_dev, 0);
		return -ENOMEM;
	}

	vp_dev->isr = vp_dev->ioaddr + VIRTIO_PCI_ISR;

	/*
	 * The PCI subsystem vendor/device ids serve as the virtio
	 * vendor/device ids. This lets all virtio devices share the same PCI
	 * vendor/device id while the subsystem ids pick the virtio driver.
	 */
	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
	vp_dev->vdev.id.device = pci_dev->subsystem_device;

	vp_dev->vdev.config = &virtio_pci_config_ops;

	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;

	return 0;
}
/* Undo virtio_pci_legacy_probe(): unmap BAR 0 and release its region. */
void virtio_pci_legacy_remove(struct virtio_pci_device *vp_dev)
{
	struct pci_dev *pdev = vp_dev->pci_dev;

	pci_iounmap(pdev, vp_dev->ioaddr);
	pci_release_region(pdev, 0);
}

View File

@@ -0,0 +1,747 @@
/*
* Virtio PCI driver - modern (virtio 1.0) device support
*
* This module allows virtio devices to be used over a virtual PCI device.
* This can be used with QEMU based VMMs like KVM or Xen.
*
* Copyright IBM Corp. 2007
* Copyright Red Hat, Inc. 2014
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Rusty Russell <rusty@rustcorp.com.au>
* Michael S. Tsirkin <mst@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or later.
* See the COPYING file in the top-level directory.
*
*/
#include <linux/delay.h>
#define VIRTIO_PCI_NO_LEGACY
#include "virtio_pci_common.h"
/*
* Type-safe wrappers for io accesses.
* Use these to enforce at compile time the following spec requirement:
*
* The driver MUST access each field using the “natural” access
* method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
* for 16-bit fields and 8-bit accesses for 8-bit fields.
*/
/* Read an 8-bit field; the pointer type rejects wider fields at compile time. */
static inline u8 vp_ioread8(u8 __iomem *addr)
{
return ioread8(addr);
}
/* Read a 16-bit field. */
static inline u16 vp_ioread16 (u16 __iomem *addr)
{
return ioread16(addr);
}
/* Read a 32-bit field. */
static inline u32 vp_ioread32(u32 __iomem *addr)
{
return ioread32(addr);
}
/* Write an 8-bit field. */
static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
{
iowrite8(value, addr);
}
/* Write a 16-bit field. */
static inline void vp_iowrite16(u16 value, u16 __iomem *addr)
{
iowrite16(value, addr);
}
/* Write a 32-bit field. */
static inline void vp_iowrite32(u32 value, u32 __iomem *addr)
{
iowrite32(value, addr);
}
/*
 * Write a 64-bit value as two 32-bit accesses: the low half to @lo first,
 * then the high half to @hi.
 */
static void vp_iowrite64_twopart(u64 val,
				 __le32 __iomem *lo, __le32 __iomem *hi)
{
	u32 low_half = (u32)val;
	u32 high_half = val >> 32;

	vp_iowrite32(low_half, lo);
	vp_iowrite32(high_half, hi);
}
/*
 * Map (part of) the BAR region described by the virtio PCI capability found
 * at config-space offset @off.
 *
 * @minlen: minimum number of bytes that must be available after @start
 * @align:  required alignment of the resulting BAR offset; used as a mask,
 *          so it is expected to be a power of two
 * @start:  number of bytes to skip at the beginning of the region
 * @size:   upper bound on the number of bytes to map
 * @len:    if non-NULL, receives the number of bytes actually mapped
 *
 * Returns the mapping, or NULL (after logging the reason) when the
 * capability is inconsistent, does not fit in the BAR, or mapping fails.
 */
static void __iomem *map_capability(struct pci_dev *dev, int off,
size_t minlen,
u32 align,
u32 start, u32 size,
size_t *len)
{
u8 bar;
u32 offset, length;
void __iomem *p;
/* Fetch the BAR number, offset and length from the capability. */
pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
bar),
&bar);
pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
&offset);
pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
&length);
/* The region must extend beyond the part being skipped ... */
if (length <= start) {
dev_err(&dev->dev,
"virtio_pci: bad capability len %u (>%u expected)\n",
length, start);
return NULL;
}
/* ... and what remains must cover the required minimum. */
if (length - start < minlen) {
dev_err(&dev->dev,
"virtio_pci: bad capability len %u (>=%zu expected)\n",
length, minlen);
return NULL;
}
length -= start;
/* Reject a BAR offset that wraps around once start is added. */
if (start + offset < offset) {
dev_err(&dev->dev,
"virtio_pci: map wrap-around %u+%u\n",
start, offset);
return NULL;
}
offset += start;
if (offset & (align - 1)) {
dev_err(&dev->dev,
"virtio_pci: offset %u not aligned to %u\n",
offset, align);
return NULL;
}
/* Never map more than the caller asked for. */
if (length > size)
length = size;
if (len)
*len = length;
/* The mandatory part must lie inside the BAR (and must not wrap). */
if (minlen + offset < minlen ||
minlen + offset > pci_resource_len(dev, bar)) {
dev_err(&dev->dev,
"virtio_pci: map virtio %zu@%u "
"out of range on bar %i length %lu\n",
minlen, offset,
bar, (unsigned long)pci_resource_len(dev, bar));
return NULL;
}
p = pci_iomap_range(dev, bar, offset, length);
if (!p)
dev_err(&dev->dev,
"virtio_pci: unable to map virtio %u@%u on bar %i\n",
length, offset, bar);
return p;
}
/* virtio config->get_features() implementation */
static u64 vp_get_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u64 features;
vp_iowrite32(0, &vp_dev->common->device_feature_select);
features = vp_ioread32(&vp_dev->common->device_feature);
vp_iowrite32(1, &vp_dev->common->device_feature_select);
features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
return features;
}
/* virtio config->finalize_features() implementation */
static int vp_finalize_features(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* Give virtio_ring a chance to accept features. */
vring_transport_features(vdev);
if (!__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
dev_err(&vdev->dev, "virtio: device uses modern interface "
"but does not have VIRTIO_F_VERSION_1\n");
return -EINVAL;
}
vp_iowrite32(0, &vp_dev->common->guest_feature_select);
vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
vp_iowrite32(1, &vp_dev->common->guest_feature_select);
vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
return 0;
}
/* virtio config->get() implementation */
static void vp_get(struct virtio_device *vdev, unsigned offset,
void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u8 b;
__le16 w;
__le32 l;
BUG_ON(offset + len > vp_dev->device_len);
switch (len) {
case 1:
b = ioread8(vp_dev->device + offset);
memcpy(buf, &b, sizeof b);
break;
case 2:
w = cpu_to_le16(ioread16(vp_dev->device + offset));
memcpy(buf, &w, sizeof w);
break;
case 4:
l = cpu_to_le32(ioread32(vp_dev->device + offset));
memcpy(buf, &l, sizeof l);
break;
case 8:
l = cpu_to_le32(ioread32(vp_dev->device + offset));
memcpy(buf, &l, sizeof l);
l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
memcpy(buf + sizeof l, &l, sizeof l);
break;
default:
BUG();
}
}
/* the config->set() implementation. it's symmetric to the config->get()
* implementation */
static void vp_set(struct virtio_device *vdev, unsigned offset,
const void *buf, unsigned len)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
u8 b;
__le16 w;
__le32 l;
BUG_ON(offset + len > vp_dev->device_len);
switch (len) {
case 1:
memcpy(&b, buf, sizeof b);
iowrite8(b, vp_dev->device + offset);
break;
case 2:
memcpy(&w, buf, sizeof w);
iowrite16(le16_to_cpu(w), vp_dev->device + offset);
break;
case 4:
memcpy(&l, buf, sizeof l);
iowrite32(le32_to_cpu(l), vp_dev->device + offset);
break;
case 8:
memcpy(&l, buf, sizeof l);
iowrite32(le32_to_cpu(l), vp_dev->device + offset);
memcpy(&l, buf + sizeof l, sizeof l);
iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
break;
default:
BUG();
}
}
static u32 vp_generation(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_ioread8(&vp_dev->common->config_generation);
}
/* config->{get,set}_status() implementations */
static u8 vp_get_status(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
return vp_ioread8(&vp_dev->common->device_status);
}
/* config->set_status() implementation: writes a non-zero status byte.
 * Writing 0 would reset the device (see vp_reset), so it is a BUG here. */
static void vp_set_status(struct virtio_device *vdev, u8 status)
{
	struct virtio_pci_common_cfg __iomem *cfg = to_vp_device(vdev)->common;

	/* We should never be setting status to 0. */
	BUG_ON(status == 0);

	vp_iowrite8(status, &cfg->device_status);
}
static void vp_reset(struct virtio_device *vdev)
{
struct virtio_pci_device *vp_dev = to_vp_device(vdev);
/* 0 status means a reset. */
vp_iowrite8(0, &vp_dev->common->device_status);
/* After writing 0 to device_status, the driver MUST wait for a read of
* device_status to return 0 before reinitializing the device.
* This will flush out the status write, and flush in device writes,
* including MSI-X interrupts, if any.
*/
while (vp_ioread8(&vp_dev->common->device_status))
msleep(1);
/* Flush pending VQ/configuration callbacks. */
vp_synchronize_vectors(vdev);
}
/* Program @vector as the MSI-X vector for configuration events and return
 * the value the device reports back afterwards. */
static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
{
	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
	u16 assigned;

	/* Setup the vector used for configuration events */
	vp_iowrite16(vector, &cfg->msix_config);

	/* Verify we had enough resources to assign the vector */
	/* Will also flush the write out to device */
	assigned = vp_ioread16(&cfg->msix_config);

	return assigned;
}
/* Page-aligned number of bytes needed for a ring of @num entries. */
static size_t vring_pci_size(u16 num)
{
	/* We only need a cacheline separation. */
	size_t bytes = vring_size(num, SMP_CACHE_BYTES);

	return PAGE_ALIGN(bytes);
}
/* Allocate zeroed pages for a ring of *@num entries.
 *
 * While the ring needs more than one page, a failed attempt halves *@num
 * and retries, so on return *@num is the size the memory was actually
 * allocated for.  Returns NULL if *@num is (or reaches) 0, or if the final
 * allocation fails. */
static void *alloc_virtqueue_pages(int *num)
{
	/* TODO: allocate each queue chunk individually */
	while (*num && vring_pci_size(*num) > PAGE_SIZE) {
		void *ring = alloc_pages_exact(vring_pci_size(*num),
					       GFP_KERNEL|__GFP_ZERO|__GFP_NOWARN);

		if (ring)
			return ring;

		*num /= 2;
	}

	if (*num == 0)
		return NULL;

	/* Try to get a single page. You are my only hope! */
	return alloc_pages_exact(vring_pci_size(*num), GFP_KERNEL|__GFP_ZERO);
}
/*
 * setup_vq - allocate and program one virtqueue of a modern virtio PCI device.
 * @vp_dev: the virtio PCI device
 * @info: per-queue bookkeeping; num, msix_vector and queue are filled in here
 * @index: queue index to select
 * @callback: handed to vring_new_virtqueue()
 * @name: handed to vring_new_virtqueue()
 * @msix_vec: MSI-X vector for the queue, or VIRTIO_MSI_NO_VECTOR
 *
 * Returns the new virtqueue or an ERR_PTR():
 *   -ENOENT  @index out of range, queue size 0, or queue already enabled
 *   -EINVAL  queue size not a power of two, or bad notification offset
 *   -ENOMEM  ring allocation, vring creation or notify mapping failed
 *   -EBUSY   the device did not accept @msix_vec
 *
 * The queue is programmed but not enabled here; vp_modern_find_vqs() writes
 * queue_enable afterwards.
 */
static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
				  struct virtio_pci_vq_info *info,
				  unsigned index,
				  void (*callback)(struct virtqueue *vq),
				  const char *name,
				  u16 msix_vec)
{
	struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
	struct virtqueue *vq;
	u16 num, off;
	int err;

	if (index >= vp_ioread16(&cfg->num_queues))
		return ERR_PTR(-ENOENT);

	/* Select the queue we're interested in */
	vp_iowrite16(index, &cfg->queue_select);

	/* Check if queue is either not available or already active. */
	num = vp_ioread16(&cfg->queue_size);
	if (!num || vp_ioread16(&cfg->queue_enable))
		return ERR_PTR(-ENOENT);

	if (num & (num - 1)) {
		dev_warn(&vp_dev->pci_dev->dev, "bad queue size %u\n", num);
		return ERR_PTR(-EINVAL);
	}

	/* get offset of notification word for this vq */
	off = vp_ioread16(&cfg->queue_notify_off);

	info->num = num;
	info->msix_vector = msix_vec;

	/* May shrink info->num if a ring of the full size cannot be allocated. */
	info->queue = alloc_virtqueue_pages(&info->num);
	if (info->queue == NULL)
		return ERR_PTR(-ENOMEM);

	/* create the vring */
	vq = vring_new_virtqueue(index, info->num,
				 SMP_CACHE_BYTES, &vp_dev->vdev,
				 true, info->queue, vp_notify, callback, name);
	if (!vq) {
		err = -ENOMEM;
		goto err_new_queue;
	}

	/*
	 * activate the queue.  Report the size the ring was really built with
	 * (info->num), not the size the device offered: the two differ when
	 * the allocation above had to fall back to a smaller ring, and the
	 * device must not use more entries than were allocated.
	 */
	vp_iowrite16(info->num, &cfg->queue_size);
	vp_iowrite64_twopart(virt_to_phys(info->queue),
			     &cfg->queue_desc_lo, &cfg->queue_desc_hi);
	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_avail(vq)),
			     &cfg->queue_avail_lo, &cfg->queue_avail_hi);
	vp_iowrite64_twopart(virt_to_phys(virtqueue_get_used(vq)),
			     &cfg->queue_used_lo, &cfg->queue_used_hi);

	if (vp_dev->notify_base) {
		/* offset should not wrap */
		if ((u64)off * vp_dev->notify_offset_multiplier + 2
		    > vp_dev->notify_len) {
			dev_warn(&vp_dev->pci_dev->dev,
				 "bad notification offset %u (x %u) "
				 "for queue %u > %zu\n",
				 off, vp_dev->notify_offset_multiplier,
				 index, vp_dev->notify_len);
			err = -EINVAL;
			goto err_map_notify;
		}
		/* The whole notify region is already mapped: just point into it. */
		vq->priv = (void __force *)vp_dev->notify_base +
			off * vp_dev->notify_offset_multiplier;
	} else {
		/* Map only this queue's 2-byte notification word. */
		vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
					  vp_dev->notify_map_cap, 2, 2,
					  off * vp_dev->notify_offset_multiplier, 2,
					  NULL);
	}

	if (!vq->priv) {
		err = -ENOMEM;
		goto err_map_notify;
	}

	if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
		vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
		/* Read back: NO_VECTOR means the device refused the vector. */
		msix_vec = vp_ioread16(&cfg->queue_msix_vector);
		if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
			err = -EBUSY;
			goto err_assign_vector;
		}
	}

	return vq;

err_assign_vector:
	/* Only a per-queue mapping needs to be undone here. */
	if (!vp_dev->notify_base)
		pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
err_map_notify:
	vring_del_virtqueue(vq);
err_new_queue:
	free_pages_exact(info->queue, vring_pci_size(info->num));
	return ERR_PTR(err);
}
/* config->find_vqs() implementation: set the queues up through
 * vp_find_vqs(), then enable every queue on the device's list.
 * Returns 0, or the error from vp_find_vqs(). */
static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
			      struct virtqueue *vqs[],
			      vq_callback_t *callbacks[],
			      const char *names[])
{
	struct virtio_pci_device *vp_dev = to_vp_device(vdev);
	struct virtqueue *cur;
	int err;

	err = vp_find_vqs(vdev, nvqs, vqs, callbacks, names);
	if (err != 0)
		return err;

	/* Select and activate all queues. Has to be done last: once we do
	 * this, there's no way to go back except reset.
	 */
	list_for_each_entry(cur, &vdev->vqs, list) {
		vp_iowrite16(cur->index, &vp_dev->common->queue_select);
		vp_iowrite16(1, &vp_dev->common->queue_enable);
	}

	return 0;
}
static void del_vq(struct virtio_pci_vq_info *info)
{
struct virtqueue *vq = info->vq;
struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
vp_iowrite16(vq->index, &vp_dev->common->queue_select);
if (vp_dev->msix_enabled) {
vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
&vp_dev->common->queue_msix_vector);
/* Flush the write out to device */
vp_ioread16(&vp_dev->common->queue_msix_vector);
}
if (!vp_dev->notify_base)
pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
vring_del_virtqueue(vq);
free_pages_exact(info->queue, vring_pci_size(info->num));
}
/* Config ops with no device-specific config accessors: .get and .set are
 * NULL, everything else matches virtio_pci_config_ops. */
static const struct virtio_config_ops virtio_pci_config_nodev_ops = {
	/* device-specific config space: not available */
	.get			= NULL,
	.set			= NULL,
	.generation		= vp_generation,

	/* status and reset */
	.get_status		= vp_get_status,
	.set_status		= vp_set_status,
	.reset			= vp_reset,

	/* features */
	.get_features		= vp_get_features,
	.finalize_features	= vp_finalize_features,

	/* virtqueues */
	.find_vqs		= vp_modern_find_vqs,
	.del_vqs		= vp_del_vqs,
	.set_vq_affinity	= vp_set_vq_affinity,

	.bus_name		= vp_bus_name,
};
/* Config ops including the device-specific config accessors vp_get/vp_set. */
static const struct virtio_config_ops virtio_pci_config_ops = {
	/* device-specific config space */
	.get			= vp_get,
	.set			= vp_set,
	.generation		= vp_generation,

	/* status and reset */
	.get_status		= vp_get_status,
	.set_status		= vp_set_status,
	.reset			= vp_reset,

	/* features */
	.get_features		= vp_get_features,
	.finalize_features	= vp_finalize_features,

	/* virtqueues */
	.find_vqs		= vp_modern_find_vqs,
	.del_vqs		= vp_del_vqs,
	.set_vq_affinity	= vp_set_vq_affinity,

	.bus_name		= vp_bus_name,
};
/**
 * virtio_pci_find_capability - walk capabilities to find device info.
 * @dev: the pci device
 * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
 * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
 * @bars: bitmask that gets bit N set when the matching capability lives
 *        in BAR N; left untouched when nothing matches
 *
 * A vendor capability matches when its cfg_type equals @cfg_type, its BAR
 * index is not reserved (<= 5), and that BAR has a non-zero length and one
 * of the requested resource types.
 *
 * Returns offset of the capability, or 0.
 */
static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
					     u32 ioresource_types, int *bars)
{
	int cap;

	for (cap = pci_find_capability(dev, PCI_CAP_ID_VNDR);
	     cap > 0;
	     cap = pci_find_next_capability(dev, cap, PCI_CAP_ID_VNDR)) {
		u8 cap_type, cap_bar;

		pci_read_config_byte(dev,
				     cap + offsetof(struct virtio_pci_cap,
						    cfg_type),
				     &cap_type);
		pci_read_config_byte(dev,
				     cap + offsetof(struct virtio_pci_cap,
						    bar),
				     &cap_bar);

		/* Ignore structures with reserved BAR values */
		if (cap_bar > 0x5)
			continue;

		if (cap_type != cfg_type)
			continue;

		/* The BAR must exist and be of a wanted resource type. */
		if (!pci_resource_len(dev, cap_bar))
			continue;
		if (!(pci_resource_flags(dev, cap_bar) & ioresource_types))
			continue;

		*bars |= (1 << cap_bar);
		return cap;
	}

	return 0;
}
/* This is part of the ABI.  Don't screw with it.
 *
 * Compile-time proof that every VIRTIO_PCI_* offset constant agrees with
 * the layout of the corresponding structure member. */
static inline void check_offsets(void)
{
	/* Note: disk space was harmed in compilation of this function. */
#define VP_ASSERT_OFFSET(constant, type, member) \
	BUILD_BUG_ON(offsetof(struct type, member) != (constant))

	/* struct virtio_pci_cap */
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_VNDR, virtio_pci_cap, cap_vndr);
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_NEXT, virtio_pci_cap, cap_next);
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_LEN, virtio_pci_cap, cap_len);
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_CFG_TYPE, virtio_pci_cap, cfg_type);
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_BAR, virtio_pci_cap, bar);
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_OFFSET, virtio_pci_cap, offset);
	VP_ASSERT_OFFSET(VIRTIO_PCI_CAP_LENGTH, virtio_pci_cap, length);

	/* struct virtio_pci_notify_cap */
	VP_ASSERT_OFFSET(VIRTIO_PCI_NOTIFY_CAP_MULT, virtio_pci_notify_cap,
			 notify_off_multiplier);

	/* struct virtio_pci_common_cfg: device-wide fields */
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_DFSELECT, virtio_pci_common_cfg,
			 device_feature_select);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_DF, virtio_pci_common_cfg,
			 device_feature);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_GFSELECT, virtio_pci_common_cfg,
			 guest_feature_select);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_GF, virtio_pci_common_cfg,
			 guest_feature);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_MSIX, virtio_pci_common_cfg,
			 msix_config);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_NUMQ, virtio_pci_common_cfg,
			 num_queues);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_STATUS, virtio_pci_common_cfg,
			 device_status);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_CFGGENERATION, virtio_pci_common_cfg,
			 config_generation);

	/* struct virtio_pci_common_cfg: per-queue fields */
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_SELECT, virtio_pci_common_cfg,
			 queue_select);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_SIZE, virtio_pci_common_cfg,
			 queue_size);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_MSIX, virtio_pci_common_cfg,
			 queue_msix_vector);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_ENABLE, virtio_pci_common_cfg,
			 queue_enable);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_NOFF, virtio_pci_common_cfg,
			 queue_notify_off);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_DESCLO, virtio_pci_common_cfg,
			 queue_desc_lo);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_DESCHI, virtio_pci_common_cfg,
			 queue_desc_hi);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_AVAILLO, virtio_pci_common_cfg,
			 queue_avail_lo);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_AVAILHI, virtio_pci_common_cfg,
			 queue_avail_hi);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_USEDLO, virtio_pci_common_cfg,
			 queue_used_lo);
	VP_ASSERT_OFFSET(VIRTIO_PCI_COMMON_Q_USEDHI, virtio_pci_common_cfg,
			 queue_used_hi);

#undef VP_ASSERT_OFFSET
}
/*
 * the PCI probing function
 *
 * Locates the virtio vendor capabilities of @vp_dev->pci_dev, requests the
 * BARs they live in, maps the common, ISR, notify and (optional) device
 * configuration regions and installs the modern config ops.
 *
 * Returns 0 on success, -ENODEV when the device id is outside
 * 0x1000..0x107f or no common config capability exists (the message says
 * "leaving for legacy driver"), -EINVAL when a mandatory capability is
 * missing or a mapping fails, or the error from
 * pci_request_selected_regions().
 */
int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
{
	struct pci_dev *pci_dev = vp_dev->pci_dev;
	int err, common, isr, notify, device;
	u32 notify_length;
	u32 notify_offset;

	check_offsets();

	/* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
	if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
		return -ENODEV;

	if (pci_dev->device < 0x1040) {
		/* Transitional devices: use the PCI subsystem device id as
		 * virtio device id, same as legacy driver always did.
		 */
		vp_dev->vdev.id.device = pci_dev->subsystem_device;
	} else {
		/* Modern devices: simply use PCI device id, but start from 0x1040. */
		vp_dev->vdev.id.device = pci_dev->device - 0x1040;
	}
	vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;

	/* check for a common config: if not, use legacy mode (bar 0). */
	common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &vp_dev->modern_bars);
	if (!common) {
		dev_info(&pci_dev->dev,
			 "virtio_pci: leaving for legacy driver\n");
		return -ENODEV;
	}

	/* If common is there, these should be too... */
	isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
					 IORESOURCE_IO | IORESOURCE_MEM,
					 &vp_dev->modern_bars);
	notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &vp_dev->modern_bars);
	if (!isr || !notify) {
		dev_err(&pci_dev->dev,
			"virtio_pci: missing capabilities %i/%i/%i\n",
			common, isr, notify);
		return -EINVAL;
	}

	/* Device capability is only mandatory for devices that have
	 * device-specific configuration.
	 */
	device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
					    IORESOURCE_IO | IORESOURCE_MEM,
					    &vp_dev->modern_bars);

	err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars,
					   "virtio-pci-modern");
	if (err)
		return err;

	/* Every failure from here on is reported as -EINVAL. */
	err = -EINVAL;
	vp_dev->common = map_capability(pci_dev, common,
					sizeof(struct virtio_pci_common_cfg), 4,
					0, sizeof(struct virtio_pci_common_cfg),
					NULL);
	if (!vp_dev->common)
		goto err_map_common;
	vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
				     0, 1,
				     NULL);
	if (!vp_dev->isr)
		goto err_map_isr;

	/* Read notify_off_multiplier from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						notify_off_multiplier),
			      &vp_dev->notify_offset_multiplier);
	/* Read notify length and offset from config space. */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.length),
			      &notify_length);

	/* The offset lives in cap.offset; reading cap.length again here would
	 * make the page-fit test below use the length twice.
	 */
	pci_read_config_dword(pci_dev,
			      notify + offsetof(struct virtio_pci_notify_cap,
						cap.offset),
			      &notify_offset);

	/* We don't know how many VQs we'll map, ahead of the time.
	 * If notify length is small, map it all now.
	 * Otherwise, map each VQ individually later.
	 */
	if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
		vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
						     0, notify_length,
						     &vp_dev->notify_len);
		if (!vp_dev->notify_base)
			goto err_map_notify;
	} else {
		vp_dev->notify_map_cap = notify;
	}

	/* Again, we don't know how much we should map, but PAGE_SIZE
	 * is more than enough for all existing devices.
	 */
	if (device) {
		vp_dev->device = map_capability(pci_dev, device, 0, 4,
						0, PAGE_SIZE,
						&vp_dev->device_len);
		if (!vp_dev->device)
			goto err_map_device;

		vp_dev->vdev.config = &virtio_pci_config_ops;
	} else {
		vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
	}

	vp_dev->config_vector = vp_config_vector;
	vp_dev->setup_vq = setup_vq;
	vp_dev->del_vq = del_vq;

	return 0;

err_map_device:
	if (vp_dev->notify_base)
		pci_iounmap(pci_dev, vp_dev->notify_base);
err_map_notify:
	pci_iounmap(pci_dev, vp_dev->isr);
err_map_isr:
	pci_iounmap(pci_dev, vp_dev->common);
err_map_common:
	/* Give back the BARs requested above, as the remove path does. */
	pci_release_selected_regions(pci_dev, vp_dev->modern_bars);
	return err;
}
/* Undo virtio_pci_modern_probe(): unmap the optional device and notify
 * regions if present, unmap the ISR and common regions, then release the
 * BARs recorded in modern_bars. */
void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
{
	struct pci_dev *pdev = vp_dev->pci_dev;

	/* optional mappings */
	if (vp_dev->device)
		pci_iounmap(pdev, vp_dev->device);
	if (vp_dev->notify_base)
		pci_iounmap(pdev, vp_dev->notify_base);

	/* mandatory mappings */
	pci_iounmap(pdev, vp_dev->isr);
	pci_iounmap(pdev, vp_dev->common);

	pci_release_selected_regions(pdev, vp_dev->modern_bars);
}

View File

@@ -0,0 +1,875 @@
/* Virtio ring implementation.
*
* Copyright 2007 Rusty Russell IBM Corporation
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include <linux/virtio.h>
#include <linux/virtio_ring.h>
#include <linux/virtio_config.h>
#include <linux/device.h>
#include <linux/slab.h>
#include <linux/module.h>
#include <linux/hrtimer.h>
#include <linux/kmemleak.h>
#ifdef DEBUG
/* For development, we want to crash whenever the ring is screwed. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		BUG();						\
	} while (0)
/* Caller is supposed to guarantee no reentry. */
#define START_USE(_vq)						\
	do {							\
		if ((_vq)->in_use)				\
			panic("%s:in_use = %i\n",		\
			      (_vq)->vq.name, (_vq)->in_use);	\
		(_vq)->in_use = __LINE__;			\
	} while (0)
#define END_USE(_vq) \
	do { BUG_ON(!(_vq)->in_use); (_vq)->in_use = 0; } while(0)
#else
/* Production build: log the problem and mark the queue broken instead of
 * crashing.  The argument is parenthesized on every use, matching the DEBUG
 * variant, so any pointer expression can be passed safely. */
#define BAD_RING(_vq, fmt, args...)				\
	do {							\
		dev_err(&(_vq)->vq.vdev->dev,			\
			"%s:"fmt, (_vq)->vq.name, ##args);	\
		(_vq)->broken = true;				\
	} while (0)
/* Reentrancy tracking compiles away without DEBUG. */
#define START_USE(vq)
#define END_USE(vq)
#endif
/* Private per-queue state wrapped around the generic struct virtqueue.
 * The trailing data[] array holds one caller token per descriptor head. */
struct vring_virtqueue {
/* Generic virtqueue; to_vvq() recovers the containing structure from it. */
struct virtqueue vq;
/* Actual memory layout for this queue */
struct vring vring;
/* Can we use weak barriers? (passed to virtio_mb/rmb/wmb) */
bool weak_barriers;
/* Other side has made a mess, don't try any more. */
bool broken;
/* Host supports indirect buffers */
bool indirect;
/* Host publishes avail event idx */
bool event;
/* Head of free buffer list. */
unsigned int free_head;
/* Number we've added since last sync. */
unsigned int num_added;
/* Last used index we've seen. */
u16 last_used_idx;
/* Last written value to avail->flags */
u16 avail_flags_shadow;
/* Last written value to avail->idx in guest byte order */
u16 avail_idx_shadow;
/* How to notify other side. FIXME: commonalize hcalls! */
bool (*notify)(struct virtqueue *vq);
#ifdef DEBUG
/* They're supposed to lock for us. Holds __LINE__ of the START_USE() in progress, 0 when idle. */
unsigned int in_use;
/* Figure out if their kicks are too delayed. */
bool last_add_time_valid;
ktime_t last_add_time;
#endif
/* Tokens for callbacks, indexed by descriptor head. */
void *data[];
};
/* Convert a struct virtqueue pointer to its enclosing vring_virtqueue. */
#define to_vvq(_vq) container_of(_vq, struct vring_virtqueue, vq)
/*
 * alloc_indirect - allocate an indirect descriptor table.
 * @_vq: the virtqueue the table is for (its vdev selects the byte order)
 * @total_sg: number of descriptors in the table
 * @gfp: allocation flags; __GFP_HIGHMEM is masked off
 *
 * Each entry's next field is preset to the following index so the table
 * can be filled by walking the chain.  Returns NULL if the allocation
 * fails.
 */
static struct vring_desc *alloc_indirect(struct virtqueue *_vq,
					 unsigned int total_sg, gfp_t gfp)
{
	struct vring_desc *desc;
	unsigned int i;

	/*
	 * We require lowmem mappings for the descriptors because
	 * otherwise virt_to_phys will give us bogus addresses in the
	 * virtqueue.
	 */
	gfp &= ~__GFP_HIGHMEM;

	/* kmalloc_array() refuses a count * size product that would overflow. */
	desc = kmalloc_array(total_sg, sizeof(*desc), gfp);
	if (!desc)
		return NULL;

	for (i = 0; i < total_sg; i++)
		desc[i].next = cpu_to_virtio16(_vq->vdev, i + 1);
	return desc;
}
/*
 * virtqueue_add - common implementation behind the virtqueue_add_*() calls.
 * @_vq: the virtqueue
 * @sgs: array of scatterlists; the first @out_sgs are filled without the
 *       WRITE flag, the next @in_sgs with VRING_DESC_F_WRITE
 * @total_sg: total number of scatterlist entries over all of @sgs
 *            (must be non-zero and no larger than the ring)
 * @out_sgs: number of leading lists in @sgs without the WRITE flag
 * @in_sgs: number of following lists in @sgs with the WRITE flag
 * @data: non-NULL token stored for the buffer head
 * @gfp: allocation flags for an indirect descriptor table
 *
 * Returns 0 on success, -EIO if the queue is marked broken, or -ENOSPC if
 * there are not enough free descriptors.
 */
static inline int virtqueue_add(struct virtqueue *_vq,
struct scatterlist *sgs[],
unsigned int total_sg,
unsigned int out_sgs,
unsigned int in_sgs,
void *data,
gfp_t gfp)
{
struct vring_virtqueue *vq = to_vvq(_vq);
struct scatterlist *sg;
struct vring_desc *desc;
unsigned int i, n, avail, descs_used, uninitialized_var(prev);
int head;
bool indirect;
START_USE(vq);
BUG_ON(data == NULL);
if (unlikely(vq->broken)) {
END_USE(vq);
return -EIO;
}
#ifdef DEBUG
{
ktime_t now = ktime_get();
/* No kick or get, with .1 second between? Warn. */
if (vq->last_add_time_valid)
WARN_ON(ktime_to_ms(ktime_sub(now, vq->last_add_time))
> 100);
vq->last_add_time = now;
vq->last_add_time_valid = true;
}
#endif
BUG_ON(total_sg > vq->vring.num);
BUG_ON(total_sg == 0);
head = vq->free_head;
/* If the host supports indirect descriptor tables, and we have multiple
 * buffers, then go indirect. FIXME: tune this threshold */
if (vq->indirect && total_sg > 1 && vq->vq.num_free)
desc = alloc_indirect(_vq, total_sg, gfp);
else
desc = NULL;
/* A failed indirect allocation leaves desc NULL, which falls through to
 * the direct-descriptor path below. */
if (desc) {
/* Use a single buffer which doesn't continue */
vq->vring.desc[head].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_INDIRECT);
vq->vring.desc[head].addr = cpu_to_virtio64(_vq->vdev, virt_to_phys(desc));
/* avoid kmemleak false positive (hidden by virt_to_phys) */
kmemleak_ignore(desc);
vq->vring.desc[head].len = cpu_to_virtio32(_vq->vdev, total_sg * sizeof(struct vring_desc));
/* Set up rest to use this indirect table. */
i = 0;
descs_used = 1;
indirect = true;
} else {
desc = vq->vring.desc;
i = head;
descs_used = total_sg;
indirect = false;
}
if (vq->vq.num_free < descs_used) {
pr_debug("Can't add buf len %i - avail = %i\n",
descs_used, vq->vq.num_free);
/* FIXME: for historical reasons, we force a notify here if
 * there are outgoing parts to the buffer. Presumably the
 * host should service the ring ASAP. */
if (out_sgs)
vq->notify(&vq->vq);
/* The indirect table was never published, so free it here. */
if (indirect)
kfree(desc);
END_USE(vq);
return -ENOSPC;
}
/* We're about to use some buffers from the free list. */
vq->vq.num_free -= descs_used;
/* First the out_sgs lists: descriptors without the WRITE flag. */
for (n = 0; n < out_sgs; n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT);
desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i;
i = virtio16_to_cpu(_vq->vdev, desc[i].next);
}
}
/* Then the in_sgs lists: descriptors flagged VRING_DESC_F_WRITE. */
for (; n < (out_sgs + in_sgs); n++) {
for (sg = sgs[n]; sg; sg = sg_next(sg)) {
desc[i].flags = cpu_to_virtio16(_vq->vdev, VRING_DESC_F_NEXT | VRING_DESC_F_WRITE);
desc[i].addr = cpu_to_virtio64(_vq->vdev, sg_phys(sg));
desc[i].len = cpu_to_virtio32(_vq->vdev, sg->length);
prev = i;
i = virtio16_to_cpu(_vq->vdev, desc[i].next);
}
}
/* Last one doesn't continue. */
desc[prev].flags &= cpu_to_virtio16(_vq->vdev, ~VRING_DESC_F_NEXT);
/* Update free pointer: an indirect buffer consumed only the head
 * descriptor, a direct one consumed the chain up to index i. */
if (indirect)
vq->free_head = virtio16_to_cpu(_vq->vdev, vq->vring.desc[head].next);
else
vq->free_head = i;
/* Set token. */
vq->data[head] = data;
/* Put entry in available array (but don't update avail->idx until they
 * do sync). */
avail = vq->avail_idx_shadow & (vq->vring.num - 1);
vq->vring.avail->ring[avail] = cpu_to_virtio16(_vq->vdev, head);
/* Descriptors and available array need to be set before we expose the
 * new available array entries. */
virtio_wmb(vq->weak_barriers);
vq->avail_idx_shadow++;
vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
vq->num_added++;
pr_debug("Added buffer head %i to %p\n", head, vq);
END_USE(vq);
/* This is very unlikely, but theoretically possible. Kick
 * just in case. */
if (unlikely(vq->num_added == (1 << 16) - 1))
virtqueue_kick(_vq);
return 0;
}
/**
 * virtqueue_add_sgs - expose buffers to other end
 * @_vq: the struct virtqueue we're talking about.
 * @sgs: array of terminated scatterlists.
 * @out_sgs: the number of scatterlists readable by other side
 * @in_sgs: the number of scatterlists which are writable (after readable ones)
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_sgs(struct virtqueue *_vq,
		      struct scatterlist *sgs[],
		      unsigned int out_sgs,
		      unsigned int in_sgs,
		      void *data,
		      gfp_t gfp)
{
	unsigned int list, entries = 0;
	struct scatterlist *cur;

	/* Count them first. */
	for (list = 0; list < out_sgs + in_sgs; list++) {
		cur = sgs[list];
		while (cur) {
			entries++;
			cur = sg_next(cur);
		}
	}

	return virtqueue_add(_vq, sgs, entries, out_sgs, in_sgs, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_sgs);
/**
 * virtqueue_add_outbuf - expose output buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg readable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_outbuf(struct virtqueue *vq,
			 struct scatterlist *sg, unsigned int num,
			 void *data,
			 gfp_t gfp)
{
	/* One list, and it is device-readable: out_sgs = 1, in_sgs = 0. */
	struct scatterlist *lists[] = { sg };

	return virtqueue_add(vq, lists, num, 1, 0, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_outbuf);
/**
 * virtqueue_add_inbuf - expose input buffers to other end
 * @vq: the struct virtqueue we're talking about.
 * @sg: scatterlist (must be well-formed and terminated!)
 * @num: the number of entries in @sg writable by other side
 * @data: the token identifying the buffer.
 * @gfp: how to do memory allocations (if necessary).
 *
 * Caller must ensure we don't call this with other virtqueue operations
 * at the same time (except where noted).
 *
 * Returns zero or a negative error (ie. ENOSPC, ENOMEM, EIO).
 */
int virtqueue_add_inbuf(struct virtqueue *vq,
			struct scatterlist *sg, unsigned int num,
			void *data,
			gfp_t gfp)
{
	/* One list, and it is device-writable: out_sgs = 0, in_sgs = 1. */
	struct scatterlist *lists[] = { sg };

	return virtqueue_add(vq, lists, num, 0, 1, data, gfp);
}
EXPORT_SYMBOL_GPL(virtqueue_add_inbuf);
/**
 * virtqueue_kick_prepare - first half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * Instead of virtqueue_kick(), you can do:
 *	if (virtqueue_kick_prepare(vq))
 *		virtqueue_notify(vq);
 *
 * This is sometimes useful because the virtqueue_kick_prepare() needs
 * to be serialized, but the actual virtqueue_notify() call does not.
 *
 * Returns true when the other side should be notified.
 */
bool virtqueue_kick_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 idx_before, idx_now;
	bool kick;

	START_USE(vq);
	/* We need to expose available array entries before checking avail
	 * event. */
	virtio_mb(vq->weak_barriers);

	/* Window of avail indices added since the last prepare. */
	idx_now = vq->avail_idx_shadow;
	idx_before = idx_now - vq->num_added;
	vq->num_added = 0;

#ifdef DEBUG
	if (vq->last_add_time_valid) {
		WARN_ON(ktime_to_ms(ktime_sub(ktime_get(),
					      vq->last_add_time)) > 100);
	}
	vq->last_add_time_valid = false;
#endif

	if (!vq->event) {
		/* No event index: honour the NO_NOTIFY flag in used->flags. */
		kick = !(vq->vring.used->flags & cpu_to_virtio16(_vq->vdev, VRING_USED_F_NO_NOTIFY));
	} else {
		u16 event_idx = virtio16_to_cpu(_vq->vdev,
						vring_avail_event(&vq->vring));

		kick = vring_need_event(event_idx, idx_now, idx_before);
	}
	END_USE(vq);
	return kick;
}
EXPORT_SYMBOL_GPL(virtqueue_kick_prepare);
/**
 * virtqueue_notify - second half of split virtqueue_kick call.
 * @_vq: the struct virtqueue
 *
 * This does not need to be serialized.
 *
 * A failed notify marks the queue broken.
 *
 * Returns false if host notify failed or queue is broken, otherwise true.
 */
bool virtqueue_notify(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (unlikely(vq->broken))
		return false;

	/* Prod other side to tell it about changes. */
	if (vq->notify(_vq))
		return true;

	vq->broken = true;
	return false;
}
EXPORT_SYMBOL_GPL(virtqueue_notify);
/**
 * virtqueue_kick - update after add_buf
 * @vq: the struct virtqueue
 *
 * After one or more virtqueue_add_* calls, invoke this to kick
 * the other side.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns false if kick failed, otherwise true.
 */
bool virtqueue_kick(struct virtqueue *vq)
{
	/* No notification needed counts as success. */
	if (!virtqueue_kick_prepare(vq))
		return true;

	return virtqueue_notify(vq);
}
EXPORT_SYMBOL_GPL(virtqueue_kick);
static void detach_buf(struct vring_virtqueue *vq, unsigned int head)
{
unsigned int i;
/* Clear data ptr. */
vq->data[head] = NULL;
/* Put back on free list: find end */
i = head;
/* Free the indirect table */
if (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_INDIRECT))
kfree(phys_to_virt(virtio64_to_cpu(vq->vq.vdev, vq->vring.desc[i].addr)));
while (vq->vring.desc[i].flags & cpu_to_virtio16(vq->vq.vdev, VRING_DESC_F_NEXT)) {
i = virtio16_to_cpu(vq->vq.vdev, vq->vring.desc[i].next);
vq->vq.num_free++;
}
vq->vring.desc[i].next = cpu_to_virtio16(vq->vq.vdev, vq->free_head);
vq->free_head = head;
/* Plus final descriptor */
vq->vq.num_free++;
}
static inline bool more_used(const struct vring_virtqueue *vq)
{
return vq->last_used_idx != virtio16_to_cpu(vq->vq.vdev, vq->vring.used->idx);
}
/**
 * virtqueue_get_buf - get the next used buffer
 * @_vq: the struct virtqueue we're talking about.
 * @len: set to the length recorded in the used ring entry
 *
 * @len is taken from the used ring entry for the buffer, so it reports
 * how much was written into it.  This means you don't need to clear the
 * buffer beforehand to ensure there's no data leakage in the case of short
 * writes.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 *
 * Returns NULL if the queue is broken, there are no used buffers, or the
 * used entry is invalid; otherwise the "data" token handed to
 * virtqueue_add_*().
 */
void *virtqueue_get_buf(struct virtqueue *_vq, unsigned int *len)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	void *token;
	unsigned int id;
	u16 slot;

	START_USE(vq);

	if (unlikely(vq->broken)) {
		END_USE(vq);
		return NULL;
	}

	if (!more_used(vq)) {
		pr_debug("No more buffers in queue\n");
		END_USE(vq);
		return NULL;
	}

	/* Only get used array entries after they have been exposed by host. */
	virtio_rmb(vq->weak_barriers);

	slot = (vq->last_used_idx & (vq->vring.num - 1));
	id = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[slot].id);
	*len = virtio32_to_cpu(_vq->vdev, vq->vring.used->ring[slot].len);

	/* The id comes from the other side: validate before indexing. */
	if (unlikely(id >= vq->vring.num)) {
		BAD_RING(vq, "id %u out of range\n", id);
		return NULL;
	}
	if (unlikely(!vq->data[id])) {
		BAD_RING(vq, "id %u is not a head!\n", id);
		return NULL;
	}

	/* detach_buf clears data, so grab it now. */
	token = vq->data[id];
	detach_buf(vq, id);
	vq->last_used_idx++;
	/* If we expect an interrupt for the next entry, tell host
	 * by writing event index and flush out the write before
	 * the read in the next get_buf call. */
	if (!(vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)) {
		vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx);
		virtio_mb(vq->weak_barriers);
	}

#ifdef DEBUG
	vq->last_add_time_valid = false;
#endif

	END_USE(vq);
	return token;
}
EXPORT_SYMBOL_GPL(virtqueue_get_buf);
/**
 * virtqueue_disable_cb - disable callbacks
 * @_vq: the struct virtqueue we're talking about.
 *
 * Note that this is not necessarily synchronous, hence unreliable and only
 * useful as an optimization.
 *
 * Unlike other operations, this need not be serialized.
 */
void virtqueue_disable_cb(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	/* Already disabled: nothing to write. */
	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT)
		return;

	vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;

	/* The flag is only written to the ring when event idx is not in use. */
	if (!vq->event)
		vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
}
EXPORT_SYMBOL_GPL(virtqueue_disable_cb);
/**
 * virtqueue_enable_cb_prepare - restart callbacks after disable_cb
 * @_vq: the struct virtqueue we're talking about.
 *
 * This re-enables callbacks; it returns current queue state
 * in an opaque unsigned value. This value should be later tested by
 * virtqueue_poll, to detect a possible race between the driver checking for
 * more work, and enabling callbacks.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
unsigned virtqueue_enable_cb_prepare(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 snapshot;

	START_USE(vq);

	/* We optimistically turn back on interrupts, then check if there was
	 * more to do. */
	/* Depending on the VIRTIO_RING_F_EVENT_IDX feature, we need to
	 * either clear the flags bit or point the event index at the next
	 * entry. Always do both to keep code simple. */
	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
	}

	/* Publish the index we have consumed up to, and hand it back. */
	snapshot = vq->last_used_idx;
	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, snapshot);

	END_USE(vq);
	return snapshot;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_prepare);
/**
* virtqueue_poll - query pending used buffers
* @vq: the struct virtqueue we're talking about.
* @last_used_idx: virtqueue state (from call to virtqueue_enable_cb_prepare).
*
* Returns "true" if there are pending used buffers in the queue.
*
* This does not need to be serialized.
*/
bool virtqueue_poll(struct virtqueue *_vq, unsigned last_used_idx)
{
struct vring_virtqueue *vq = to_vvq(_vq);
virtio_mb(vq->weak_barriers);
return (u16)last_used_idx != virtio16_to_cpu(_vq->vdev, vq->vring.used->idx);
}
EXPORT_SYMBOL_GPL(virtqueue_poll);
/**
* virtqueue_enable_cb - restart callbacks after disable_cb.
* @vq: the struct virtqueue we're talking about.
*
* This re-enables callbacks; it returns "false" if there are pending
* buffers in the queue, to detect a possible race between the driver
* checking for more work, and enabling callbacks.
*
* Caller must ensure we don't call this with other virtqueue
* operations at the same time (except where noted).
*/
bool virtqueue_enable_cb(struct virtqueue *_vq)
{
unsigned last_used_idx = virtqueue_enable_cb_prepare(_vq);
return !virtqueue_poll(_vq, last_used_idx);
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb);
/**
 * virtqueue_enable_cb_delayed - restart callbacks after disable_cb.
 * @_vq: the struct virtqueue we're talking about.
 *
 * Re-enables callbacks but hints to the other side to delay interrupts
 * until most (three quarters) of the outstanding buffers have been
 * processed.  Returns "false" if more buffers than that are already pending
 * in the queue, to detect a possible race between the driver checking for
 * more work and enabling callbacks; returns "true" otherwise.
 *
 * Caller must ensure we don't call this with other virtqueue
 * operations at the same time (except where noted).
 */
bool virtqueue_enable_cb_delayed(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	u16 threshold;
	u16 already_used;
	bool enabled_in_time = true;

	START_USE(vq);

	/*
	 * Interrupts are switched back on optimistically, then we check
	 * whether there was more to do.
	 *
	 * Depending on the VIRTIO_RING_F_EVENT_IDX feature, either the flags
	 * bit has to be cleared or the event index has to be moved.  The
	 * event index is always updated to keep the code simple.
	 */
	if (vq->avail_flags_shadow & VRING_AVAIL_F_NO_INTERRUPT) {
		vq->avail_flags_shadow &= ~VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(_vq->vdev, vq->avail_flags_shadow);
	}

	/* TODO: tune this threshold */
	threshold = (u16)(vq->avail_idx_shadow - vq->last_used_idx) * 3 / 4;
	vring_used_event(&vq->vring) = cpu_to_virtio16(_vq->vdev, vq->last_used_idx + threshold);

	/* Barrier between publishing the event index and reading used->idx. */
	virtio_mb(vq->weak_barriers);

	already_used = virtio16_to_cpu(_vq->vdev, vq->vring.used->idx) - vq->last_used_idx;
	if (unlikely(already_used > threshold))
		enabled_in_time = false;

	END_USE(vq);
	return enabled_in_time;
}
EXPORT_SYMBOL_GPL(virtqueue_enable_cb_delayed);
/**
 * virtqueue_detach_unused_buf - detach first unused buffer
 * @_vq: the struct virtqueue we're talking about.
 *
 * Scans the per-descriptor data tokens from index 0 upwards, detaches the
 * first one found and steps the avail index back by one.
 *
 * Returns NULL or the "data" token handed to virtqueue_add_*().
 * This is not valid on an active queue; it is useful only for device
 * shutdown.
 */
void *virtqueue_detach_unused_buf(struct virtqueue *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);
	unsigned int head;
	void *token;

	START_USE(vq);

	for (head = 0; head < vq->vring.num; head++) {
		/* detach_buf clears data, so the token is read before it runs. */
		token = vq->data[head];
		if (token) {
			detach_buf(vq, head);
			vq->avail_idx_shadow--;
			vq->vring.avail->idx = cpu_to_virtio16(_vq->vdev, vq->avail_idx_shadow);
			END_USE(vq);
			return token;
		}
	}

	/* No token left: every descriptor should be free by now. */
	BUG_ON(vq->vq.num_free != vq->vring.num);

	END_USE(vq);
	return NULL;
}
EXPORT_SYMBOL_GPL(virtqueue_detach_unused_buf);
/**
 * vring_interrupt - interrupt entry point for a virtqueue
 * @irq: interrupt number (not used here).
 * @_vq: cookie converted with to_vvq(); presumably the struct virtqueue the
 *	handler was registered for - confirm against the callers.
 *
 * Returns IRQ_NONE when more_used() reports no work.  Otherwise returns
 * IRQ_HANDLED, after invoking the queue's callback if one is set and the
 * queue is not marked broken.
 */
irqreturn_t vring_interrupt(int irq, void *_vq)
{
	struct vring_virtqueue *vq = to_vvq(_vq);

	if (!more_used(vq)) {
		pr_debug("virtqueue interrupt with no work for %p\n", vq);
		return IRQ_NONE;
	}

	/* A broken queue still claims the interrupt but skips the callback. */
	if (!unlikely(vq->broken)) {
		pr_debug("virtqueue callback for %p (%p)\n", vq, vq->vq.callback);
		if (vq->vq.callback)
			vq->vq.callback(&vq->vq);
	}

	return IRQ_HANDLED;
}
EXPORT_SYMBOL_GPL(vring_interrupt);
/**
 * vring_new_virtqueue - allocate and initialise a virtqueue on top of a vring
 * @index: queue index, stored in the returned virtqueue.
 * @num: number of ring entries; must be a non-zero power of 2.
 * @vring_align: alignment handed to vring_init() when laying out the ring.
 * @vdev: the virtio device the queue belongs to; the new queue is appended
 *	to its vqs list.
 * @weak_barriers: stored in the queue and used for its virtio_mb() calls.
 * @pages: memory handed to vring_init() as the ring storage.
 * @notify: stored as the queue's notify hook.
 * @callback: stored as the queue's callback; may be NULL, in which case
 *	VRING_AVAIL_F_NO_INTERRUPT is set from the start.
 * @name: stored in the returned virtqueue.
 *
 * Returns the struct virtqueue embedded in the new queue, or NULL if @num
 * is not a valid ring size or the allocation fails.
 */
struct virtqueue *vring_new_virtqueue(unsigned int index,
				      unsigned int num,
				      unsigned int vring_align,
				      struct virtio_device *vdev,
				      bool weak_barriers,
				      void *pages,
				      bool (*notify)(struct virtqueue *),
				      void (*callback)(struct virtqueue *),
				      const char *name)
{
	struct vring_virtqueue *vq;
	unsigned int i;

	/*
	 * The ring size must be a non-zero power of 2.  Zero is rejected
	 * explicitly: it passes the "num & (num - 1)" test, and "num - 1"
	 * would then wrap around in the free-list loop below and write far
	 * beyond the allocation.
	 */
	if (!num || (num & (num - 1))) {
		dev_warn(&vdev->dev, "Bad virtqueue length %u\n", num);
		return NULL;
	}

	/* One data token slot per descriptor follows the structure itself. */
	vq = kmalloc(sizeof(*vq) + sizeof(void *)*num, GFP_KERNEL);
	if (!vq)
		return NULL;

	vring_init(&vq->vring, num, pages, vring_align);
	vq->vq.callback = callback;
	vq->vq.vdev = vdev;
	vq->vq.name = name;
	vq->vq.num_free = num;
	vq->vq.index = index;
	vq->notify = notify;
	vq->weak_barriers = weak_barriers;
	vq->broken = false;
	vq->last_used_idx = 0;
	vq->avail_flags_shadow = 0;
	vq->avail_idx_shadow = 0;
	vq->num_added = 0;
	list_add_tail(&vq->vq.list, &vdev->vqs);
#ifdef DEBUG
	vq->in_use = false;
	vq->last_add_time_valid = false;
#endif

	vq->indirect = virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC);
	vq->event = virtio_has_feature(vdev, VIRTIO_RING_F_EVENT_IDX);

	/* No callback?  Tell other side not to bother us. */
	if (!callback) {
		vq->avail_flags_shadow |= VRING_AVAIL_F_NO_INTERRUPT;
		if (!vq->event)
			vq->vring.avail->flags = cpu_to_virtio16(vdev, vq->avail_flags_shadow);
	}

	/* Put everything in free lists: chain descriptor i to i + 1. */
	vq->free_head = 0;
	for (i = 0; i < num-1; i++) {
		vq->vring.desc[i].next = cpu_to_virtio16(vdev, i + 1);
		vq->data[i] = NULL;
	}
	/* The last descriptor gets no successor, only a cleared token. */
	vq->data[i] = NULL;

	return &vq->vq;
}
EXPORT_SYMBOL_GPL(vring_new_virtqueue);
/**
 * vring_del_virtqueue - unlink and free a virtqueue
 * @vq: the struct virtqueue to destroy.
 *
 * Removes @vq from the list it is linked on and frees the enclosing
 * vring_virtqueue with kfree().
 */
void vring_del_virtqueue(struct virtqueue *vq)
{
	struct vring_virtqueue *vvq;

	list_del(&vq->list);

	vvq = to_vvq(vq);
	kfree(vvq);
}
EXPORT_SYMBOL_GPL(vring_del_virtqueue);
/*
 * Manipulates transport-specific feature bits.
 *
 * Walks the bits from VIRTIO_TRANSPORT_F_START up to (but not including)
 * VIRTIO_TRANSPORT_F_END and clears each one on @vdev, except
 * VIRTIO_RING_F_INDIRECT_DESC, VIRTIO_RING_F_EVENT_IDX and
 * VIRTIO_F_VERSION_1, which are left as they are.
 */
void vring_transport_features(struct virtio_device *vdev)
{
	unsigned int bit;

	for (bit = VIRTIO_TRANSPORT_F_START; bit < VIRTIO_TRANSPORT_F_END; bit++) {
		/* Bits we understand are kept as they are. */
		if (bit == VIRTIO_RING_F_INDIRECT_DESC ||
		    bit == VIRTIO_RING_F_EVENT_IDX ||
		    bit == VIRTIO_F_VERSION_1)
			continue;

		/* We don't understand this bit. */
		__virtio_clear_bit(vdev, bit);
	}
}
EXPORT_SYMBOL_GPL(vring_transport_features);
/**
* virtqueue_get_vring_size - return the size of the virtqueue's vring
* @vq: the struct virtqueue containing the vring of interest.
*
* Returns the size of the vring. This is mainly used for boasting to
* userspace. Unlike other operations, this need not be serialized.
*/
unsigned int virtqueue_get_vring_size(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
return vq->vring.num;
}
EXPORT_SYMBOL_GPL(virtqueue_get_vring_size);
bool virtqueue_is_broken(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
return vq->broken;
}
EXPORT_SYMBOL_GPL(virtqueue_is_broken);
/*
* This should prevent the device from being used, allowing drivers to
* recover. You may need to grab appropriate locks to flush.
*/
void virtio_break_device(struct virtio_device *dev)
{
struct virtqueue *_vq;
list_for_each_entry(_vq, &dev->vqs, list) {
struct vring_virtqueue *vq = to_vvq(_vq);
vq->broken = true;
}
}
EXPORT_SYMBOL_GPL(virtio_break_device);
void *virtqueue_get_avail(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
return vq->vring.avail;
}
EXPORT_SYMBOL_GPL(virtqueue_get_avail);
void *virtqueue_get_used(struct virtqueue *_vq)
{
struct vring_virtqueue *vq = to_vvq(_vq);
return vq->vring.used;
}
EXPORT_SYMBOL_GPL(virtqueue_get_used);
/* Module metadata: declares this module's license as GPL. */
MODULE_LICENSE("GPL");

File diff suppressed because it is too large Load Diff