From: Paul Mackerras Date: Mon, 30 Jun 2014 20:51:09 +1000 Subject: KVM: Don't keep reference to irq routing table in irqfd struct Origin: https://git.kernel.org/linus/56f89f3629ffd1a21d38c3d0bea23deac0e284ce This makes the irqfd code keep a copy of the irq routing table entry for each irqfd, rather than a reference to the copy in the actual irq routing table maintained in virt/kvm/irqchip.c. This will enable us to change the routing table structure in future, or even not have a routing table at all on some platforms. The synchronization that was previously achieved using srcu_dereference on the read side is now achieved using a seqcount_t structure. That ensures that we don't get a halfway-updated copy of the structure if we read it while another thread is updating it. We still use srcu_read_lock/unlock around the read side so that when changing the routing table we can be sure that after calling synchronize_srcu, nothing will be using the old routing. Signed-off-by: Paul Mackerras Tested-by: Eric Auger Tested-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- virt/kvm/eventfd.c | 41 +++++++++++++++++++++++++---------------- 1 file changed, 25 insertions(+), 16 deletions(-) diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 20c3af7..bae593a 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -33,6 +33,7 @@ #include <linux/kernel.h> #include <linux/srcu.h> #include <linux/slab.h> +#include <linux/seqlock.h> #include "iodev.h" @@ -75,7 +76,8 @@ struct _irqfd { struct kvm *kvm; wait_queue_t wait; /* Update side is protected by irqfds.lock */ - struct kvm_kernel_irq_routing_entry __rcu *irq_entry; + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; /* Used for level IRQ fast-path */ int gsi; struct work_struct inject; @@ -223,16 +225,20 @@ irqfd_wakeup(wait_queue_t *wait, unsigned mode, int sync, void *key) { struct _irqfd *irqfd = container_of(wait, struct _irqfd, wait); unsigned long flags = (unsigned long)key; - struct kvm_kernel_irq_routing_entry *irq; + struct 
kvm_kernel_irq_routing_entry irq; struct kvm *kvm = irqfd->kvm; + unsigned seq; int idx; if (flags & POLLIN) { idx = srcu_read_lock(&kvm->irq_srcu); - irq = srcu_dereference(irqfd->irq_entry, &kvm->irq_srcu); + do { + seq = read_seqcount_begin(&irqfd->irq_entry_sc); + irq = irqfd->irq_entry; + } while (read_seqcount_retry(&irqfd->irq_entry_sc, seq)); /* An event has been signaled, inject an interrupt */ - if (irq) - kvm_set_msi(irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, + if (irq.type == KVM_IRQ_ROUTING_MSI) + kvm_set_msi(&irq, kvm, KVM_USERSPACE_IRQ_SOURCE_ID, 1, false); else schedule_work(&irqfd->inject); @@ -277,18 +283,20 @@ static void irqfd_update(struct kvm *kvm, struct _irqfd *irqfd, { struct kvm_kernel_irq_routing_entry *e; - if (irqfd->gsi >= irq_rt->nr_rt_entries) { - rcu_assign_pointer(irqfd->irq_entry, NULL); - return; - } + write_seqcount_begin(&irqfd->irq_entry_sc); + + irqfd->irq_entry.type = 0; + if (irqfd->gsi >= irq_rt->nr_rt_entries) + goto out; hlist_for_each_entry(e, &irq_rt->map[irqfd->gsi], link) { /* Only fast-path MSI. 
*/ if (e->type == KVM_IRQ_ROUTING_MSI) - rcu_assign_pointer(irqfd->irq_entry, e); - else - rcu_assign_pointer(irqfd->irq_entry, NULL); + irqfd->irq_entry = *e; } + + out: + write_seqcount_end(&irqfd->irq_entry_sc); } static int @@ -310,6 +318,7 @@ kvm_irqfd_assign(struct kvm *kvm, struct kvm_irqfd *args) INIT_LIST_HEAD(&irqfd->list); INIT_WORK(&irqfd->inject, irqfd_inject); INIT_WORK(&irqfd->shutdown, irqfd_shutdown); + seqcount_init(&irqfd->irq_entry_sc); f = fdget(args->fd); if (!f.file) { @@ -466,14 +475,14 @@ kvm_irqfd_deassign(struct kvm *kvm, struct kvm_irqfd *args) list_for_each_entry_safe(irqfd, tmp, &kvm->irqfds.items, list) { if (irqfd->eventfd == eventfd && irqfd->gsi == args->gsi) { /* - * This rcu_assign_pointer is needed for when + * This clearing of irq_entry.type is needed for when * another thread calls kvm_irq_routing_update before * we flush workqueue below (we synchronize with * kvm_irq_routing_update using irqfds.lock). - * It is paired with synchronize_srcu done by caller - * of that function. */ - rcu_assign_pointer(irqfd->irq_entry, NULL); + write_seqcount_begin(&irqfd->irq_entry_sc); + irqfd->irq_entry.type = 0; + write_seqcount_end(&irqfd->irq_entry_sc); irqfd_deactivate(irqfd); } } -- 1.7.10.4