Browse Source

KVM updates for the 3.6 merge window

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v1.4.12 (GNU/Linux)
 
 iQIcBAABAgAGBQJQDRDNAAoJEI7yEDeUysxlkl8P/3C2AHx2webOU8sVzhfU6ONZ
 ZoGevwBjyZIeJEmiWVpFTTEew1l0PXtpyOocXGNUXIddVnhXTQOKr/Scj4uFbmx8
 ROqgK8NSX9+xOGrBPCoN7SlJkmp+m6uYtwYkl2SGnsEVLWMKkc7J7oqmszCcTQvN
 UXMf7G47/Ul2NUSBdv4Yvizhl4kpvWxluiweDw3E/hIQKN0uyP7CY58qcAztw8nG
 csZBAnnuPFwIAWxHXW3eBBv4UP138HbNDqJ/dujjocM6GnOxmXJmcZ6b57gh+Y64
 3+w9IR4qrRWnsErb/I8inKLJ1Jdcf7yV2FmxYqR4pIXay2Yzo1BsvFd6EB+JavUv
 pJpixrFiDDFoQyXlh4tGpsjpqdXNMLqyG4YpqzSZ46C8naVv9gKE7SXqlXnjyDlb
 Llx3hb9Fop8O5ykYEGHi+gIISAK5eETiQl4yw9RUBDpxydH4qJtqGIbLiDy8y9wi
 Xyi8PBlNl+biJFsK805lxURqTp/SJTC3+Zb7A7CzYEQm5xZw3W/CKZx1ZYBfpaa/
 pWaP6tB7JwgLIVXi4HQayLWqMVwH0soZIn9yazpOEFv6qO8d5QH5RAxAW2VXE3n5
 JDlrajar/lGIdiBVWfwTJLb86gv3QDZtIWoR9mZuLKeKWE/6PRLe7HQpG1pJovsm
 2AsN5bS0BWq+aqPpZHa5
 =pECD
 -----END PGP SIGNATURE-----

Merge tag 'kvm-3.6-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull KVM updates from Avi Kivity:
 "Highlights include
  - full big real mode emulation on pre-Westmere Intel hosts (can be
    disabled with emulate_invalid_guest_state=0)
  - relatively small ppc and s390 updates
  - PCID/INVPCID support in guests
  - EOI avoidance; 3.6 guests should perform better on 3.6 hosts on
    interrupt intensive workloads)
  - Lockless write faults during live migration
  - EPT accessed/dirty bits support for new Intel processors"

Fix up conflicts in:
 - Documentation/virtual/kvm/api.txt:

   Stupid subchapter numbering, added next to each other.

 - arch/powerpc/kvm/booke_interrupts.S:

   PPC asm changes clashing with the KVM fixes

 - arch/s390/include/asm/sigp.h, arch/s390/kvm/sigp.c:

   Duplicated commits through the kvm tree and the s390 tree, with
   subsequent edits in the KVM tree.

* tag 'kvm-3.6-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (93 commits)
  KVM: fix race with level interrupts
  x86, hyper: fix build with !CONFIG_KVM_GUEST
  Revert "apic: fix kvm build on UP without IOAPIC"
  KVM guest: switch to apic_set_eoi_write, apic_write
  apic: add apic_set_eoi_write for PV use
  KVM: VMX: Implement PCID/INVPCID for guests with EPT
  KVM: Add x86_hyper_kvm to complete detect_hypervisor_platform check
  KVM: PPC: Critical interrupt emulation support
  KVM: PPC: e500mc: Fix tlbilx emulation for 64-bit guests
  KVM: PPC64: booke: Set interrupt computation mode for 64-bit host
  KVM: PPC: bookehv: Add ESR flag to Data Storage Interrupt
  KVM: PPC: bookehv64: Add support for std/ld emulation.
  booke: Added crit/mc exception handler for e500v2
  booke/bookehv: Add host crit-watchdog exception support
  KVM: MMU: document mmu-lock and fast page fault
  KVM: MMU: fix kvm_mmu_pagetable_walk tracepoint
  KVM: MMU: trace fast page fault
  KVM: MMU: fast path of handling guest page fault
  KVM: MMU: introduce SPTE_MMU_WRITEABLE bit
  KVM: MMU: fold tlb flush judgement into mmu_spte_update
  ...
master
Linus Torvalds 9 years ago
parent
commit
5fecc9d8f5
  1. 34
      Documentation/virtual/kvm/api.txt
  2. 130
      Documentation/virtual/kvm/locking.txt
  3. 33
      Documentation/virtual/kvm/msr.txt
  4. 2
      Documentation/virtual/kvm/ppc-pv.txt
  5. 2
      MAINTAINERS
  6. 1
      arch/ia64/include/asm/kvm.h
  7. 1
      arch/ia64/kvm/Kconfig
  8. 2
      arch/powerpc/include/asm/epapr_hcalls.h
  9. 2
      arch/powerpc/include/asm/hw_irq.h
  10. 7
      arch/powerpc/include/asm/kvm_book3s_64.h
  11. 6
      arch/powerpc/include/asm/kvm_host.h
  12. 3
      arch/powerpc/include/asm/kvm_ppc.h
  13. 1
      arch/powerpc/kernel/Makefile
  14. 25
      arch/powerpc/kernel/epapr_hcalls.S
  15. 52
      arch/powerpc/kernel/epapr_paravirt.c
  16. 28
      arch/powerpc/kernel/kvm.c
  17. 12
      arch/powerpc/kernel/kvm_emul.S
  18. 123
      arch/powerpc/kvm/book3s_64_mmu_hv.c
  19. 40
      arch/powerpc/kvm/book3s_hv.c
  20. 5
      arch/powerpc/kvm/book3s_hv_builtin.c
  21. 15
      arch/powerpc/kvm/book3s_hv_rm_mmu.c
  22. 26
      arch/powerpc/kvm/booke.c
  23. 28
      arch/powerpc/kvm/booke_emulate.c
  24. 55
      arch/powerpc/kvm/booke_interrupts.S
  25. 2
      arch/powerpc/kvm/bookehv_interrupts.S
  26. 3
      arch/powerpc/kvm/e500_emulate.c
  27. 8
      arch/powerpc/kvm/e500mc.c
  28. 16
      arch/powerpc/kvm/emulate.c
  29. 18
      arch/powerpc/kvm/powerpc.c
  30. 9
      arch/powerpc/platforms/Kconfig
  31. 2
      arch/s390/include/asm/sclp.h
  32. 1
      arch/s390/include/asm/sigp.h
  33. 12
      arch/s390/kernel/setup.c
  34. 1
      arch/s390/kvm/kvm-s390.c
  35. 77
      arch/s390/kvm/sigp.c
  36. 3
      arch/x86/include/asm/apic.h
  37. 7
      arch/x86/include/asm/bitops.h
  38. 1
      arch/x86/include/asm/hypervisor.h
  39. 1
      arch/x86/include/asm/kvm.h
  40. 6
      arch/x86/include/asm/kvm_emulate.h
  41. 31
      arch/x86/include/asm/kvm_host.h
  42. 7
      arch/x86/include/asm/kvm_para.h
  43. 2
      arch/x86/include/asm/processor-flags.h
  44. 6
      arch/x86/include/asm/vmx.h
  45. 17
      arch/x86/kernel/apic/apic.c
  46. 3
      arch/x86/kernel/cpu/hypervisor.c
  47. 64
      arch/x86/kernel/kvm.c
  48. 46
      arch/x86/kvm/cpuid.c
  49. 9
      arch/x86/kvm/cpuid.h
  50. 273
      arch/x86/kvm/emulate.c
  51. 17
      arch/x86/kvm/i8259.c
  52. 194
      arch/x86/kvm/lapic.c
  53. 11
      arch/x86/kvm/lapic.h
  54. 359
      arch/x86/kvm/mmu.c
  55. 45
      arch/x86/kvm/mmutrace.h
  56. 3
      arch/x86/kvm/paging_tmpl.h
  57. 12
      arch/x86/kvm/svm.c
  58. 34
      arch/x86/kvm/trace.h
  59. 189
      arch/x86/kvm/vmx.c
  60. 123
      arch/x86/kvm/x86.c
  61. 10
      drivers/s390/char/sclp.c
  62. 10
      drivers/s390/char/sclp.h
  63. 38
      drivers/s390/char/sclp_cmd.c
  64. 3
      drivers/s390/kvm/kvm_virtio.c
  65. 3
      include/linux/kvm.h
  66. 27
      include/linux/kvm_host.h
  67. 7
      include/trace/events/kvm.h
  68. 19
      virt/kvm/ioapic.c
  69. 4
      virt/kvm/ioapic.h
  70. 31
      virt/kvm/irq_comm.c
  71. 36
      virt/kvm/kvm_main.c

34
Documentation/virtual/kvm/api.txt

@ -1946,6 +1946,40 @@ the guest using the specified gsi pin. The irqfd is removed using
the KVM_IRQFD_FLAG_DEASSIGN flag, specifying both kvm_irqfd.fd
and kvm_irqfd.gsi.
4.76 KVM_PPC_ALLOCATE_HTAB
Capability: KVM_CAP_PPC_ALLOC_HTAB
Architectures: powerpc
Type: vm ioctl
Parameters: Pointer to u32 containing hash table order (in/out)
Returns: 0 on success, -1 on error
This requests the host kernel to allocate an MMU hash table for a
guest using the PAPR paravirtualization interface. This only does
anything if the kernel is configured to use the Book 3S HV style of
virtualization. Otherwise the capability doesn't exist and the ioctl
returns an ENOTTY error. The rest of this description assumes Book 3S
HV.
There must be no vcpus running when this ioctl is called; if there
are, it will do nothing and return an EBUSY error.
The parameter is a pointer to a 32-bit unsigned integer variable
containing the order (log base 2) of the desired size of the hash
table, which must be between 18 and 46. On successful return from the
ioctl, it will have been updated with the order of the hash table that
was allocated.
If no hash table has been allocated when any vcpu is asked to run
(with the KVM_RUN ioctl), the host kernel will allocate a
default-sized hash table (16 MB).
If this ioctl is called when a hash table has already been allocated,
the kernel will clear out the existing hash table (zero all HPTEs) and
return the hash table order in the parameter. (If the guest is using
the virtualized real-mode area (VRMA) facility, the kernel will
re-create the VMRA HPTEs on the next KVM_RUN of any vcpu.)
5. The kvm_run structure
------------------------

130
Documentation/virtual/kvm/locking.txt

@ -6,7 +6,129 @@ KVM Lock Overview
(to be written)
2. Reference
2: Exception
------------
Fast page fault:
Fast page fault is the fast path which fixes the guest page fault out of
the mmu-lock on x86. Currently, the page fault can be fast only if the
shadow page table is present and it is caused by write-protect, that means
we just need change the W bit of the spte.
What we use to avoid all the race is the SPTE_HOST_WRITEABLE bit and
SPTE_MMU_WRITEABLE bit on the spte:
- SPTE_HOST_WRITEABLE means the gfn is writable on host.
- SPTE_MMU_WRITEABLE means the gfn is writable on mmu. The bit is set when
the gfn is writable on guest mmu and it is not write-protected by shadow
page write-protection.
On fast page fault path, we will use cmpxchg to atomically set the spte W
bit if spte.SPTE_HOST_WRITEABLE = 1 and spte.SPTE_WRITE_PROTECT = 1, this
is safe because whenever changing these bits can be detected by cmpxchg.
But we need carefully check these cases:
1): The mapping from gfn to pfn
The mapping from gfn to pfn may be changed since we can only ensure the pfn
is not changed during cmpxchg. This is a ABA problem, for example, below case
will happen:
At the beginning:
gpte = gfn1
gfn1 is mapped to pfn1 on host
spte is the shadow page table entry corresponding with gpte and
spte = pfn1
VCPU 0 VCPU0
on fast page fault path:
old_spte = *spte;
pfn1 is swapped out:
spte = 0;
pfn1 is re-alloced for gfn2.
gpte is changed to point to
gfn2 by the guest:
spte = pfn1;
if (cmpxchg(spte, old_spte, old_spte+W)
mark_page_dirty(vcpu->kvm, gfn1)
OOPS!!!
We dirty-log for gfn1, that means gfn2 is lost in dirty-bitmap.
For direct sp, we can easily avoid it since the spte of direct sp is fixed
to gfn. For indirect sp, before we do cmpxchg, we call gfn_to_pfn_atomic()
to pin gfn to pfn, because after gfn_to_pfn_atomic():
- We have held the refcount of pfn that means the pfn can not be freed and
be reused for another gfn.
- The pfn is writable that means it can not be shared between different gfns
by KSM.
Then, we can ensure the dirty bitmaps is correctly set for a gfn.
Currently, to simplify the whole things, we disable fast page fault for
indirect shadow page.
2): Dirty bit tracking
In the origin code, the spte can be fast updated (non-atomically) if the
spte is read-only and the Accessed bit has already been set since the
Accessed bit and Dirty bit can not be lost.
But it is not true after fast page fault since the spte can be marked
writable between reading spte and updating spte. Like below case:
At the beginning:
spte.W = 0
spte.Accessed = 1
VCPU 0 VCPU0
In mmu_spte_clear_track_bits():
old_spte = *spte;
/* 'if' condition is satisfied. */
if (old_spte.Accssed == 1 &&
old_spte.W == 0)
spte = 0ull;
on fast page fault path:
spte.W = 1
memory write on the spte:
spte.Dirty = 1
else
old_spte = xchg(spte, 0ull)
if (old_spte.Accssed == 1)
kvm_set_pfn_accessed(spte.pfn);
if (old_spte.Dirty == 1)
kvm_set_pfn_dirty(spte.pfn);
OOPS!!!
The Dirty bit is lost in this case.
In order to avoid this kind of issue, we always treat the spte as "volatile"
if it can be updated out of mmu-lock, see spte_has_volatile_bits(), it means,
the spte is always atomicly updated in this case.
3): flush tlbs due to spte updated
If the spte is updated from writable to readonly, we should flush all TLBs,
otherwise rmap_write_protect will find a read-only spte, even though the
writable spte might be cached on a CPU's TLB.
As mentioned before, the spte can be updated to writable out of mmu-lock on
fast page fault path, in order to easily audit the path, we see if TLBs need
be flushed caused by this reason in mmu_spte_update() since this is a common
function to update spte (present -> present).
Since the spte is "volatile" if it can be updated out of mmu-lock, we always
atomicly update the spte, the race caused by fast page fault can be avoided,
See the comments in spte_has_volatile_bits() and mmu_spte_update().
3. Reference
------------
Name: kvm_lock
@ -23,3 +145,9 @@ Arch: x86
Protects: - kvm_arch::{last_tsc_write,last_tsc_nsec,last_tsc_offset}
- tsc offset in vmcb
Comment: 'raw' because updating the tsc offsets must not be preempted.
Name: kvm->mmu_lock
Type: spinlock_t
Arch: any
Protects: -shadow page/shadow tlb entry
Comment: it is a spinlock since it is used in mmu notifier.

33
Documentation/virtual/kvm/msr.txt

@ -223,3 +223,36 @@ MSR_KVM_STEAL_TIME: 0x4b564d03
steal: the amount of time in which this vCPU did not run, in
nanoseconds. Time during which the vcpu is idle, will not be
reported as steal time.
MSR_KVM_EOI_EN: 0x4b564d04
data: Bit 0 is 1 when PV end of interrupt is enabled on the vcpu; 0
when disabled. Bit 1 is reserved and must be zero. When PV end of
interrupt is enabled (bit 0 set), bits 63-2 hold a 4-byte aligned
physical address of a 4 byte memory area which must be in guest RAM and
must be zeroed.
The first, least significant bit of 4 byte memory location will be
written to by the hypervisor, typically at the time of interrupt
injection. Value of 1 means that guest can skip writing EOI to the apic
(using MSR or MMIO write); instead, it is sufficient to signal
EOI by clearing the bit in guest memory - this location will
later be polled by the hypervisor.
Value of 0 means that the EOI write is required.
It is always safe for the guest to ignore the optimization and perform
the APIC EOI write anyway.
Hypervisor is guaranteed to only modify this least
significant bit while in the current VCPU context, this means that
guest does not need to use either lock prefix or memory ordering
primitives to synchronise with the hypervisor.
However, hypervisor can set and clear this memory bit at any time:
therefore to make sure hypervisor does not interrupt the
guest and clear the least significant bit in the memory area
in the window between guest testing it to detect
whether it can skip EOI apic write and between guest
clearing it to signal EOI to the hypervisor,
guest must both read the least significant bit in the memory area and
clear it using a single CPU instruction, such as test and clear, or
compare and exchange.

2
Documentation/virtual/kvm/ppc-pv.txt

@ -109,8 +109,6 @@ The following bits are safe to be set inside the guest:
MSR_EE
MSR_RI
MSR_CR
MSR_ME
If any other bit changes in the MSR, please still use mtmsr(d).

2
MAINTAINERS

@ -4002,8 +4002,8 @@ F: arch/ia64/include/asm/kvm*
F: arch/ia64/kvm/
KERNEL VIRTUAL MACHINE for s390 (KVM/s390)
M: Carsten Otte <cotte@de.ibm.com>
M: Christian Borntraeger <borntraeger@de.ibm.com>
M: Cornelia Huck <cornelia.huck@de.ibm.com>
M: linux390@de.ibm.com
L: linux-s390@vger.kernel.org
W: http://www.ibm.com/developerworks/linux/linux390/

1
arch/ia64/include/asm/kvm.h

@ -26,6 +26,7 @@
/* Select x86 specific features in <linux/kvm.h> */
#define __KVM_HAVE_IOAPIC
#define __KVM_HAVE_IRQ_LINE
#define __KVM_HAVE_DEVICE_ASSIGNMENT
/* Architectural interrupt line count. */

1
arch/ia64/kvm/Kconfig

@ -19,6 +19,7 @@ if VIRTUALIZATION
config KVM
tristate "Kernel-based Virtual Machine (KVM) support"
depends on BROKEN
depends on HAVE_KVM && MODULES && EXPERIMENTAL
# for device assignment:
depends on PCI

2
arch/powerpc/include/asm/epapr_hcalls.h

@ -153,6 +153,8 @@
#define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5"
#define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4"
extern bool epapr_paravirt_enabled;
extern u32 epapr_hypercall_start[];
/*
* We use "uintptr_t" to define a register because it's guaranteed to be a

2
arch/powerpc/include/asm/hw_irq.h

@ -34,6 +34,8 @@ extern void __replay_interrupt(unsigned int vector);
extern void timer_interrupt(struct pt_regs *);
extern void performance_monitor_exception(struct pt_regs *regs);
extern void WatchdogException(struct pt_regs *regs);
extern void unknown_exception(struct pt_regs *regs);
#ifdef CONFIG_PPC64
#include <asm/paca.h>

7
arch/powerpc/include/asm/kvm_book3s_64.h

@ -36,11 +36,8 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu)
#define SPAPR_TCE_SHIFT 12
#ifdef CONFIG_KVM_BOOK3S_64_HV
/* For now use fixed-size 16MB page table */
#define HPT_ORDER 24
#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */
#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */
#define HPT_HASH_MASK (HPT_NPTEG - 1)
#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */
extern int kvm_hpt_order; /* order of preallocated HPTs */
#endif
#define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */

6
arch/powerpc/include/asm/kvm_host.h

@ -237,6 +237,10 @@ struct kvm_arch {
unsigned long vrma_slb_v;
int rma_setup_done;
int using_mmu_notifiers;
u32 hpt_order;
atomic_t vcpus_running;
unsigned long hpt_npte;
unsigned long hpt_mask;
spinlock_t slot_phys_lock;
unsigned long *slot_phys[KVM_MEM_SLOTS_NUM];
int slot_npages[KVM_MEM_SLOTS_NUM];
@ -414,7 +418,9 @@ struct kvm_vcpu_arch {
ulong mcsrr1;
ulong mcsr;
u32 dec;
#ifdef CONFIG_BOOKE
u32 decar;
#endif
u32 tbl;
u32 tbu;
u32 tcr;

3
arch/powerpc/include/asm/kvm_ppc.h

@ -119,7 +119,8 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu);
extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu);
extern void kvmppc_map_magic(struct kvm_vcpu *vcpu);
extern long kvmppc_alloc_hpt(struct kvm *kvm);
extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp);
extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp);
extern void kvmppc_free_hpt(struct kvm *kvm);
extern long kvmppc_prepare_vrma(struct kvm *kvm,
struct kvm_userspace_memory_region *mem);

1
arch/powerpc/kernel/Makefile

@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
obj-y += ppc_save_regs.o
endif
obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o
obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o
# Disable GCOV in odd or sensitive code

25
arch/powerpc/kernel/epapr_hcalls.S

@ -0,0 +1,25 @@
/*
* Copyright (C) 2012 Freescale Semiconductor, Inc.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <linux/threads.h>
#include <asm/reg.h>
#include <asm/page.h>
#include <asm/cputable.h>
#include <asm/thread_info.h>
#include <asm/ppc_asm.h>
#include <asm/asm-offsets.h>
/* Hypercall entry point. Will be patched with device tree instructions. */
.global epapr_hypercall_start
epapr_hypercall_start:
li r3, -1
nop
nop
nop
blr

52
arch/powerpc/kernel/epapr_paravirt.c

@ -0,0 +1,52 @@
/*
* ePAPR para-virtualization support.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License, version 2, as
* published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*
* Copyright (C) 2012 Freescale Semiconductor, Inc.
*/
#include <linux/of.h>
#include <asm/epapr_hcalls.h>
#include <asm/cacheflush.h>
#include <asm/code-patching.h>
bool epapr_paravirt_enabled;
static int __init epapr_paravirt_init(void)
{
struct device_node *hyper_node;
const u32 *insts;
int len, i;
hyper_node = of_find_node_by_path("/hypervisor");
if (!hyper_node)
return -ENODEV;
insts = of_get_property(hyper_node, "hcall-instructions", &len);
if (!insts)
return -ENODEV;
if (len % 4 || len > (4 * 4))
return -ENODEV;
for (i = 0; i < (len / 4); i++)
patch_instruction(epapr_hypercall_start + i, insts[i]);
epapr_paravirt_enabled = true;
return 0;
}
early_initcall(epapr_paravirt_init);

28
arch/powerpc/kernel/kvm.c

@ -31,6 +31,7 @@
#include <asm/cacheflush.h>
#include <asm/disassemble.h>
#include <asm/ppc-opcode.h>
#include <asm/epapr_hcalls.h>
#define KVM_MAGIC_PAGE (-4096L)
#define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x)
@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in,
unsigned long register r11 asm("r11") = nr;
unsigned long register r12 asm("r12");
asm volatile("bl kvm_hypercall_start"
asm volatile("bl epapr_hypercall_start"
: "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6),
"=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11),
"=r"(r12)
@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in,
}
EXPORT_SYMBOL_GPL(kvm_hypercall);
static int kvm_para_setup(void)
{
extern u32 kvm_hypercall_start;
struct device_node *hyper_node;
u32 *insts;
int len, i;
hyper_node = of_find_node_by_path("/hypervisor");
if (!hyper_node)
return -1;
insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len);
if (len % 4)
return -1;
if (len > (4 * 4))
return -1;
for (i = 0; i < (len / 4); i++)
kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]);
return 0;
}
static __init void kvm_free_tmp(void)
{
unsigned long start, end;
@ -791,7 +769,7 @@ static int __init kvm_guest_init(void)
if (!kvm_para_available())
goto free_tmp;
if (kvm_para_setup())
if (!epapr_paravirt_enabled)
goto free_tmp;
if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE))

12
arch/powerpc/kernel/kvm_emul.S

@ -24,16 +24,6 @@
#include <asm/page.h>
#include <asm/asm-offsets.h>
/* Hypercall entry point. Will be patched with device tree instructions. */
.global kvm_hypercall_start
kvm_hypercall_start:
li r3, -1
nop
nop
nop
blr
#define KVM_MAGIC_PAGE (-4096)
#ifdef CONFIG_64BIT
@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len:
.long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4
#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI)
#define MSR_SAFE_BITS (MSR_EE | MSR_RI)
#define MSR_CRITICAL_BITS ~MSR_SAFE_BITS
.global kvm_emulate_mtmsr

123
arch/powerpc/kvm/book3s_64_mmu_hv.c

@ -37,56 +37,121 @@
/* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */
#define MAX_LPID_970 63
long kvmppc_alloc_hpt(struct kvm *kvm)
/* Power architecture requires HPT is at least 256kB */
#define PPC_MIN_HPT_ORDER 18
long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp)
{
unsigned long hpt;
long lpid;
struct revmap_entry *rev;
struct kvmppc_linear_info *li;
long order = kvm_hpt_order;
/* Allocate guest's hashed page table */
li = kvm_alloc_hpt();
if (li) {
/* using preallocated memory */
hpt = (ulong)li->base_virt;
kvm->arch.hpt_li = li;
} else {
/* using dynamic memory */
if (htab_orderp) {
order = *htab_orderp;
if (order < PPC_MIN_HPT_ORDER)
order = PPC_MIN_HPT_ORDER;
}
/*
* If the user wants a different size from default,
* try first to allocate it from the kernel page allocator.
*/
hpt = 0;
if (order != kvm_hpt_order) {
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
__GFP_NOWARN, HPT_ORDER - PAGE_SHIFT);
__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
--order;
}
/* Next try to allocate from the preallocated pool */
if (!hpt) {
pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n");
return -ENOMEM;
li = kvm_alloc_hpt();
if (li) {
hpt = (ulong)li->base_virt;
kvm->arch.hpt_li = li;
order = kvm_hpt_order;
}
}
/* Lastly try successively smaller sizes from the page allocator */
while (!hpt && order > PPC_MIN_HPT_ORDER) {
hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT|
__GFP_NOWARN, order - PAGE_SHIFT);
if (!hpt)
--order;
}
if (!hpt)
return -ENOMEM;
kvm->arch.hpt_virt = hpt;
kvm->arch.hpt_order = order;
/* HPTEs are 2**4 bytes long */
kvm->arch.hpt_npte = 1ul << (order - 4);
/* 128 (2**7) bytes in each HPTEG */
kvm->arch.hpt_mask = (1ul << (order - 7)) - 1;
/* Allocate reverse map array */
rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE);
rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte);
if (!rev) {
pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n");
goto out_freehpt;
}
kvm->arch.revmap = rev;
kvm->arch.sdr1 = __pa(hpt) | (order - 18);
lpid = kvmppc_alloc_lpid();
if (lpid < 0)
goto out_freeboth;
pr_info("KVM guest htab at %lx (order %ld), LPID %x\n",
hpt, order, kvm->arch.lpid);
kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18);
kvm->arch.lpid = lpid;
pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid);
if (htab_orderp)
*htab_orderp = order;
return 0;
out_freeboth:
vfree(rev);
out_freehpt:
free_pages(hpt, HPT_ORDER - PAGE_SHIFT);
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
else
free_pages(hpt, order - PAGE_SHIFT);
return -ENOMEM;
}
long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp)
{
long err = -EBUSY;
long order;
mutex_lock(&kvm->lock);
if (kvm->arch.rma_setup_done) {
kvm->arch.rma_setup_done = 0;
/* order rma_setup_done vs. vcpus_running */
smp_mb();
if (atomic_read(&kvm->arch.vcpus_running)) {
kvm->arch.rma_setup_done = 1;
goto out;
}
}
if (kvm->arch.hpt_virt) {
order = kvm->arch.hpt_order;
/* Set the entire HPT to 0, i.e. invalid HPTEs */
memset((void *)kvm->arch.hpt_virt, 0, 1ul << order);
/*
* Set the whole last_vcpu array to an invalid vcpu number.
* This ensures that each vcpu will flush its TLB on next entry.
*/
memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu));
*htab_orderp = order;
err = 0;
} else {
err = kvmppc_alloc_hpt(kvm, htab_orderp);
order = *htab_orderp;
}
out:
mutex_unlock(&kvm->lock);
return err;
}
void kvmppc_free_hpt(struct kvm *kvm)
{
kvmppc_free_lpid(kvm->arch.lpid);
@ -94,7 +159,8 @@ void kvmppc_free_hpt(struct kvm *kvm)
if (kvm->arch.hpt_li)
kvm_release_hpt(kvm->arch.hpt_li);
else
free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT);
free_pages(kvm->arch.hpt_virt,
kvm->arch.hpt_order - PAGE_SHIFT);
}
/* Bits in first HPTE dword for pagesize 4k, 64k or 16M */
@ -119,6 +185,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
unsigned long psize;
unsigned long hp0, hp1;
long ret;
struct kvm *kvm = vcpu->kvm;
psize = 1ul << porder;
npages = memslot->npages >> (porder - PAGE_SHIFT);
@ -127,8 +194,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
if (npages > 1ul << (40 - porder))
npages = 1ul << (40 - porder);
/* Can't use more than 1 HPTE per HPTEG */
if (npages > HPT_NPTEG)
npages = HPT_NPTEG;
if (npages > kvm->arch.hpt_mask + 1)
npages = kvm->arch.hpt_mask + 1;
hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) |
HPTE_V_BOLTED | hpte0_pgsize_encoding(psize);
@ -138,7 +205,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot,
for (i = 0; i < npages; ++i) {
addr = i << porder;
/* can't use hpt_hash since va > 64 bits */
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK;
hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask;
/*
* We assume that the hash table is empty and no
* vcpus are using it at this stage. Since we create

40
arch/powerpc/kvm/book3s_hv.c

@ -56,7 +56,7 @@
/* #define EXIT_DEBUG_INT */
static void kvmppc_end_cede(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu);
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu);
void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
{
@ -1104,11 +1104,15 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
return -EINTR;
}
/* On the first time here, set up VRMA or RMA */
atomic_inc(&vcpu->kvm->arch.vcpus_running);
/* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */
smp_mb();
/* On the first time here, set up HTAB and VRMA or RMA */
if (!vcpu->kvm->arch.rma_setup_done) {
r = kvmppc_hv_setup_rma(vcpu);
r = kvmppc_hv_setup_htab_rma(vcpu);
if (r)
return r;
goto out;
}
flush_fp_to_thread(current);
@ -1126,6 +1130,9 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu)
kvmppc_core_prepare_to_enter(vcpu);
}
} while (r == RESUME_GUEST);
out:
atomic_dec(&vcpu->kvm->arch.vcpus_running);
return r;
}
@ -1341,7 +1348,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm,
{
}
static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu)
{
int err = 0;
struct kvm *kvm = vcpu->kvm;
@ -1360,6 +1367,15 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
if (kvm->arch.rma_setup_done)
goto out; /* another vcpu beat us to it */
/* Allocate hashed page table (if not done already) and reset it */
if (!kvm->arch.hpt_virt) {
err = kvmppc_alloc_hpt(kvm, NULL);
if (err) {
pr_err("KVM: Couldn't alloc HPT\n");
goto out;
}
}
/* Look up the memslot for guest physical address 0 */
memslot = gfn_to_memslot(kvm, 0);
@ -1471,13 +1487,14 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu)
int kvmppc_core_init_vm(struct kvm *kvm)
{
long r;
unsigned long lpcr;
unsigned long lpcr, lpid;
/* Allocate hashed page table */
r = kvmppc_alloc_hpt(kvm);
if (r)
return r;
/* Allocate the guest's logical partition ID */
lpid = kvmppc_alloc_lpid();
if (lpid < 0)
return -ENOMEM;
kvm->arch.lpid = lpid;
INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables);
@ -1487,7 +1504,6 @@ int kvmppc_core_init_vm(struct kvm *kvm)
if (cpu_has_feature(CPU_FTR_ARCH_201)) {
/* PPC970; HID4 is effectively the LPCR */
unsigned long lpid = kvm->arch.lpid;
kvm->arch.host_lpid = 0;
kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4);
lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH));

5
arch/powerpc/kvm/book3s_hv_builtin.c

@ -25,6 +25,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type);
static struct kvmppc_linear_info *kvm_alloc_linear(int type);
static void kvm_release_linear(struct kvmppc_linear_info *ri);
int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER;
EXPORT_SYMBOL_GPL(kvm_hpt_order);
/*************** RMA *************/
/*
@ -209,7 +212,7 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri)
void __init kvm_linear_init(void)
{
/* HPT */
kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT);
kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT);
/* RMA */
/* Only do this on PPC970 in HV mode */

15
arch/powerpc/kvm/book3s_hv_rm_mmu.c

@ -237,7 +237,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags,
/* Find and lock the HPTEG slot to use */
do_insert:
if (pte_index >= HPT_NPTE)
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
if (likely((flags & H_EXACT) == 0)) {
pte_index &= ~7UL;
@ -352,7 +352,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags,
unsigned long v, r, rb;
struct revmap_entry *rev;
if (pte_index >= HPT_NPTE)
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
while (!try_lock_hpte(hpte, HPTE_V_HVLOCK))
@ -419,7 +419,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu)
i = 4;
break;
}
if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) {
if (req != 1 || flags == 3 ||
pte_index >= kvm->arch.hpt_npte) {
/* parameter error */
args[j] = ((0xa0 | flags) << 56) + pte_index;
ret = H_PARAMETER;
@ -521,7 +522,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags,
struct revmap_entry *rev;
unsigned long v, r, rb, mask, bits;
if (pte_index >= HPT_NPTE)
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4));
@ -583,7 +584,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags,
int i, n = 1;
struct revmap_entry *rev = NULL;
if (pte_index >= HPT_NPTE)
if (pte_index >= kvm->arch.hpt_npte)
return H_PARAMETER;
if (flags & H_READ_4) {
pte_index &= ~3;
@ -678,7 +679,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
somask = (1UL << 28) - 1;
vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT;
}
hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK;
hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask;
avpn = slb_v & ~(somask >> 16); /* also includes B */
avpn |= (eaddr & somask) >> 16;
@ -723,7 +724,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v,
if (val & HPTE_V_SECONDARY)
break;
val |= HPTE_V_SECONDARY;
hash = hash ^ HPT_HASH_MASK;
hash = hash ^ kvm->arch.hpt_mask;
}
return -1;
}

26
arch/powerpc/kvm/booke.c

@ -612,6 +612,12 @@ static void kvmppc_fill_pt_regs(struct pt_regs *regs)
regs->link = lr;
}
/*
* For interrupts needed to be handled by host interrupt handlers,
* corresponding host handler are called from here in similar way
* (but not exact) as they are called from low level handler
* (such as from arch/powerpc/kernel/head_fsl_booke.S).
*/
static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
unsigned int exit_nr)
{
@ -639,6 +645,17 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu,
kvmppc_fill_pt_regs(&regs);
performance_monitor_exception(&regs);
break;
case BOOKE_INTERRUPT_WATCHDOG:
kvmppc_fill_pt_regs(&regs);
#ifdef CONFIG_BOOKE_WDT
WatchdogException(&regs);
#else
unknown_exception(&regs);
#endif
break;
case BOOKE_INTERRUPT_CRITICAL:
unknown_exception(&regs);
break;
}
}
@ -683,6 +700,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu,
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_WATCHDOG:
r = RESUME_GUEST;
break;
case BOOKE_INTERRUPT_DOORBELL:
kvmppc_account_exit(vcpu, DBELL_EXITS);
r = RESUME_GUEST;
@ -1267,6 +1288,11 @@ void kvmppc_decrementer_func(unsigned long data)
{
struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data;
if (vcpu->arch.tcr & TCR_ARE) {
vcpu->arch.dec = vcpu->arch.decar;
kvmppc_emulate_dec(vcpu);
}
kvmppc_set_tsr_bits(vcpu, TSR_DIS);
}

28
arch/powerpc/kvm/booke_emulate.c

@ -24,6 +24,7 @@
#include "booke.h"
#define OP_19_XOP_RFI 50
#define OP_19_XOP_RFCI 51
#define OP_31_XOP_MFMSR 83
#define OP_31_XOP_WRTEE 131
@ -36,6 +37,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu)
kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1);
}
static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu)
{
vcpu->arch.pc = vcpu->arch.csrr0;
kvmppc_set_msr(vcpu, vcpu->arch.csrr1);
}
int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
unsigned int inst, int *advance)
{
@ -52,6 +59,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu,
*advance = 0;
break;
case OP_19_XOP_RFCI:
kvmppc_emul_rfci(vcpu);
kvmppc_set_exit_type(vcpu, EMULATED_RFCI_EXITS);
*advance = 0;
break;
default:
emulated = EMULATE_FAIL;
break;
@ -113,6 +126,12 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
case SPRN_ESR:
vcpu->arch.shared->esr = spr_val;
break;
case SPRN_CSRR0:
vcpu->arch.csrr0 = spr_val;
break;
case SPRN_CSRR1:
vcpu->arch.csrr1 = spr_val;
break;
case SPRN_DBCR0:
vcpu->arch.dbcr0 = spr_val;
break;
@ -129,6 +148,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val)
kvmppc_set_tcr(vcpu, spr_val);
break;
case SPRN_DECAR:
vcpu->arch.decar = spr_val;
break;
/*
* Note: SPRG4-7 are user-readable.
* These values are loaded into the real SPRGs when resuming the
@ -229,6 +251,12 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
case SPRN_ESR:
*spr_val = vcpu->arch.shared->esr;
break;
case SPRN_CSRR0:
*spr_val = vcpu->arch.csrr0;
break;
case SPRN_CSRR1:
*spr_val = vcpu->arch.csrr1;
break;
case SPRN_DBCR0:
*spr_val = vcpu->arch.dbcr0;
break;

55
arch/powerpc/kvm/booke_interrupts.S

@ -52,16 +52,21 @@
(1<<BOOKE_INTERRUPT_PROGRAM) | \
(1<<BOOKE_INTERRUPT_DTLB_MISS))
.macro KVM_HANDLER ivor_nr
.macro KVM_HANDLER ivor_nr scratch srr0
_GLOBAL(kvmppc_handler_\ivor_nr)
/* Get pointer to vcpu and record exit number. */
mtspr SPRN_SPRG_WSCRATCH0, r4
mtspr \scratch , r4
mfspr r4, SPRN_SPRG_RVCPU
stw r3, VCPU_GPR(R3)(r4)
stw r5, VCPU_GPR(R5)(r4)
stw r6, VCPU_GPR(R6)(r4)
mfspr r3, \scratch
mfctr r5
lis r6, kvmppc_resume_host@h
stw r3, VCPU_GPR(R4)(r4)
stw r5, VCPU_CTR(r4)
mfspr r3, \srr0
lis r6, kvmppc_resume_host@h
stw r3, VCPU_PC(r4)
li r5, \ivor_nr
ori r6, r6, kvmppc_resume_host@l
mtctr r6
@ -69,37 +74,35 @@ _GLOBAL(kvmppc_handler_\ivor_nr)
.endm
_GLOBAL(kvmppc_handlers_start)
KVM_HANDLER BOOKE_INTERRUPT_CRITICAL
KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK
KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE
KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE
KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL
KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT
KVM_HANDLER BOOKE_INTERRUPT_PROGRAM
KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL
KVM_HANDLER BOOKE_INTERRUPT_SYSCALL
KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL
KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER
KVM_HANDLER BOOKE_INTERRUPT_FIT
KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG
KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS
KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS
KVM_HANDLER BOOKE_INTERRUPT_DEBUG
KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND
KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0
KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_PROGRAM SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SYSCALL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0
KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0
_GLOBAL(kvmppc_handler_len)
.long kvmppc_handler_1 - kvmppc_handler_0
/* Registers:
* SPRG_SCRATCH0: guest r4
* r4: vcpu pointer
* r5: KVM exit number
*/
_GLOBAL(kvmppc_resume_host)
stw r3, VCPU_GPR(R3)(r4)
mfcr r3
stw r3, VCPU_CR(r4)
stw r7, VCPU_GPR(R7)(r4)
@ -180,10 +183,6 @@ _GLOBAL(kvmppc_resume_host)
stw r3, VCPU_LR(r4)
mfxer r3
stw r3, VCPU_XER(r4)
mfspr r3, SPRN_SPRG_RSCRATCH0
stw r3, VCPU_GPR(R4)(r4)
mfspr r3, SPRN_SRR0
stw r3, VCPU_PC(r4)
/* Restore host stack pointer and PID before IVPR, since the host
* exception handlers use them. */

2
arch/powerpc/kvm/bookehv_interrupts.S

@ -262,7 +262,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \
kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \
SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0
kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR)
SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR)
kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR
kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0
kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \

3
arch/powerpc/kvm/e500_emulate.c

@ -269,6 +269,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val)
*spr_val = vcpu->arch.shared->mas7_3 >> 32;
break;
#endif
case SPRN_DECAR:
*spr_val = vcpu->arch.decar;
break;
case SPRN_TLB0CFG:
*spr_val = vcpu->arch.tlbcfg[0];
break;

8
arch/powerpc/kvm/e500mc.c

@ -1,5 +1,5 @@
/*
* Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved.
* Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved.
*
* Author: Varun Sethi, <varun.sethi@freescale.com>
*
@ -57,7 +57,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500,
struct kvm_book3e_206_tlb_entry *gtlbe)
{
unsigned int tid, ts;
u32 val, eaddr, lpid;
gva_t eaddr;
u32 val, lpid;
unsigned long flags;
ts = get_tlb_ts(gtlbe);
@ -183,6 +184,9 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu)
vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \
SPRN_EPCR_DUVD;
#ifdef CONFIG_64BIT
vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM;
#endif
vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP;
vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT);
vcpu->arch.epsc = vcpu->arch.eplc;

16
arch/powerpc/kvm/emulate.c

@ -59,11 +59,13 @@
#define OP_31_XOP_STHBRX 918
#define OP_LWZ 32
#define OP_LD 58
#define OP_LWZU 33
#define OP_LBZ 34
#define OP_LBZU 35
#define OP_STW 36
#define OP_STWU 37
#define OP_STD 62
#define OP_STB 38
#define OP_STBU 39
#define OP_LHZ 40
@ -392,6 +394,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
break;
/* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */
case OP_LD:
rt = get_rt(inst);
emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1);
break;
case OP_LWZU:
emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1);
kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed);
@ -412,6 +420,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu)
4, 1);
break;
/* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */
case OP_STD:
rs = get_rs(inst);
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),
8, 1);
break;
case OP_STWU:
emulated = kvmppc_handle_store(run, vcpu,
kvmppc_get_gpr(vcpu, rs),

18
arch/powerpc/kvm/powerpc.c

@ -246,6 +246,7 @@ int kvm_dev_ioctl_check_extension(long ext)
#endif
#ifdef CONFIG_PPC_BOOK3S_64
case KVM_CAP_SPAPR_TCE:
case KVM_CAP_PPC_ALLOC_HTAB:
r = 1;
break;
#endif /* CONFIG_PPC_BOOK3S_64 */
@ -802,6 +803,23 @@ long kvm_arch_vm_ioctl(struct file *filp,
r = -EFAULT;
break;
}
case KVM_PPC_ALLOCATE_HTAB: {
struct kvm *kvm = filp->private_data;
u32 htab_order;
r = -EFAULT;
if (get_user(htab_order, (u32 __user *)argp))
break;
r = kvmppc_alloc_reset_hpt(kvm, &htab_order);
if (r)
break;
r = -EFAULT;
if (put_user(htab_order, (u32 __user *)argp))
break;
r = 0;
break;
}
#endif /* CONFIG_KVM_BOOK3S_64_HV */
#ifdef CONFIG_PPC_BOOK3S_64

9
arch/powerpc/platforms/Kconfig

@ -25,6 +25,7 @@ source "arch/powerpc/platforms/wsp/Kconfig"
config KVM_GUEST
bool "KVM Guest support"
default n
select EPAPR_PARAVIRT
---help---
This option enables various optimizations for running under the KVM
hypervisor. Overhead for the kernel when not running inside KVM should
@ -32,6 +33,14 @@ config KVM_GUEST
In case of doubt, say Y
config EPAPR_PARAVIRT
bool "ePAPR para-virtualization support"
default n
help
Enables ePAPR para-virtualization support for guests.
In case of doubt, say Y
config PPC_NATIVE
bool
depends on 6xx || PPC64

2
arch/s390/include/asm/sclp.h

@ -53,5 +53,7 @@ int sclp_chp_configure(struct chp_id chpid);
int sclp_chp_deconfigure(struct chp_id chpid);
int sclp_chp_read_info(struct sclp_chp_info *info);
void sclp_get_ipl_info(struct sclp_ipl_info *info);
bool sclp_has_linemode(void);
bool sclp_has_vt220(void);
#endif /* _ASM_S390_SCLP_H */

1
arch/s390/include/asm/sigp.h

@ -24,6 +24,7 @@
#define SIGP_STATUS_CHECK_STOP 0x00000010UL
#define SIGP_STATUS_STOPPED 0x00000040UL
#define SIGP_STATUS_EXT_CALL_PENDING 0x00000080UL
#define SIGP_STATUS_INVALID_PARAMETER 0x00000100UL
#define SIGP_STATUS_INCORRECT_STATE 0x00000200UL
#define SIGP_STATUS_NOT_RUNNING 0x00000400UL

12
arch/s390/kernel/setup.c

@ -61,6 +61,7 @@
#include <asm/kvm_virtio.h>
#include <asm/diag.h>
#include <asm/os_info.h>
#include <asm/sclp.h>
#include "entry.h"
long psw_kernel_bits = PSW_DEFAULT_KEY | PSW_MASK_BASE | PSW_ASC_PRIMARY |
@ -136,9 +137,14 @@ __setup("condev=", condev_setup);
static void __init set_preferred_console(void)
{
if (MACHINE_IS_KVM)
add_preferred_console("hvc", 0, NULL);
else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
if (MACHINE_IS_KVM) {
if (sclp_has_vt220())
add_preferred_console("ttyS", 1, NULL);
else if (sclp_has_linemode())
add_preferred_console("ttyS", 0, NULL);
else
add_preferred_console("hvc", 0, NULL);
} else if (CONSOLE_IS_3215 || CONSOLE_IS_SCLP)
add_preferred_console("ttyS", 0, NULL);
else if (CONSOLE_IS_3270)
add_preferred_console("tty3270", 0, NULL);

1
arch/s390/kvm/kvm-s390.c

@ -347,6 +347,7 @@ static void kvm_s390_vcpu_initial_reset(struct kvm_vcpu *vcpu)
vcpu->arch.guest_fpregs.fpc = 0;
asm volatile("lfpc %0" : : "Q" (vcpu->arch.guest_fpregs.fpc));
vcpu->arch.sie_block->gbea = 1;
atomic_set_mask(CPUSTAT_STOPPED, &vcpu->arch.sie_block->cpuflags);
}
int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu)

77
arch/s390/kvm/sigp.c

@ -26,19 +26,23 @@ static int __sigp_sense(struct kvm_vcpu *vcpu, u16 cpu_addr,
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return 3; /* not operational */
return SIGP_CC_NOT_OPERATIONAL;
spin_lock(&fi->lock);
if (fi->local_int[cpu_addr] == NULL)
rc = 3; /* not operational */
rc = SIGP_CC_NOT_OPERATIONAL;
else if (!(atomic_read(fi->local_int[cpu_addr]->cpuflags)
& CPUSTAT_STOPPED)) {
*reg &= 0xffffffff00000000UL;
rc = 1; /* status stored */
} else {
& (CPUSTAT_ECALL_PEND | CPUSTAT_STOPPED)))
rc = SIGP_CC_ORDER_CODE_ACCEPTED;
else {
*reg &= 0xffffffff00000000UL;
*reg |= SIGP_STATUS_STOPPED;
rc = 1; /* status stored */
if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
& CPUSTAT_ECALL_PEND)
*reg |= SIGP_STATUS_EXT_CALL_PENDING;
if (atomic_read(fi->local_int[cpu_addr]->cpuflags)
& CPUSTAT_STOPPED)
*reg |= SIGP_STATUS_STOPPED;
rc = SIGP_CC_STATUS_STORED;
}
spin_unlock(&fi->lock);
@ -54,7 +58,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
int rc;
if (cpu_addr >= KVM_MAX_VCPUS)
return 3; /* not operational */
return SIGP_CC_NOT_OPERATIONAL;
inti = kzalloc(sizeof(*inti), GFP_KERNEL);
if (!inti)
@ -66,7 +70,7 @@ static int __sigp_emergency(struct kvm_vcpu *vcpu, u16 cpu_addr)
spin_lock(&fi->lock);
li = fi->local_int[cpu_addr];
if (li == NULL) {
rc = 3; /* not operational */
rc = SIGP_CC_NOT_OPERATIONAL;
kfree(inti);