[x86] KVM: Add IOPL/CPL checks to emulator, to prevent privilege escalation within a guest. (CVE-2010-0298, CVE-2010-0306)

svn path=/dists/trunk/linux-2.6/; revision=15142
Ben Hutchings 2010-02-11 02:16:55 +00:00
parent fa9f65f4b9
commit ffe78cf59f
6 changed files with 1046 additions and 0 deletions

debian/changelog

@@ -14,6 +14,8 @@ linux-2.6 (2.6.32-8) UNRELEASED; urgency=low
- Remove TIF_ABI_PENDING bit from x86, sparc & powerpc, fixing
32-bit userland/64-bit kernel breakage (Closes: #568416)
- connector: Delete buggy notification code. (CVE-2010-0410)
* [x86] KVM: Add IOPL/CPL checks to emulator, to prevent privilege
escalation within a guest. (CVE-2010-0298, CVE-2010-0306)
[ Martin Michlmayr ]
* Implement power-off for D-Link DNS-323 rev B1 and fix the blinking

debian/patches/bugfix/x86/kvm-Check-CPL-level-during-privilege-instruction-emulation.patch

@@ -0,0 +1,256 @@
From 0a65a79a86558a413a417baddbb11062d10e69a6 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 10 Feb 2010 05:31:12 +0000
Subject: [PATCH 4/4] Check CPL level during privilege instruction emulation.
Add CPL checking in case the emulator is tricked into emulating a
privileged instruction.
[forward-ported by Ben Hutchings <ben@decadent.org.uk>]
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/emulate.c | 136 ++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 136 insertions(+), 0 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 4a6526f..836d3bc 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2133,6 +2133,14 @@ special_insn:
}
break;
case 0xf4: /* hlt */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
ctxt->vcpu->arch.halt_request = 1;
break;
case 0xf5: /* cmc */
@@ -2208,6 +2216,11 @@ twobyte_insn:
if (c->modrm_mod != 3 || c->modrm_rm != 1)
goto cannot_emulate;
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
rc = kvm_fix_hypercall(ctxt->vcpu);
if (rc)
goto done;
@@ -2218,6 +2231,16 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 2: /* lgdt */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
rc = read_descriptor(ctxt, ops, c->src.ptr,
&size, &address, c->op_bytes);
if (rc)
@@ -2230,6 +2253,10 @@ twobyte_insn:
if (c->modrm_mod == 3) {
switch (c->modrm_rm) {
case 1:
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
rc = kvm_fix_hypercall(ctxt->vcpu);
if (rc)
goto done;
@@ -2238,6 +2265,16 @@ twobyte_insn:
goto cannot_emulate;
}
} else {
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
rc = read_descriptor(ctxt, ops, c->src.ptr,
&size, &address,
c->op_bytes);
@@ -2253,11 +2290,26 @@ twobyte_insn:
c->dst.val = realmode_get_cr(ctxt->vcpu, 0);
break;
case 6: /* lmsw */
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
realmode_lmsw(ctxt->vcpu, (u16)c->src.val,
&ctxt->eflags);
c->dst.type = OP_NONE;
break;
case 7: /* invlpg*/
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
emulate_invlpg(ctxt->vcpu, memop);
/* Disable writeback. */
c->dst.type = OP_NONE;
@@ -2273,23 +2325,67 @@ twobyte_insn:
goto writeback;
break;
case 0x06:
+ if (c->lock_prefix) {
+ if (ctxt->mode == X86EMUL_MODE_REAL ||
+ !(ctxt->vcpu->arch.cr0 & X86_CR0_PE))
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ else
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
emulate_clts(ctxt->vcpu);
c->dst.type = OP_NONE;
break;
case 0x08: /* invd */
case 0x09: /* wbinvd */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
case 0x0d: /* GrpP (prefetch) */
case 0x18: /* Grp16 (prefetch/nop) */
c->dst.type = OP_NONE;
break;
case 0x20: /* mov cr, reg */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
if (c->modrm_mod != 3)
goto cannot_emulate;
+
c->regs[c->modrm_rm] =
realmode_get_cr(ctxt->vcpu, c->modrm_reg);
c->dst.type = OP_NONE; /* no writeback */
break;
case 0x21: /* mov from dr to reg */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
if (c->modrm_mod != 3)
goto cannot_emulate;
rc = emulator_get_dr(ctxt, c->modrm_reg, &c->regs[c->modrm_rm]);
@@ -2298,6 +2394,16 @@ twobyte_insn:
c->dst.type = OP_NONE; /* no writeback */
break;
case 0x22: /* mov reg, cr */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
if (c->modrm_mod != 3)
goto cannot_emulate;
realmode_set_cr(ctxt->vcpu,
@@ -2305,6 +2411,16 @@ twobyte_insn:
c->dst.type = OP_NONE;
break;
case 0x23: /* mov from reg to dr */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
if (c->modrm_mod != 3)
goto cannot_emulate;
rc = emulator_set_dr(ctxt, c->modrm_reg,
@@ -2315,6 +2431,16 @@ twobyte_insn:
break;
case 0x30:
/* wrmsr */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
msr_data = (u32)c->regs[VCPU_REGS_RAX]
| ((u64)c->regs[VCPU_REGS_RDX] << 32);
rc = kvm_set_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], msr_data);
@@ -2327,6 +2453,16 @@ twobyte_insn:
break;
case 0x32:
/* rdmsr */
+ if (c->lock_prefix) {
+ kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
+ goto done;
+ }
+
+ if (kvm_x86_ops->get_cpl(ctxt->vcpu)) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ goto done;
+ }
+
rc = kvm_get_msr(ctxt->vcpu, c->regs[VCPU_REGS_RCX], &msr_data);
if (rc) {
kvm_inject_gp(ctxt->vcpu, 0);
--
1.6.6
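
The same two-step guard recurs in every hunk above, so it is worth stating
once in isolation. Below is a minimal sketch of that pattern;
check_privileged_insn is a hypothetical helper, not part of the patch — the
real code open-codes both checks at each opcode and uses "goto done":

/* Sketch of the guard the patch adds before each privileged opcode:
 * a LOCK prefix is always invalid (#UD), and any CPL other than 0
 * gets #GP(0) before the privileged operation is emulated. */
static int check_privileged_insn(struct x86_emulate_ctxt *ctxt,
				 struct decode_cache *c)
{
	if (c->lock_prefix) {
		kvm_queue_exception(ctxt->vcpu, UD_VECTOR);
		return -1;		/* caller: goto done */
	}
	if (kvm_x86_ops->get_cpl(ctxt->vcpu) != 0) {
		kvm_inject_gp(ctxt->vcpu, 0);
		return -1;		/* caller: goto done */
	}
	return 0;
}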

debian/patches/bugfix/x86/kvm-Check-IOPL-level-during-io-instruction-emulation.patch

@@ -0,0 +1,164 @@
From 79270591de89ee777f2293a1d02f46f6f3db03b3 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 10 Feb 2010 05:22:09 +0000
Subject: [PATCH 2/4] Check IOPL level during io instruction emulation.
Make the emulator check that the vcpu is allowed to execute IN, INS,
OUT, OUTS, CLI and STI.
[forward-ported by Ben Hutchings <ben@decadent.org.uk>]
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/kvm_host.h | 1 +
arch/x86/kvm/emulate.c | 18 ++++++++---
arch/x86/kvm/x86.c | 65 +++++++++++++++++++++++++++++++++++---
3 files changed, 73 insertions(+), 11 deletions(-)
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index f483404..eb2531b 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -663,6 +663,7 @@ void kvm_disable_tdp(void);
int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
int complete_pio(struct kvm_vcpu *vcpu);
+bool kvm_check_iopl(struct kvm_vcpu *vcpu);
struct kvm_memory_slot *gfn_to_memslot_unaliased(struct kvm *kvm, gfn_t gfn);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 8f159f7..91f0eed 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -2098,13 +2098,21 @@ special_insn:
c->dst.type = OP_NONE; /* Disable writeback. */
break;
case 0xfa: /* cli */
- ctxt->eflags &= ~X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
+ if (kvm_check_iopl(ctxt->vcpu))
+ kvm_inject_gp(ctxt->vcpu, 0);
+ else {
+ ctxt->eflags &= ~X86_EFLAGS_IF;
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ }
break;
case 0xfb: /* sti */
- toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
- ctxt->eflags |= X86_EFLAGS_IF;
- c->dst.type = OP_NONE; /* Disable writeback. */
+ if (kvm_check_iopl(ctxt->vcpu))
+ kvm_inject_gp(ctxt->vcpu, 0);
+ else {
+ toggle_interruptibility(ctxt, X86_SHADOW_INT_STI);
+ ctxt->eflags |= X86_EFLAGS_IF;
+ c->dst.type = OP_NONE; /* Disable writeback. */
+ }
break;
case 0xfc: /* cld */
ctxt->eflags &= ~EFLG_DF;
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 012dd8b..06f1b69 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -3075,11 +3075,60 @@ static int pio_string_write(struct kvm_vcpu *vcpu)
return r;
}
+bool kvm_check_iopl(struct kvm_vcpu *vcpu)
+{
+ int iopl;
+ if (!(vcpu->arch.cr0 & X86_CR0_PE))
+ return false;
+ if (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_VM)
+ return true;
+ iopl = (kvm_x86_ops->get_rflags(vcpu) & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ return kvm_x86_ops->get_cpl(vcpu) > iopl;
+}
+
+bool kvm_check_io_port_access_allowed(struct kvm_vcpu *vcpu, u16 port, u16 len)
+{
+ struct kvm_segment tr_seg;
+ int r;
+ u16 io_bitmap_ptr;
+ u8 perm, bit_idx = port & 0x7;
+ unsigned mask = (1 << len) - 1;
+
+ kvm_get_segment(vcpu, &tr_seg, VCPU_SREG_TR);
+ if (tr_seg.unusable)
+ return false;
+ if (tr_seg.limit < 103)
+ return false;
+ r = kvm_read_guest_virt_system(tr_seg.base + 102, &io_bitmap_ptr, 2,
+ vcpu, NULL);
+ if (r != X86EMUL_CONTINUE)
+ return false;
+ if (io_bitmap_ptr + port/8 >= tr_seg.limit)
+ return false;
+ r = kvm_read_guest_virt_system(tr_seg.base + io_bitmap_ptr + port/8,
+ &perm, 1, vcpu, NULL);
+ if (r != X86EMUL_CONTINUE)
+ return false;
+ if ((perm >> bit_idx) & mask)
+ return false;
+ return true;
+}
+
int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
int size, unsigned port)
{
unsigned long val;
+ trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+ size, 1);
+
+ if (kvm_check_iopl(vcpu)) {
+ if (!kvm_check_io_port_access_allowed(vcpu, port, size)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ }
+
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3091,9 +3140,6 @@ int kvm_emulate_pio(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.down = 0;
vcpu->arch.pio.rep = 0;
- trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
- size, 1);
-
val = kvm_register_read(vcpu, VCPU_REGS_RAX);
memcpy(vcpu->arch.pio_data, &val, 4);
@@ -3112,6 +3158,16 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
unsigned now, in_page;
int ret = 0;
+ trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
+ size, 1);
+
+ if (kvm_check_iopl(vcpu)) {
+ if (!kvm_check_io_port_access_allowed(vcpu, port, size)) {
+ kvm_inject_gp(vcpu, 0);
+ return 1;
+ }
+ }
+
vcpu->run->exit_reason = KVM_EXIT_IO;
vcpu->run->io.direction = in ? KVM_EXIT_IO_IN : KVM_EXIT_IO_OUT;
vcpu->run->io.size = vcpu->arch.pio.size = size;
@@ -3123,9 +3179,6 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
vcpu->arch.pio.down = down;
vcpu->arch.pio.rep = rep;
- trace_kvm_pio(vcpu->run->io.direction == KVM_EXIT_IO_OUT, port,
- size, count);
-
if (!count) {
kvm_x86_ops->skip_emulated_instruction(vcpu);
return 1;
--
1.6.6
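
Note how the two checks in this patch interact: kvm_check_iopl() only decides
whether the fast path (CPL <= IOPL) applies; when it fails, the I/O permission
bitmap in the TSS gets the final say via kvm_check_io_port_access_allowed().
The bit test itself is compact enough to restate; io_bitmap_allows below is a
hypothetical, self-contained rendering of the same logic:

/* Sketch of the bitmap test performed above: one bit per port, a set
 * bit denies access, and all 'len' consecutive bits must be clear.
 * Like the patch, this reads a single bitmap byte, so an access that
 * straddles a byte boundary is only partially checked. */
static bool io_bitmap_allows(const u8 *bitmap, u16 port, u16 len)
{
	u8 perm = bitmap[port / 8];	/* byte covering the first port */
	unsigned mask = (1 << len) - 1;	/* one bit per byte of the access */

	return ((perm >> (port & 0x7)) & mask) == 0;
}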

debian/patches/bugfix/x86/kvm-Fix-popf-emulation.patch

@@ -0,0 +1,102 @@
From 7c17b3a2cd6787ef025762655827a9afe8c88d6b Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 10 Feb 2010 05:26:23 +0000
Subject: [PATCH 3/4] Fix popf emulation.
POPF behaves differently depending on the current CPU mode. Emulate the
correct logic to prevent the guest from changing flags that it can't
change otherwise.
[forward-ported by Ben Hutchings <ben@decadent.org.uk>]
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/kvm/emulate.c | 54 +++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 53 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index 91f0eed..4a6526f 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -324,11 +324,18 @@ static u32 group2_table[] = {
};
/* EFLAGS bit definitions. */
+#define EFLG_ID (1<<21)
+#define EFLG_VIP (1<<20)
+#define EFLG_VIF (1<<19)
+#define EFLG_AC (1<<18)
#define EFLG_VM (1<<17)
#define EFLG_RF (1<<16)
+#define EFLG_IOPL (3<<12)
+#define EFLG_NT (1<<14)
#define EFLG_OF (1<<11)
#define EFLG_DF (1<<10)
#define EFLG_IF (1<<9)
+#define EFLG_TF (1<<8)
#define EFLG_SF (1<<7)
#define EFLG_ZF (1<<6)
#define EFLG_AF (1<<4)
@@ -1189,6 +1196,48 @@ static int emulate_pop(struct x86_emulate_ctxt *ctxt,
return rc;
}
+static int emulate_popf(struct x86_emulate_ctxt *ctxt,
+ struct x86_emulate_ops *ops,
+ void *dest, int len)
+{
+ struct decode_cache *c = &ctxt->decode;
+ int rc;
+ unsigned long val, change_mask;
+ int iopl = (ctxt->eflags & X86_EFLAGS_IOPL) >> IOPL_SHIFT;
+ int cpl = kvm_x86_ops->get_cpl(ctxt->vcpu);
+
+ rc = ops->read_emulated(register_address(c, ss_base(ctxt),
+ c->regs[VCPU_REGS_RSP]),
+ &val, c->src.bytes, ctxt->vcpu);
+ if (rc != X86EMUL_CONTINUE)
+ return rc;
+
+ register_address_increment(c, &c->regs[VCPU_REGS_RSP], c->src.bytes);
+
+ change_mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF | EFLG_SF | EFLG_OF
+ | EFLG_TF | EFLG_DF | EFLG_NT | EFLG_RF | EFLG_AC | EFLG_ID;
+
+ if (ctxt->vcpu->arch.cr0 & X86_CR0_PE) {
+ if (cpl == 0)
+ change_mask |= EFLG_IOPL;
+ if (cpl <= iopl)
+ change_mask |= EFLG_IF;
+ } else if (ctxt->eflags & EFLG_VM) {
+ if (iopl < 3) {
+ kvm_inject_gp(ctxt->vcpu, 0);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
+ change_mask |= EFLG_IF;
+ }
+ else /* real mode */
+ change_mask |= (EFLG_IOPL | EFLG_IF);
+
+ *(unsigned long*)dest =
+ (ctxt->eflags & ~change_mask) | (val & change_mask);
+
+ return rc;
+}
+
static inline int emulate_grp1a(struct x86_emulate_ctxt *ctxt,
struct x86_emulate_ops *ops)
{
@@ -1913,7 +1962,10 @@ special_insn:
c->dst.type = OP_REG;
c->dst.ptr = (unsigned long *) &ctxt->eflags;
c->dst.bytes = c->op_bytes;
- goto pop_instruction;
+ rc = emulate_popf(ctxt, ops, &c->dst.val, c->op_bytes);
+ if (rc != X86EMUL_CONTINUE)
+ goto done;
+ break;
case 0xa0 ... 0xa1: /* mov */
c->dst.ptr = (unsigned long *)&c->regs[VCPU_REGS_RAX];
c->dst.val = c->src.val;
--
1.6.6
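
The effect of the mask logic is easiest to see pulled out of the emulator.
popf_change_mask below is an illustrative restatement (a hypothetical helper;
the real emulate_popf() also pops the value from the stack and injects #GP(0)
itself in the vm86 case), using the EFLG_* constants defined in the patch:

/* Which EFLAGS bits a POPF may change, by mode and privilege. */
static unsigned long popf_change_mask(bool protected_mode, bool vm86,
				      int cpl, int iopl)
{
	unsigned long mask = EFLG_CF | EFLG_PF | EFLG_AF | EFLG_ZF
		| EFLG_SF | EFLG_OF | EFLG_TF | EFLG_DF | EFLG_NT
		| EFLG_RF | EFLG_AC | EFLG_ID;

	if (protected_mode) {
		if (cpl == 0)
			mask |= EFLG_IOPL;	/* only ring 0 may change IOPL */
		if (cpl <= iopl)
			mask |= EFLG_IF;	/* IF writable when CPL <= IOPL */
	} else if (vm86) {
		/* with IOPL < 3 the real code injects #GP(0) instead */
		mask |= EFLG_IF;
	} else {
		/* real mode: both IOPL and IF are writable */
		mask |= EFLG_IOPL | EFLG_IF;
	}
	return mask;
}

For example, a CPL-3 task under an IOPL-0 kernel may pop the arithmetic and
direction flags but neither IF nor IOPL, so it cannot disable interrupts via
POPF.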

debian/patches/bugfix/x86/kvm-fix-memory-access-during-x86-emulation.patch

@@ -0,0 +1,518 @@
From 9f30f5c5cb976947efcfe47b6b039933ca23add7 Mon Sep 17 00:00:00 2001
From: Gleb Natapov <gleb@redhat.com>
Date: Wed, 10 Feb 2010 05:03:36 +0000
Subject: [PATCH 1/4] KVM: fix memory access during x86 emulation.
Currently, when the x86 emulator needs to access memory, the page walk is
done with the broadest permissions possible, so if the emulated instruction
was executed by a userspace process it can still access kernel memory. Fix
that by providing the correct memory access to the page walker during
emulation.
[forward-ported by Ben Hutchings <ben@decadent.org.uk>]
Signed-off-by: Gleb Natapov <gleb@redhat.com>
---
arch/x86/include/asm/kvm_emulate.h | 14 +++-
arch/x86/include/asm/kvm_host.h | 7 ++-
arch/x86/kvm/emulate.c | 6 +-
arch/x86/kvm/mmu.c | 17 ++---
arch/x86/kvm/mmu.h | 6 ++
arch/x86/kvm/paging_tmpl.h | 11 ++-
arch/x86/kvm/x86.c | 130 ++++++++++++++++++++++++++++--------
7 files changed, 143 insertions(+), 48 deletions(-)
diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h
index 7c18e12..1ff11e7 100644
--- a/arch/x86/include/asm/kvm_emulate.h
+++ b/arch/x86/include/asm/kvm_emulate.h
@@ -54,13 +54,23 @@ struct x86_emulate_ctxt;
struct x86_emulate_ops {
/*
* read_std: Read bytes of standard (non-emulated/special) memory.
- * Used for instruction fetch, stack operations, and others.
+ * Used for descriptor reading.
* @addr: [IN ] Linear address from which to read.
* @val: [OUT] Value read from memory, zero-extended to 'u_long'.
* @bytes: [IN ] Number of bytes to read from memory.
*/
int (*read_std)(unsigned long addr, void *val,
- unsigned int bytes, struct kvm_vcpu *vcpu);
+ unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
+
+ /*
+ * fetch: Read bytes of standard (non-emulated/special) memory.
+ * Used for instruction fetch.
+ * @addr: [IN ] Linear address from which to read.
+ * @val: [OUT] Value read from memory, zero-extended to 'u_long'.
+ * @bytes: [IN ] Number of bytes to read from memory.
+ */
+ int (*fetch)(unsigned long addr, void *val,
+ unsigned int bytes, struct kvm_vcpu *vcpu, u32 *error);
/*
* read_emulated: Read bytes from emulated/special memory area.
diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index d759a1f..f483404 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -256,7 +256,8 @@ struct kvm_mmu {
void (*new_cr3)(struct kvm_vcpu *vcpu);
int (*page_fault)(struct kvm_vcpu *vcpu, gva_t gva, u32 err);
void (*free)(struct kvm_vcpu *vcpu);
- gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva);
+ gpa_t (*gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t gva, u32 access,
+ u32 *error);
void (*prefetch_page)(struct kvm_vcpu *vcpu,
struct kvm_mmu_page *page);
int (*sync_page)(struct kvm_vcpu *vcpu,
@@ -645,6 +646,10 @@ void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
int kvm_mmu_load(struct kvm_vcpu *vcpu);
void kvm_mmu_unload(struct kvm_vcpu *vcpu);
void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error);
int kvm_emulate_hypercall(struct kvm_vcpu *vcpu);
diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c
index e02dbb6..8f159f7 100644
--- a/arch/x86/kvm/emulate.c
+++ b/arch/x86/kvm/emulate.c
@@ -597,7 +597,7 @@ static int do_fetch_insn_byte(struct x86_emulate_ctxt *ctxt,
if (linear < fc->start || linear >= fc->end) {
size = min(15UL, PAGE_SIZE - offset_in_page(linear));
- rc = ops->read_std(linear, fc->data, size, ctxt->vcpu);
+ rc = ops->fetch(linear, fc->data, size, ctxt->vcpu, NULL);
if (rc)
return rc;
fc->start = linear;
@@ -652,11 +652,11 @@ static int read_descriptor(struct x86_emulate_ctxt *ctxt,
op_bytes = 3;
*address = 0;
rc = ops->read_std((unsigned long)ptr, (unsigned long *)size, 2,
- ctxt->vcpu);
+ ctxt->vcpu, NULL);
if (rc)
return rc;
rc = ops->read_std((unsigned long)ptr + 2, address, op_bytes,
- ctxt->vcpu);
+ ctxt->vcpu, NULL);
return rc;
}
diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 3a01519..9983219 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -136,12 +136,6 @@ module_param(oos_shadow, bool, 0644);
#define PT64_PERM_MASK (PT_PRESENT_MASK | PT_WRITABLE_MASK | PT_USER_MASK \
| PT64_NX_MASK)
-#define PFERR_PRESENT_MASK (1U << 0)
-#define PFERR_WRITE_MASK (1U << 1)
-#define PFERR_USER_MASK (1U << 2)
-#define PFERR_RSVD_MASK (1U << 3)
-#define PFERR_FETCH_MASK (1U << 4)
-
#define PT_PDPE_LEVEL 3
#define PT_DIRECTORY_LEVEL 2
#define PT_PAGE_TABLE_LEVEL 1
@@ -1639,7 +1633,7 @@ struct page *gva_to_page(struct kvm_vcpu *vcpu, gva_t gva)
{
struct page *page;
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+ gpa_t gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
if (gpa == UNMAPPED_GVA)
return NULL;
@@ -2162,8 +2156,11 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
spin_unlock(&vcpu->kvm->mmu_lock);
}
-static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gva_t vaddr,
+ u32 access, u32 *error)
{
+ if (error)
+ *error = 0;
return vaddr;
}
@@ -2747,7 +2744,7 @@ int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
if (tdp_enabled)
return 0;
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, gva);
+ gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
spin_lock(&vcpu->kvm->mmu_lock);
r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
@@ -3245,7 +3242,7 @@ static void audit_mappings_page(struct kvm_vcpu *vcpu, u64 page_pte,
if (is_shadow_present_pte(ent) && !is_last_spte(ent, level))
audit_mappings_page(vcpu, ent, va, level - 1);
else {
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, va);
+ gpa_t gpa = kvm_mmu_gva_to_gpa_system(vcpu, va, NULL);
gfn_t gfn = gpa >> PAGE_SHIFT;
pfn_t pfn = gfn_to_pfn(vcpu->kvm, gfn);
hpa_t hpa = (hpa_t)pfn << PAGE_SHIFT;
diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index 61a1b38..bac7529 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -37,6 +37,12 @@
#define PT32_ROOT_LEVEL 2
#define PT32E_ROOT_LEVEL 3
+#define PFERR_PRESENT_MASK (1U << 0)
+#define PFERR_WRITE_MASK (1U << 1)
+#define PFERR_USER_MASK (1U << 2)
+#define PFERR_RSVD_MASK (1U << 3)
+#define PFERR_FETCH_MASK (1U << 4)
+
int kvm_mmu_get_spte_hierarchy(struct kvm_vcpu *vcpu, u64 addr, u64 sptes[4]);
static inline void kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu)
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 5fa3325..0b7617b 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -491,18 +491,23 @@ static void FNAME(invlpg)(struct kvm_vcpu *vcpu, gva_t gva)
spin_unlock(&vcpu->kvm->mmu_lock);
}
-static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr)
+static gpa_t FNAME(gva_to_gpa)(struct kvm_vcpu *vcpu, gva_t vaddr, u32 access,
+ u32 *error)
{
struct guest_walker walker;
gpa_t gpa = UNMAPPED_GVA;
int r;
- r = FNAME(walk_addr)(&walker, vcpu, vaddr, 0, 0, 0);
+ r = FNAME(walk_addr)(&walker, vcpu, vaddr,
+ !!(access & PFERR_WRITE_MASK),
+ !!(access & PFERR_USER_MASK),
+ !!(access & PFERR_FETCH_MASK));
if (r) {
gpa = gfn_to_gpa(walker.gfn);
gpa |= vaddr & ~PAGE_MASK;
- }
+ } else if(error)
+ *error = walker.error_code;
return gpa;
}
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 145741c..012dd8b 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -2506,14 +2506,41 @@ static int vcpu_mmio_read(struct kvm_vcpu *vcpu, gpa_t addr, int len, void *v)
return kvm_io_bus_read(&vcpu->kvm->mmio_bus, addr, len, v);
}
-static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu)
+gpa_t kvm_mmu_gva_to_gpa_read(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ access |= PFERR_FETCH_MASK;
+ return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+gpa_t kvm_mmu_gva_to_gpa_write(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ access |= PFERR_WRITE_MASK;
+ return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, access, error);
+}
+
+/* uses this to access any guest's mapped memory without checking CPL */
+gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva, u32 *error)
+{
+ return vcpu->arch.mmu.gva_to_gpa(vcpu, gva, 0, error);
+}
+
+static int kvm_read_guest_virt_helper(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu, u32 access,
+ u32 *error)
{
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr, access, error);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned toread = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -2536,14 +2563,37 @@ out:
return r;
}
+/* used for instruction fetching */
+static int kvm_fetch_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu, u32 *error)
+{
+ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu,
+ access | PFERR_FETCH_MASK, error);
+}
+
+static int kvm_read_guest_virt(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu, u32 *error)
+{
+ u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;
+ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, access,
+ error);
+}
+
+static int kvm_read_guest_virt_system(gva_t addr, void *val, unsigned int bytes,
+ struct kvm_vcpu *vcpu, u32 *error)
+{
+ return kvm_read_guest_virt_helper(addr, val, bytes, vcpu, 0, error);
+}
+
static int kvm_write_guest_virt(gva_t addr, void *val, unsigned int bytes,
- struct kvm_vcpu *vcpu)
+ struct kvm_vcpu *vcpu, u32 *error)
{
void *data = val;
int r = X86EMUL_CONTINUE;
while (bytes) {
- gpa_t gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ gpa_t gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, error);
unsigned offset = addr & (PAGE_SIZE-1);
unsigned towrite = min(bytes, (unsigned)PAGE_SIZE - offset);
int ret;
@@ -2573,6 +2623,7 @@ static int emulator_read_emulated(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ u32 error_code;
if (vcpu->mmio_read_completed) {
memcpy(val, vcpu->mmio_data, bytes);
@@ -2582,17 +2633,20 @@ static int emulator_read_emulated(unsigned long addr,
return X86EMUL_CONTINUE;
}
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ gpa = kvm_mmu_gva_to_gpa_read(vcpu, addr, &error_code);
+
+ if (gpa == UNMAPPED_GVA) {
+ kvm_inject_page_fault(vcpu, addr, error_code);
+ return X86EMUL_PROPAGATE_FAULT;
+ }
/* For APIC access vmexit */
if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
goto mmio;
- if (kvm_read_guest_virt(addr, val, bytes, vcpu)
+ if (kvm_read_guest_virt(addr, val, bytes, vcpu, NULL)
== X86EMUL_CONTINUE)
return X86EMUL_CONTINUE;
- if (gpa == UNMAPPED_GVA)
- return X86EMUL_PROPAGATE_FAULT;
mmio:
/*
@@ -2631,11 +2685,12 @@ static int emulator_write_emulated_onepage(unsigned long addr,
struct kvm_vcpu *vcpu)
{
gpa_t gpa;
+ u32 error_code;
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, &error_code);
if (gpa == UNMAPPED_GVA) {
- kvm_inject_page_fault(vcpu, addr, 2);
+ kvm_inject_page_fault(vcpu, addr, error_code);
return X86EMUL_PROPAGATE_FAULT;
}
@@ -2699,7 +2754,7 @@ static int emulator_cmpxchg_emulated(unsigned long addr,
char *kaddr;
u64 val;
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+ gpa = kvm_mmu_gva_to_gpa_write(vcpu, addr, NULL);
if (gpa == UNMAPPED_GVA ||
(gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE)
@@ -2778,7 +2833,7 @@ void kvm_report_emulation_failure(struct kvm_vcpu *vcpu, const char *context)
rip_linear = rip + get_segment_base(vcpu, VCPU_SREG_CS);
- kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu);
+ kvm_read_guest_virt(rip_linear, (void *)opcodes, 4, vcpu, NULL);
printk(KERN_ERR "emulation failed (%s) rip %lx %02x %02x %02x %02x\n",
context, rip, opcodes[0], opcodes[1], opcodes[2], opcodes[3]);
@@ -2787,6 +2842,7 @@ EXPORT_SYMBOL_GPL(kvm_report_emulation_failure);
static struct x86_emulate_ops emulate_ops = {
.read_std = kvm_read_guest_virt,
+ .fetch = kvm_fetch_guest_virt,
.read_emulated = emulator_read_emulated,
.write_emulated = emulator_write_emulated,
.cmpxchg_emulated = emulator_cmpxchg_emulated,
@@ -2922,12 +2978,17 @@ static int pio_copy_data(struct kvm_vcpu *vcpu)
gva_t q = vcpu->arch.pio.guest_gva;
unsigned bytes;
int ret;
+ u32 error_code;
bytes = vcpu->arch.pio.size * vcpu->arch.pio.cur_count;
if (vcpu->arch.pio.in)
- ret = kvm_write_guest_virt(q, p, bytes, vcpu);
+ ret = kvm_write_guest_virt(q, p, bytes, vcpu, &error_code);
else
- ret = kvm_read_guest_virt(q, p, bytes, vcpu);
+ ret = kvm_read_guest_virt(q, p, bytes, vcpu, &error_code);
+
+ if (ret == X86EMUL_PROPAGATE_FAULT)
+ kvm_inject_page_fault(vcpu, q, error_code);
+
return ret;
}
@@ -2948,7 +3009,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
if (io->in) {
r = pio_copy_data(vcpu);
if (r)
- return r;
+ goto out;
}
delta = 1;
@@ -2976,6 +3037,7 @@ int complete_pio(struct kvm_vcpu *vcpu)
}
}
+out:
io->count -= io->cur_count;
io->cur_count = 0;
@@ -3095,10 +3157,8 @@ int kvm_emulate_pio_string(struct kvm_vcpu *vcpu, struct kvm_run *run, int in,
if (!vcpu->arch.pio.in) {
/* string PIO write */
ret = pio_copy_data(vcpu);
- if (ret == X86EMUL_PROPAGATE_FAULT) {
- kvm_inject_gp(vcpu, 0);
+ if (ret == X86EMUL_PROPAGATE_FAULT)
return 1;
- }
if (ret == 0 && !pio_string_write(vcpu)) {
complete_pio(vcpu);
if (vcpu->arch.pio.count == 0)
@@ -4078,7 +4138,7 @@ static int load_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
kvm_queue_exception_e(vcpu, GP_VECTOR, selector & 0xfffc);
return 1;
}
- return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
+ return kvm_read_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
}
/* allowed just for 8 bytes segments */
@@ -4092,15 +4152,27 @@ static int save_guest_segment_descriptor(struct kvm_vcpu *vcpu, u16 selector,
if (dtable.limit < index * 8 + 7)
return 1;
- return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu);
+ return kvm_write_guest_virt(dtable.base + index*8, seg_desc, sizeof(*seg_desc), vcpu, NULL);
+}
+
+static gpa_t get_tss_base_addr_read(struct kvm_vcpu *vcpu,
+ struct desc_struct *seg_desc)
+{
+ u32 base_addr;
+
+ base_addr = seg_desc->base0;
+ base_addr |= (seg_desc->base1 << 16);
+ base_addr |= (seg_desc->base2 << 24);
+
+ return kvm_mmu_gva_to_gpa_read(vcpu, base_addr, NULL);
}
-static gpa_t get_tss_base_addr(struct kvm_vcpu *vcpu,
+static gpa_t get_tss_base_addr_write(struct kvm_vcpu *vcpu,
struct desc_struct *seg_desc)
{
u32 base_addr = get_desc_base(seg_desc);
- return vcpu->arch.mmu.gva_to_gpa(vcpu, base_addr);
+ return kvm_mmu_gva_to_gpa_write(vcpu, base_addr, NULL);
}
static u16 get_segment_selector(struct kvm_vcpu *vcpu, int seg)
@@ -4303,7 +4375,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
sizeof tss_segment_16))
goto out;
- if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+ if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
&tss_segment_16, sizeof tss_segment_16))
goto out;
@@ -4311,7 +4383,7 @@ static int kvm_task_switch_16(struct kvm_vcpu *vcpu, u16 tss_selector,
tss_segment_16.prev_task_link = old_tss_sel;
if (kvm_write_guest(vcpu->kvm,
- get_tss_base_addr(vcpu, nseg_desc),
+ get_tss_base_addr_write(vcpu, nseg_desc),
&tss_segment_16.prev_task_link,
sizeof tss_segment_16.prev_task_link))
goto out;
@@ -4342,7 +4414,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
sizeof tss_segment_32))
goto out;
- if (kvm_read_guest(vcpu->kvm, get_tss_base_addr(vcpu, nseg_desc),
+ if (kvm_read_guest(vcpu->kvm, get_tss_base_addr_read(vcpu, nseg_desc),
&tss_segment_32, sizeof tss_segment_32))
goto out;
@@ -4350,7 +4422,7 @@ static int kvm_task_switch_32(struct kvm_vcpu *vcpu, u16 tss_selector,
tss_segment_32.prev_task_link = old_tss_sel;
if (kvm_write_guest(vcpu->kvm,
- get_tss_base_addr(vcpu, nseg_desc),
+ get_tss_base_addr_write(vcpu, nseg_desc),
&tss_segment_32.prev_task_link,
sizeof tss_segment_32.prev_task_link))
goto out;
@@ -4373,7 +4445,7 @@ int kvm_task_switch(struct kvm_vcpu *vcpu, u16 tss_selector, int reason)
u32 old_tss_base = get_segment_base(vcpu, VCPU_SREG_TR);
u16 old_tss_sel = get_segment_selector(vcpu, VCPU_SREG_TR);
- old_tss_base = vcpu->arch.mmu.gva_to_gpa(vcpu, old_tss_base);
+ old_tss_base = kvm_mmu_gva_to_gpa_write(vcpu, old_tss_base, NULL);
/* FIXME: Handle errors. Failure to read either TSS or their
* descriptors should generate a pagefault.
@@ -4582,7 +4654,7 @@ int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu,
vcpu_load(vcpu);
down_read(&vcpu->kvm->slots_lock);
- gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, vaddr);
+ gpa = kvm_mmu_gva_to_gpa_system(vcpu, vaddr, NULL);
up_read(&vcpu->kvm->slots_lock);
tr->physical_address = gpa;
tr->valid = gpa != UNMAPPED_GVA;
--
1.6.6
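
The core of the fix is that every guest-virtual access now carries an access
mask derived from the current CPL and the kind of access, and the page walker
enforces it the way the hardware would. A condensed sketch of how the four
wrappers above build that mask (walker_access is hypothetical; the real code
inlines this in each kvm_mmu_gva_to_gpa_* wrapper):

/* User bit from CPL, write/fetch bits from intent; PFERR_* constants
 * as moved into mmu.h by this patch. Passing 0 instead (the _system
 * variant) deliberately bypasses the CPL check, e.g. for TSS access. */
static u32 walker_access(struct kvm_vcpu *vcpu, bool write, bool fetch)
{
	u32 access = (kvm_x86_ops->get_cpl(vcpu) == 3) ? PFERR_USER_MASK : 0;

	if (write)
		access |= PFERR_WRITE_MASK;
	if (fetch)
		access |= PFERR_FETCH_MASK;
	return access;
}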

debian/patches/series/8

@@ -9,3 +9,7 @@
- bugfix/all/e1000-enhance-fragment-detection.patch
+ bugfix/all/stable/2.6.32.8.patch
+ bugfix/all/fix-potential-crash-with-sys_move_pages.patch
+ bugfix/x86/kvm-fix-memory-access-during-x86-emulation.patch
+ bugfix/x86/kvm-Check-IOPL-level-during-io-instruction-emulation.patch
+ bugfix/x86/kvm-Fix-popf-emulation.patch
+ bugfix/x86/kvm-Check-CPL-level-during-privilege-instruction-emulation.patch