diff --git a/debian/changelog b/debian/changelog index 87f764c57..9bef1db2a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -17,6 +17,57 @@ linux (4.19.67-3) UNRELEASED; urgency=medium -- Romain Perier Wed, 28 Aug 2019 13:28:09 +0200 +linux (4.19.67-2+deb10u2) buster-security; urgency=high + + * [x86] Add mitigation for TSX Asynchronous Abort (CVE-2019-11135): + - KVM: x86: use Intel speculation bugs and features as derived in generic + x86 code + - x86/msr: Add the IA32_TSX_CTRL MSR + - x86/cpu: Add a helper function x86_read_arch_cap_msr() + - x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default + - x86/speculation/taa: Add mitigation for TSX Async Abort + - x86/speculation/taa: Add sysfs reporting for TSX Async Abort + - kvm/x86: Export MDS_NO=0 to guests when TSX is enabled + - x86/tsx: Add "auto" option to the tsx= cmdline parameter + - x86/speculation/taa: Add documentation for TSX Async Abort + - x86/tsx: Add config options to set tsx=on|off|auto + - x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs + TSX is now disabled by default; see + Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + * [x86] KVM: Add mitigation for Machine Check Error on Page Size Change + (aka iTLB multi-hit, CVE-2018-12207): + - kvm: Convert kvm_lock to a mutex + - kvm: x86: Do not release the page inside mmu_set_spte() + - KVM: x86: make FNAME(fetch) and __direct_map more similar + - KVM: x86: remove now unneeded hugepage gfn adjustment + - KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON + - KVM: x86: add tracepoints around __direct_map and FNAME(fetch) + - kvm: x86, powerpc: do not allow clearing largepages debugfs entry + - KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is active + - x86/bugs: Add ITLB_MULTIHIT bug infrastructure + - cpu/speculation: Uninline and export CPU mitigations helpers + - kvm: mmu: ITLB_MULTIHIT mitigation + - kvm: Add helper function for creating VM worker threads + - kvm: x86: mmu: Recovery of shattered NX large pages + - Documentation: Add ITLB_MULTIHIT documentation + * [x86] i915: Mitigate local privilege escalation on gen9 (CVE-2019-0155): + - drm/i915: Rename gen7 cmdparser tables + - drm/i915: Disable Secure Batches for gen6+ + - drm/i915: Remove Master tables from cmdparser + - drm/i915: Add support for mandatory cmdparsing + - drm/i915: Support ro ppgtt mapped cmdparser shadow buffers + - drm/i915: Allow parsing of unsized batches + - drm/i915: Add gen9 BCS cmdparsing + - drm/i915/cmdparser: Use explicit goto for error paths + - drm/i915/cmdparser: Add support for backward jumps + - drm/i915/cmdparser: Ignore Length operands during command matching + - drm/i915/cmdparser: Fix jump whitelist clearing + * [x86] i915: Mitigate local denial-of-service on gen8/gen9 (CVE-2019-0154): + - drm/i915: Lower RM timeout to avoid DSI hard hangs + - drm/i915/gen8+: Add RC6 CTX corruption WA + + -- Ben Hutchings Mon, 11 Nov 2019 00:30:56 +0000 + linux (4.19.67-2+deb10u1) buster-security; urgency=high [ Romain Perier ] diff --git a/debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch b/debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch new file mode 100644 index 000000000..b5cdb94b3 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch @@ -0,0 +1,176 @@ +From: Jon Bloomfield +Date: Fri, 20 Apr 2018 14:26:01 -0700 +Subject: drm/i915: Rename gen7 cmdparser tables +Bug-Debian-Security: 
https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 0a2f661b6c21815a7fa60e30babe975fee8e73c6 upstream. + +We're about to introduce some new tables for later gens, and the +current naming for the gen7 tables will no longer make sense. + +v2: rebase + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 70 +++++++++++++------------- + 1 file changed, 35 insertions(+), 35 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -211,7 +211,7 @@ struct drm_i915_cmd_table { + + /* Command Mask Fixed Len Action + ---------------------------------------------------------- */ +-static const struct drm_i915_cmd_descriptor common_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), + CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), +@@ -244,7 +244,7 @@ static const struct drm_i915_cmd_descrip + CMD( MI_BATCH_BUFFER_START, SMI, !F, 0xFF, S ), + }; + +-static const struct drm_i915_cmd_descriptor render_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_render_cmds[] = { + CMD( MI_FLUSH, SMI, F, 1, S ), + CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), + CMD( MI_PREDICATE, SMI, F, 1, S ), +@@ -328,7 +328,7 @@ static const struct drm_i915_cmd_descrip + CMD( GFX_OP_3DSTATE_BINDING_TABLE_EDIT_PS, S3D, !F, 0x1FF, S ), + }; + +-static const struct drm_i915_cmd_descriptor video_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_video_cmds[] = { + CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), + CMD( MI_SET_APPID, SMI, F, 1, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, +@@ -372,7 +372,7 @@ static const struct drm_i915_cmd_descrip + CMD( MFX_WAIT, SMFX, F, 1, S ), + }; + +-static const struct drm_i915_cmd_descriptor vecs_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_vecs_cmds[] = { + CMD( MI_ARB_ON_OFF, SMI, F, 1, R ), + CMD( MI_SET_APPID, SMI, F, 1, S ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0xFF, B, +@@ -410,7 +410,7 @@ static const struct drm_i915_cmd_descrip + }}, ), + }; + +-static const struct drm_i915_cmd_descriptor blt_cmds[] = { ++static const struct drm_i915_cmd_descriptor gen7_blt_cmds[] = { + CMD( MI_DISPLAY_FLIP, SMI, !F, 0xFF, R ), + CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, B, + .bits = {{ +@@ -463,35 +463,35 @@ static const struct drm_i915_cmd_descrip + #undef B + #undef M + +-static const struct drm_i915_cmd_table gen7_render_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { render_cmds, ARRAY_SIZE(render_cmds) }, ++static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, + }; + +-static const struct drm_i915_cmd_table hsw_render_ring_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { render_cmds, ARRAY_SIZE(render_cmds) }, ++static const struct drm_i915_cmd_table hsw_render_ring_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_render_cmds, ARRAY_SIZE(gen7_render_cmds) }, + { hsw_render_cmds, ARRAY_SIZE(hsw_render_cmds) }, + }; + +-static const struct drm_i915_cmd_table gen7_video_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { video_cmds, ARRAY_SIZE(video_cmds) }, ++static const struct drm_i915_cmd_table gen7_video_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ 
{ gen7_video_cmds, ARRAY_SIZE(gen7_video_cmds) }, + }; + +-static const struct drm_i915_cmd_table hsw_vebox_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { vecs_cmds, ARRAY_SIZE(vecs_cmds) }, ++static const struct drm_i915_cmd_table hsw_vebox_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_vecs_cmds, ARRAY_SIZE(gen7_vecs_cmds) }, + }; + +-static const struct drm_i915_cmd_table gen7_blt_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { blt_cmds, ARRAY_SIZE(blt_cmds) }, ++static const struct drm_i915_cmd_table gen7_blt_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, + }; + +-static const struct drm_i915_cmd_table hsw_blt_ring_cmds[] = { +- { common_cmds, ARRAY_SIZE(common_cmds) }, +- { blt_cmds, ARRAY_SIZE(blt_cmds) }, ++static const struct drm_i915_cmd_table hsw_blt_ring_cmd_table[] = { ++ { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, ++ { gen7_blt_cmds, ARRAY_SIZE(gen7_blt_cmds) }, + { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, + }; + +@@ -871,12 +871,12 @@ void intel_engine_init_cmd_parser(struct + switch (engine->id) { + case RCS: + if (IS_HASWELL(engine->i915)) { +- cmd_tables = hsw_render_ring_cmds; ++ cmd_tables = hsw_render_ring_cmd_table; + cmd_table_count = +- ARRAY_SIZE(hsw_render_ring_cmds); ++ ARRAY_SIZE(hsw_render_ring_cmd_table); + } else { +- cmd_tables = gen7_render_cmds; +- cmd_table_count = ARRAY_SIZE(gen7_render_cmds); ++ cmd_tables = gen7_render_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen7_render_cmd_table); + } + + if (IS_HASWELL(engine->i915)) { +@@ -890,17 +890,17 @@ void intel_engine_init_cmd_parser(struct + engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; + break; + case VCS: +- cmd_tables = gen7_video_cmds; +- cmd_table_count = ARRAY_SIZE(gen7_video_cmds); ++ cmd_tables = gen7_video_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen7_video_cmd_table); + engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + case BCS: + if (IS_HASWELL(engine->i915)) { +- cmd_tables = hsw_blt_ring_cmds; +- cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmds); ++ cmd_tables = hsw_blt_ring_cmd_table; ++ cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); + } else { +- cmd_tables = gen7_blt_cmds; +- cmd_table_count = ARRAY_SIZE(gen7_blt_cmds); ++ cmd_tables = gen7_blt_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); + } + + if (IS_HASWELL(engine->i915)) { +@@ -914,8 +914,8 @@ void intel_engine_init_cmd_parser(struct + engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + break; + case VECS: +- cmd_tables = hsw_vebox_cmds; +- cmd_table_count = ARRAY_SIZE(hsw_vebox_cmds); ++ cmd_tables = hsw_vebox_cmd_table; ++ cmd_table_count = ARRAY_SIZE(hsw_vebox_cmd_table); + /* VECS can use the same length_mask function as VCS */ + engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; diff --git a/debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch b/debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch new file mode 100644 index 000000000..0d347919d --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch @@ -0,0 +1,93 @@ +From: Jon Bloomfield +Date: Fri, 8 Jun 2018 08:53:46 -0700 +Subject: drm/i915: Disable Secure Batches for gen6+ +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 44157641d448cbc0c4b73c5231d2b911f0cb0427 upstream. 
+ +Retroactively stop reporting support for secure batches +through the api for gen6+ so that older binaries trigger +the fallback path instead. + +Older binaries use secure batches pre gen6 to access resources +that are not available to normal usermode processes. However, +all known userspace explicitly checks for HAS_SECURE_BATCHES +before relying on the secure batch feature. + +Since there are no known binaries relying on this for newer gens +we can kill secure batches from gen6, via I915_PARAM_HAS_SECURE_BATCHES. + +v2: rebase (Mika) +v3: rebase (Mika) + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_drv.c | 2 +- + drivers/gpu/drm/i915/i915_drv.h | 2 ++ + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 12 ++++++++++-- + 3 files changed, 13 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -351,7 +351,7 @@ static int i915_getparam_ioctl(struct dr + value = HAS_LEGACY_SEMAPHORES(dev_priv); + break; + case I915_PARAM_HAS_SECURE_BATCHES: +- value = capable(CAP_SYS_ADMIN); ++ value = HAS_SECURE_BATCHES(dev_priv) && capable(CAP_SYS_ADMIN); + break; + case I915_PARAM_CMD_PARSER_VERSION: + value = i915_cmd_parser_get_version(dev_priv); +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -2517,6 +2517,8 @@ intel_info(const struct drm_i915_private + + #define HAS_LEGACY_SEMAPHORES(dev_priv) IS_GEN7(dev_priv) + ++#define HAS_SECURE_BATCHES(dev_priv) (INTEL_GEN(dev_priv) < 6) ++ + #define HAS_LLC(dev_priv) ((dev_priv)->info.has_llc) + #define HAS_SNOOP(dev_priv) ((dev_priv)->info.has_snoop) + #define HAS_EDRAM(dev_priv) (!!((dev_priv)->edram_cap & EDRAM_ENABLED)) +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -2177,6 +2177,7 @@ i915_gem_do_execbuffer(struct drm_device + struct drm_i915_gem_exec_object2 *exec, + struct drm_syncobj **fences) + { ++ struct drm_i915_private *i915 = to_i915(dev); + struct i915_execbuffer eb; + struct dma_fence *in_fence = NULL; + struct sync_file *out_fence = NULL; +@@ -2187,7 +2188,7 @@ i915_gem_do_execbuffer(struct drm_device + BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & + ~__EXEC_OBJECT_UNKNOWN_FLAGS); + +- eb.i915 = to_i915(dev); ++ eb.i915 = i915; + eb.file = file; + eb.args = args; + if (DBG_FORCE_RELOC || !(args->flags & I915_EXEC_NO_RELOC)) +@@ -2209,8 +2210,15 @@ i915_gem_do_execbuffer(struct drm_device + + eb.batch_flags = 0; + if (args->flags & I915_EXEC_SECURE) { ++ if (INTEL_GEN(i915) >= 11) ++ return -ENODEV; ++ ++ /* Return -EPERM to trigger fallback code on old binaries. 
*/ ++ if (!HAS_SECURE_BATCHES(i915)) ++ return -EPERM; ++ + if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN)) +- return -EPERM; ++ return -EPERM; + + eb.batch_flags |= I915_DISPATCH_SECURE; + } diff --git a/debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch b/debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch new file mode 100644 index 000000000..dddacff03 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch @@ -0,0 +1,295 @@ +From: Jon Bloomfield +Date: Fri, 8 Jun 2018 10:05:26 -0700 +Subject: drm/i915: Remove Master tables from cmdparser +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 66d8aba1cd6db34af10de465c0d52af679288cb6 upstream. + +The previous patch has killed support for secure batches +on gen6+, and hence the cmdparsers master tables are +now dead code. Remove them. + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 84 ++++++---------------- + drivers/gpu/drm/i915/i915_drv.h | 3 +- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 7 +- + 3 files changed, 26 insertions(+), 68 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -51,13 +51,11 @@ + * granting userspace undue privileges. There are three categories of privilege. + * + * First, commands which are explicitly defined as privileged or which should +- * only be used by the kernel driver. The parser generally rejects such +- * commands, though it may allow some from the drm master process. ++ * only be used by the kernel driver. The parser rejects such commands + * + * Second, commands which access registers. To support correct/enhanced + * userspace functionality, particularly certain OpenGL extensions, the parser +- * provides a whitelist of registers which userspace may safely access (for both +- * normal and drm master processes). ++ * provides a whitelist of registers which userspace may safely access + * + * Third, commands which access privileged memory (i.e. GGTT, HWS page, etc). + * The parser always rejects such commands. +@@ -82,9 +80,9 @@ + * in the per-engine command tables. + * + * Other command table entries map fairly directly to high level categories +- * mentioned above: rejected, master-only, register whitelist. The parser +- * implements a number of checks, including the privileged memory checks, via a +- * general bitmasking mechanism. ++ * mentioned above: rejected, register whitelist. The parser implements a number ++ * of checks, including the privileged memory checks, via a general bitmasking ++ * mechanism. + */ + + /* +@@ -102,8 +100,6 @@ struct drm_i915_cmd_descriptor { + * CMD_DESC_REJECT: The command is never allowed + * CMD_DESC_REGISTER: The command should be checked against the + * register whitelist for the appropriate ring +- * CMD_DESC_MASTER: The command is allowed if the submitting process +- * is the DRM master + */ + u32 flags; + #define CMD_DESC_FIXED (1<<0) +@@ -111,7 +107,6 @@ struct drm_i915_cmd_descriptor { + #define CMD_DESC_REJECT (1<<2) + #define CMD_DESC_REGISTER (1<<3) + #define CMD_DESC_BITMASK (1<<4) +-#define CMD_DESC_MASTER (1<<5) + + /* + * The command's unique identification bits and the bitmask to get them. 
+@@ -207,14 +202,13 @@ struct drm_i915_cmd_table { + #define R CMD_DESC_REJECT + #define W CMD_DESC_REGISTER + #define B CMD_DESC_BITMASK +-#define M CMD_DESC_MASTER + + /* Command Mask Fixed Len Action + ---------------------------------------------------------- */ + static const struct drm_i915_cmd_descriptor gen7_common_cmds[] = { + CMD( MI_NOOP, SMI, F, 1, S ), + CMD( MI_USER_INTERRUPT, SMI, F, 1, R ), +- CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, M ), ++ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, R ), + CMD( MI_ARB_CHECK, SMI, F, 1, S ), + CMD( MI_REPORT_HEAD, SMI, F, 1, S ), + CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), +@@ -311,7 +305,7 @@ static const struct drm_i915_cmd_descrip + CMD( MI_URB_ATOMIC_ALLOC, SMI, F, 1, S ), + CMD( MI_SET_APPID, SMI, F, 1, S ), + CMD( MI_RS_CONTEXT, SMI, F, 1, S ), +- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), ++ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), +@@ -444,7 +438,7 @@ static const struct drm_i915_cmd_descrip + }; + + static const struct drm_i915_cmd_descriptor hsw_blt_cmds[] = { +- CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, M ), ++ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, R ), + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + }; + +@@ -461,7 +455,6 @@ static const struct drm_i915_cmd_descrip + #undef R + #undef W + #undef B +-#undef M + + static const struct drm_i915_cmd_table gen7_render_cmd_table[] = { + { gen7_common_cmds, ARRAY_SIZE(gen7_common_cmds) }, +@@ -610,47 +603,29 @@ static const struct drm_i915_reg_descrip + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + }; + +-static const struct drm_i915_reg_descriptor ivb_master_regs[] = { +- REG32(FORCEWAKE_MT), +- REG32(DERRMR), +- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_A)), +- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_B)), +- REG32(GEN7_PIPE_DE_LOAD_SL(PIPE_C)), +-}; +- +-static const struct drm_i915_reg_descriptor hsw_master_regs[] = { +- REG32(FORCEWAKE_MT), +- REG32(DERRMR), +-}; +- + #undef REG64 + #undef REG32 + + struct drm_i915_reg_table { + const struct drm_i915_reg_descriptor *regs; + int num_regs; +- bool master; + }; + + static const struct drm_i915_reg_table ivb_render_reg_tables[] = { +- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, +- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, ++ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, + }; + + static const struct drm_i915_reg_table ivb_blt_reg_tables[] = { +- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, +- { ivb_master_regs, ARRAY_SIZE(ivb_master_regs), true }, ++ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, + }; + + static const struct drm_i915_reg_table hsw_render_reg_tables[] = { +- { gen7_render_regs, ARRAY_SIZE(gen7_render_regs), false }, +- { hsw_render_regs, ARRAY_SIZE(hsw_render_regs), false }, +- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, ++ { gen7_render_regs, ARRAY_SIZE(gen7_render_regs) }, ++ { hsw_render_regs, ARRAY_SIZE(hsw_render_regs) }, + }; + + static const struct drm_i915_reg_table hsw_blt_reg_tables[] = { +- { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs), false }, +- { hsw_master_regs, ARRAY_SIZE(hsw_master_regs), true }, ++ { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, + }; + + static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) +@@ -1027,22 +1002,16 @@ __find_reg(const struct drm_i915_reg_des + } + + static const struct drm_i915_reg_descriptor * +-find_reg(const struct intel_engine_cs *engine, bool is_master, u32 addr) 
++find_reg(const struct intel_engine_cs *engine, u32 addr) + { + const struct drm_i915_reg_table *table = engine->reg_tables; ++ const struct drm_i915_reg_descriptor *reg = NULL; + int count = engine->reg_table_count; + +- for (; count > 0; ++table, --count) { +- if (!table->master || is_master) { +- const struct drm_i915_reg_descriptor *reg; +- +- reg = __find_reg(table->regs, table->num_regs, addr); +- if (reg != NULL) +- return reg; +- } +- } ++ for (; !reg && (count > 0); ++table, --count) ++ reg = __find_reg(table->regs, table->num_regs, addr); + +- return NULL; ++ return reg; + } + + /* Returns a vmap'd pointer to dst_obj, which the caller must unmap */ +@@ -1127,8 +1096,7 @@ unpin_src: + + static bool check_cmd(const struct intel_engine_cs *engine, + const struct drm_i915_cmd_descriptor *desc, +- const u32 *cmd, u32 length, +- const bool is_master) ++ const u32 *cmd, u32 length) + { + if (desc->flags & CMD_DESC_SKIP) + return true; +@@ -1138,12 +1106,6 @@ static bool check_cmd(const struct intel + return false; + } + +- if ((desc->flags & CMD_DESC_MASTER) && !is_master) { +- DRM_DEBUG_DRIVER("CMD: Rejected master-only command: 0x%08X\n", +- *cmd); +- return false; +- } +- + if (desc->flags & CMD_DESC_REGISTER) { + /* + * Get the distance between individual register offset +@@ -1157,7 +1119,7 @@ static bool check_cmd(const struct intel + offset += step) { + const u32 reg_addr = cmd[offset] & desc->reg.mask; + const struct drm_i915_reg_descriptor *reg = +- find_reg(engine, is_master, reg_addr); ++ find_reg(engine, reg_addr); + + if (!reg) { + DRM_DEBUG_DRIVER("CMD: Rejected register 0x%08X in command: 0x%08X (%s)\n", +@@ -1244,7 +1206,6 @@ static bool check_cmd(const struct intel + * @shadow_batch_obj: copy of the batch buffer in question + * @batch_start_offset: byte offset in the batch at which execution starts + * @batch_len: length of the commands in batch_obj +- * @is_master: is the submitting process the drm master? + * + * Parses the specified batch buffer looking for privilege violations as + * described in the overview. 
+@@ -1256,8 +1217,7 @@ int intel_engine_cmd_parser(struct intel + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, +- u32 batch_len, +- bool is_master) ++ u32 batch_len) + { + u32 *cmd, *batch_end; + struct drm_i915_cmd_descriptor default_desc = noop_desc; +@@ -1323,7 +1283,7 @@ int intel_engine_cmd_parser(struct intel + break; + } + +- if (!check_cmd(engine, desc, cmd, length, is_master)) { ++ if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; + break; + } +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -3343,8 +3343,7 @@ int intel_engine_cmd_parser(struct intel + struct drm_i915_gem_object *batch_obj, + struct drm_i915_gem_object *shadow_batch_obj, + u32 batch_start_offset, +- u32 batch_len, +- bool is_master); ++ u32 batch_len); + + /* i915_perf.c */ + extern void i915_perf_init(struct drm_i915_private *dev_priv); +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -1893,7 +1893,7 @@ static int i915_reset_gen7_sol_offsets(s + return 0; + } + +-static struct i915_vma *eb_parse(struct i915_execbuffer *eb, bool is_master) ++static struct i915_vma *eb_parse(struct i915_execbuffer *eb) + { + struct drm_i915_gem_object *shadow_batch_obj; + struct i915_vma *vma; +@@ -1908,8 +1908,7 @@ static struct i915_vma *eb_parse(struct + eb->batch->obj, + shadow_batch_obj, + eb->batch_start_offset, +- eb->batch_len, +- is_master); ++ eb->batch_len); + if (err) { + if (err == -EACCES) /* unhandled chained batch */ + vma = NULL; +@@ -2308,7 +2307,7 @@ i915_gem_do_execbuffer(struct drm_device + if (eb_use_cmdparser(&eb)) { + struct i915_vma *vma; + +- vma = eb_parse(&eb, drm_is_current_master(file)); ++ vma = eb_parse(&eb); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_vma; diff --git a/debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch b/debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch new file mode 100644 index 000000000..47ac2482f --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch @@ -0,0 +1,110 @@ +From: Jon Bloomfield +Date: Wed, 1 Aug 2018 09:33:59 -0700 +Subject: drm/i915: Add support for mandatory cmdparsing +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 311a50e76a33d1e029563c24b2ff6db0c02b5afe upstream. + +The existing cmdparser for gen7 can be bypassed by specifying +batch_len=0 in the execbuf call. This is safe because bypassing +simply reduces the cmd-set available. + +In a later patch we will introduce cmdparsing for gen9, as a +security measure, which must be strictly enforced since without +it we are vulnerable to DoS attacks. + +Introduce the concept of 'required' cmd parsing that cannot be +bypassed by submitting zero-length bb's. 
+ +v2: rebase (Mika) +v2: rebase (Mika) +v3: fix conflict on engine flags (Mika) + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 6 +++--- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 3 ++- + drivers/gpu/drm/i915/intel_ringbuffer.h | 17 ++++++++++++----- + 3 files changed, 17 insertions(+), 9 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -916,7 +916,7 @@ void intel_engine_init_cmd_parser(struct + return; + } + +- engine->flags |= I915_ENGINE_NEEDS_CMD_PARSER; ++ engine->flags |= I915_ENGINE_USING_CMD_PARSER; + } + + /** +@@ -928,7 +928,7 @@ void intel_engine_init_cmd_parser(struct + */ + void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine) + { +- if (!intel_engine_needs_cmd_parser(engine)) ++ if (!intel_engine_using_cmd_parser(engine)) + return; + + fini_hash_table(engine); +@@ -1317,7 +1317,7 @@ int i915_cmd_parser_get_version(struct d + + /* If the command parser is not enabled, report 0 - unsupported */ + for_each_engine(engine, dev_priv, id) { +- if (intel_engine_needs_cmd_parser(engine)) { ++ if (intel_engine_using_cmd_parser(engine)) { + active = true; + break; + } +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -309,7 +309,8 @@ static inline u64 gen8_noncanonical_addr + + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) + { +- return intel_engine_needs_cmd_parser(eb->engine) && eb->batch_len; ++ return intel_engine_requires_cmd_parser(eb->engine) || ++ (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len); + } + + static int eb_create(struct i915_execbuffer *eb) +--- a/drivers/gpu/drm/i915/intel_ringbuffer.h ++++ b/drivers/gpu/drm/i915/intel_ringbuffer.h +@@ -584,9 +584,10 @@ struct intel_engine_cs { + + struct intel_engine_hangcheck hangcheck; + +-#define I915_ENGINE_NEEDS_CMD_PARSER BIT(0) +-#define I915_ENGINE_SUPPORTS_STATS BIT(1) +-#define I915_ENGINE_HAS_PREEMPTION BIT(2) ++#define I915_ENGINE_USING_CMD_PARSER BIT(0) ++#define I915_ENGINE_SUPPORTS_STATS BIT(1) ++#define I915_ENGINE_HAS_PREEMPTION BIT(2) ++#define I915_ENGINE_REQUIRES_CMD_PARSER BIT(3) + unsigned int flags; + + /* +@@ -647,9 +648,15 @@ struct intel_engine_cs { + }; + + static inline bool +-intel_engine_needs_cmd_parser(const struct intel_engine_cs *engine) ++intel_engine_using_cmd_parser(const struct intel_engine_cs *engine) + { +- return engine->flags & I915_ENGINE_NEEDS_CMD_PARSER; ++ return engine->flags & I915_ENGINE_USING_CMD_PARSER; ++} ++ ++static inline bool ++intel_engine_requires_cmd_parser(const struct intel_engine_cs *engine) ++{ ++ return engine->flags & I915_ENGINE_REQUIRES_CMD_PARSER; + } + + static inline bool diff --git a/debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch b/debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch new file mode 100644 index 000000000..d03d396f5 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch @@ -0,0 +1,198 @@ +From: Jon Bloomfield +Date: Tue, 22 May 2018 13:59:06 -0700 +Subject: drm/i915: Support ro ppgtt mapped cmdparser shadow buffers +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 4f7af1948abcb18b4772fe1bcd84d7d27d96258c upstream. 
+ +For Gen7, the original cmdparser motive was to permit limited +use of register read/write instructions in unprivileged BB's. +This worked by copying the user supplied bb to a kmd owned +bb, and running it in secure mode, from the ggtt, only if +the scanner finds no unsafe commands or registers. + +For Gen8+ we can't use this same technique because running bb's +from the ggtt also disables access to ppgtt space. But we also +do not actually require 'secure' execution since we are only +trying to reduce the available command/register set. Instead we +will copy the user buffer to a kmd owned read-only bb in ppgtt, +and run in the usual non-secure mode. + +Note that ro pages are only supported by ppgtt (not ggtt), but +luckily that's exactly what we need. + +Add the required paths to map the shadow buffer to ppgtt ro for Gen8+ + +v2: IS_GEN7/IS_GEN (Mika) +v3: rebase +v4: rebase +v5: rebase + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_drv.h | 14 ++++++ + drivers/gpu/drm/i915/i915_gem.c | 16 +++++- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 57 +++++++++++++++------- + 3 files changed, 68 insertions(+), 19 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -2496,6 +2496,12 @@ intel_info(const struct drm_i915_private + #define IS_GEN9_LP(dev_priv) (IS_GEN9(dev_priv) && IS_LP(dev_priv)) + #define IS_GEN9_BC(dev_priv) (IS_GEN9(dev_priv) && !IS_LP(dev_priv)) + ++/* ++ * The Gen7 cmdparser copies the scanned buffer to the ggtt for execution ++ * All later gens can run the final buffer from the ppgtt ++ */ ++#define CMDPARSER_USES_GGTT(dev_priv) IS_GEN7(dev_priv) ++ + #define ENGINE_MASK(id) BIT(id) + #define RENDER_RING ENGINE_MASK(RCS) + #define BSD_RING ENGINE_MASK(VCS) +@@ -2946,6 +2952,14 @@ i915_gem_object_ggtt_pin(struct drm_i915 + u64 alignment, + u64 flags); + ++struct i915_vma * __must_check ++i915_gem_object_pin(struct drm_i915_gem_object *obj, ++ struct i915_address_space *vm, ++ const struct i915_ggtt_view *view, ++ u64 size, ++ u64 alignment, ++ u64 flags); ++ + int i915_gem_object_unbind(struct drm_i915_gem_object *obj); + void i915_gem_release_mmap(struct drm_i915_gem_object *obj); + +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -4414,6 +4414,20 @@ i915_gem_object_ggtt_pin(struct drm_i915 + { + struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_address_space *vm = &dev_priv->ggtt.vm; ++ ++ return i915_gem_object_pin(obj, vm, view, size, alignment, ++ flags | PIN_GLOBAL); ++} ++ ++struct i915_vma * ++i915_gem_object_pin(struct drm_i915_gem_object *obj, ++ struct i915_address_space *vm, ++ const struct i915_ggtt_view *view, ++ u64 size, ++ u64 alignment, ++ u64 flags) ++{ ++ struct drm_i915_private *dev_priv = to_i915(obj->base.dev); + struct i915_vma *vma; + int ret; + +@@ -4477,7 +4491,7 @@ i915_gem_object_ggtt_pin(struct drm_i915 + return ERR_PTR(ret); + } + +- ret = i915_vma_pin(vma, size, alignment, flags | PIN_GLOBAL); ++ ret = i915_vma_pin(vma, size, alignment, flags); + if (ret) + return ERR_PTR(ret); + +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -1894,6 +1894,33 @@ static int i915_reset_gen7_sol_offsets(s + return 0; + } + ++static struct i915_vma * ++shadow_batch_pin(struct i915_execbuffer *eb, struct drm_i915_gem_object *obj) ++{ ++ struct drm_i915_private *dev_priv 
= eb->i915; ++ struct i915_address_space *vm; ++ u64 flags; ++ ++ /* ++ * PPGTT backed shadow buffers must be mapped RO, to prevent ++ * post-scan tampering ++ */ ++ if (CMDPARSER_USES_GGTT(dev_priv)) { ++ flags = PIN_GLOBAL; ++ vm = &dev_priv->ggtt.vm; ++ eb->batch_flags |= I915_DISPATCH_SECURE; ++ } else if (eb->vm->has_read_only) { ++ flags = PIN_USER; ++ vm = eb->vm; ++ i915_gem_object_set_readonly(obj); ++ } else { ++ DRM_DEBUG("Cannot prevent post-scan tampering without RO capable vm\n"); ++ return ERR_PTR(-EINVAL); ++ } ++ ++ return i915_gem_object_pin(obj, vm, NULL, 0, 0, flags); ++} ++ + static struct i915_vma *eb_parse(struct i915_execbuffer *eb) + { + struct drm_i915_gem_object *shadow_batch_obj; +@@ -1911,14 +1938,21 @@ static struct i915_vma *eb_parse(struct + eb->batch_start_offset, + eb->batch_len); + if (err) { +- if (err == -EACCES) /* unhandled chained batch */ ++ /* ++ * Unsafe GGTT-backed buffers can still be submitted safely ++ * as non-secure. ++ * For PPGTT backing however, we have no choice but to forcibly ++ * reject unsafe buffers ++ */ ++ if (CMDPARSER_USES_GGTT(eb->i915) && (err == -EACCES)) ++ /* Execute original buffer non-secure */ + vma = NULL; + else + vma = ERR_PTR(err); + goto out; + } + +- vma = i915_gem_object_ggtt_pin(shadow_batch_obj, NULL, 0, 0, 0); ++ vma = shadow_batch_pin(eb, shadow_batch_obj); + if (IS_ERR(vma)) + goto out; + +@@ -1927,7 +1961,9 @@ static struct i915_vma *eb_parse(struct + __EXEC_OBJECT_HAS_PIN | __EXEC_OBJECT_HAS_REF; + vma->exec_flags = &eb->flags[eb->buffer_count]; + eb->buffer_count++; +- ++ eb->batch_start_offset = 0; ++ eb->batch = vma; ++ /* eb->batch_len unchanged */ + out: + i915_gem_object_unpin_pages(shadow_batch_obj); + return vma; +@@ -2313,21 +2349,6 @@ i915_gem_do_execbuffer(struct drm_device + err = PTR_ERR(vma); + goto err_vma; + } +- +- if (vma) { +- /* +- * Batch parsed and accepted: +- * +- * Set the DISPATCH_SECURE bit to remove the NON_SECURE +- * bit from MI_BATCH_BUFFER_START commands issued in +- * the dispatch_execbuffer implementations. We +- * specifically don't want that set on batches the +- * command parser has accepted. +- */ +- eb.batch_flags |= I915_DISPATCH_SECURE; +- eb.batch_start_offset = 0; +- eb.batch = vma; +- } + } + + if (eb.batch_len == 0) diff --git a/debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch b/debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch new file mode 100644 index 000000000..93a70de0f --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch @@ -0,0 +1,57 @@ +From: Jon Bloomfield +Date: Wed, 1 Aug 2018 09:45:50 -0700 +Subject: drm/i915: Allow parsing of unsized batches +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 435e8fc059dbe0eec823a75c22da2972390ba9e0 upstream. + +In "drm/i915: Add support for mandatory cmdparsing" we introduced the +concept of mandatory parsing. This allows the cmdparser to be invoked +even when user passes batch_len=0 to the execbuf ioctl's. + +However, the cmdparser needs to know the extents of the buffer being +scanned. Refactor the code to ensure the cmdparser uses the actual +object size, instead of the incoming length, if user passes 0. 
+ +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 9 +++++---- + 1 file changed, 5 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -310,7 +310,8 @@ static inline u64 gen8_noncanonical_addr + static inline bool eb_use_cmdparser(const struct i915_execbuffer *eb) + { + return intel_engine_requires_cmd_parser(eb->engine) || +- (intel_engine_using_cmd_parser(eb->engine) && eb->batch_len); ++ (intel_engine_using_cmd_parser(eb->engine) && ++ eb->args->batch_len); + } + + static int eb_create(struct i915_execbuffer *eb) +@@ -2341,6 +2342,9 @@ i915_gem_do_execbuffer(struct drm_device + goto err_vma; + } + ++ if (eb.batch_len == 0) ++ eb.batch_len = eb.batch->size - eb.batch_start_offset; ++ + if (eb_use_cmdparser(&eb)) { + struct i915_vma *vma; + +@@ -2351,9 +2355,6 @@ i915_gem_do_execbuffer(struct drm_device + } + } + +- if (eb.batch_len == 0) +- eb.batch_len = eb.batch->size - eb.batch_start_offset; +- + /* + * snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure + * batch" bit. Hence we need to pin secure batches into the global gtt. diff --git a/debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch b/debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch new file mode 100644 index 000000000..e9a9981c3 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch @@ -0,0 +1,258 @@ +From: Jon Bloomfield +Date: Mon, 23 Apr 2018 11:12:15 -0700 +Subject: drm/i915: Add gen9 BCS cmdparsing +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 0f2f39758341df70202ae1c42d5a1e4ee392b6d3 upstream. + +For gen9 we enable cmdparsing on the BCS ring, specifically +to catch inadvertent accesses to sensitive registers + +Unlike gen7/hsw, we use the parser only to block certain +registers. We can rely on h/w to block restricted commands, +so the command tables only provide enough info to allow the +parser to delineate each command, and identify commands that +access registers. + +Note: This patch deliberately ignores checkpatch issues in +favour of matching the style of the surrounding code. We'll +correct the entire file in one go in a later patch. + +Signed-off-by: Jon Bloomfield +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 116 ++++++++++++++++++++++--- + drivers/gpu/drm/i915/i915_gem_gtt.c | 3 +- + drivers/gpu/drm/i915/i915_reg.h | 4 + + 3 files changed, 112 insertions(+), 11 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -442,6 +442,47 @@ static const struct drm_i915_cmd_descrip + CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, R ), + }; + ++/* ++ * For Gen9 we can still rely on the h/w to enforce cmd security, and only ++ * need to re-enforce the register access checks. We therefore only need to ++ * teach the cmdparser how to find the end of each command, and identify ++ * register accesses. The table doesn't need to reject any commands, and so ++ * the only commands listed here are: ++ * 1) Those that touch registers ++ * 2) Those that do not have the default 8-bit length ++ * ++ * Note that the default MI length mask chosen for this table is 0xFF, not ++ * the 0x3F used on older devices. 
This is because the vast majority of MI ++ * cmds on Gen9 use a standard 8-bit Length field. ++ * All the Gen9 blitter instructions are standard 0xFF length mask, and ++ * none allow access to non-general registers, so in fact no BLT cmds are ++ * included in the table at all. ++ * ++ */ ++static const struct drm_i915_cmd_descriptor gen9_blt_cmds[] = { ++ CMD( MI_NOOP, SMI, F, 1, S ), ++ CMD( MI_USER_INTERRUPT, SMI, F, 1, S ), ++ CMD( MI_WAIT_FOR_EVENT, SMI, F, 1, S ), ++ CMD( MI_FLUSH, SMI, F, 1, S ), ++ CMD( MI_ARB_CHECK, SMI, F, 1, S ), ++ CMD( MI_REPORT_HEAD, SMI, F, 1, S ), ++ CMD( MI_ARB_ON_OFF, SMI, F, 1, S ), ++ CMD( MI_SUSPEND_FLUSH, SMI, F, 1, S ), ++ CMD( MI_LOAD_SCAN_LINES_INCL, SMI, !F, 0x3F, S ), ++ CMD( MI_LOAD_SCAN_LINES_EXCL, SMI, !F, 0x3F, S ), ++ CMD( MI_STORE_DWORD_IMM, SMI, !F, 0x3FF, S ), ++ CMD( MI_LOAD_REGISTER_IMM(1), SMI, !F, 0xFF, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 2 } ), ++ CMD( MI_UPDATE_GTT, SMI, !F, 0x3FF, S ), ++ CMD( MI_STORE_REGISTER_MEM_GEN8, SMI, F, 4, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC } ), ++ CMD( MI_FLUSH_DW, SMI, !F, 0x3F, S ), ++ CMD( MI_LOAD_REGISTER_MEM_GEN8, SMI, F, 4, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC } ), ++ CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, ++ .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), ++}; ++ + static const struct drm_i915_cmd_descriptor noop_desc = + CMD(MI_NOOP, SMI, F, 1, S); + +@@ -488,6 +529,11 @@ static const struct drm_i915_cmd_table h + { hsw_blt_cmds, ARRAY_SIZE(hsw_blt_cmds) }, + }; + ++static const struct drm_i915_cmd_table gen9_blt_cmd_table[] = { ++ { gen9_blt_cmds, ARRAY_SIZE(gen9_blt_cmds) }, ++}; ++ ++ + /* + * Register whitelists, sorted by increasing register offset. + */ +@@ -603,6 +649,29 @@ static const struct drm_i915_reg_descrip + REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), + }; + ++static const struct drm_i915_reg_descriptor gen9_blt_regs[] = { ++ REG64_IDX(RING_TIMESTAMP, RENDER_RING_BASE), ++ REG64_IDX(RING_TIMESTAMP, BSD_RING_BASE), ++ REG32(BCS_SWCTRL), ++ REG64_IDX(RING_TIMESTAMP, BLT_RING_BASE), ++ REG64_IDX(BCS_GPR, 0), ++ REG64_IDX(BCS_GPR, 1), ++ REG64_IDX(BCS_GPR, 2), ++ REG64_IDX(BCS_GPR, 3), ++ REG64_IDX(BCS_GPR, 4), ++ REG64_IDX(BCS_GPR, 5), ++ REG64_IDX(BCS_GPR, 6), ++ REG64_IDX(BCS_GPR, 7), ++ REG64_IDX(BCS_GPR, 8), ++ REG64_IDX(BCS_GPR, 9), ++ REG64_IDX(BCS_GPR, 10), ++ REG64_IDX(BCS_GPR, 11), ++ REG64_IDX(BCS_GPR, 12), ++ REG64_IDX(BCS_GPR, 13), ++ REG64_IDX(BCS_GPR, 14), ++ REG64_IDX(BCS_GPR, 15), ++}; ++ + #undef REG64 + #undef REG32 + +@@ -628,6 +697,10 @@ static const struct drm_i915_reg_table h + { gen7_blt_regs, ARRAY_SIZE(gen7_blt_regs) }, + }; + ++static const struct drm_i915_reg_table gen9_blt_reg_tables[] = { ++ { gen9_blt_regs, ARRAY_SIZE(gen9_blt_regs) }, ++}; ++ + static u32 gen7_render_get_cmd_length_mask(u32 cmd_header) + { + u32 client = cmd_header >> INSTR_CLIENT_SHIFT; +@@ -683,6 +756,17 @@ static u32 gen7_blt_get_cmd_length_mask( + return 0; + } + ++static u32 gen9_blt_get_cmd_length_mask(u32 cmd_header) ++{ ++ u32 client = cmd_header >> INSTR_CLIENT_SHIFT; ++ ++ if (client == INSTR_MI_CLIENT || client == INSTR_BC_CLIENT) ++ return 0xFF; ++ ++ DRM_DEBUG_DRIVER("CMD: Abnormal blt cmd length! 
0x%08X\n", cmd_header); ++ return 0; ++} ++ + static bool validate_cmds_sorted(const struct intel_engine_cs *engine, + const struct drm_i915_cmd_table *cmd_tables, + int cmd_table_count) +@@ -840,7 +924,8 @@ void intel_engine_init_cmd_parser(struct + int cmd_table_count; + int ret; + +- if (!IS_GEN7(engine->i915)) ++ if (!IS_GEN7(engine->i915) && !(IS_GEN9(engine->i915) && ++ engine->id == BCS)) + return; + + switch (engine->id) { +@@ -861,7 +946,6 @@ void intel_engine_init_cmd_parser(struct + engine->reg_tables = ivb_render_reg_tables; + engine->reg_table_count = ARRAY_SIZE(ivb_render_reg_tables); + } +- + engine->get_cmd_length_mask = gen7_render_get_cmd_length_mask; + break; + case VCS: +@@ -870,7 +954,16 @@ void intel_engine_init_cmd_parser(struct + engine->get_cmd_length_mask = gen7_bsd_get_cmd_length_mask; + break; + case BCS: +- if (IS_HASWELL(engine->i915)) { ++ engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; ++ if (IS_GEN9(engine->i915)) { ++ cmd_tables = gen9_blt_cmd_table; ++ cmd_table_count = ARRAY_SIZE(gen9_blt_cmd_table); ++ engine->get_cmd_length_mask = ++ gen9_blt_get_cmd_length_mask; ++ ++ /* BCS Engine unsafe without parser */ ++ engine->flags |= I915_ENGINE_REQUIRES_CMD_PARSER; ++ } else if (IS_HASWELL(engine->i915)) { + cmd_tables = hsw_blt_ring_cmd_table; + cmd_table_count = ARRAY_SIZE(hsw_blt_ring_cmd_table); + } else { +@@ -878,15 +971,17 @@ void intel_engine_init_cmd_parser(struct + cmd_table_count = ARRAY_SIZE(gen7_blt_cmd_table); + } + +- if (IS_HASWELL(engine->i915)) { ++ if (IS_GEN9(engine->i915)) { ++ engine->reg_tables = gen9_blt_reg_tables; ++ engine->reg_table_count = ++ ARRAY_SIZE(gen9_blt_reg_tables); ++ } else if (IS_HASWELL(engine->i915)) { + engine->reg_tables = hsw_blt_reg_tables; + engine->reg_table_count = ARRAY_SIZE(hsw_blt_reg_tables); + } else { + engine->reg_tables = ivb_blt_reg_tables; + engine->reg_table_count = ARRAY_SIZE(ivb_blt_reg_tables); + } +- +- engine->get_cmd_length_mask = gen7_blt_get_cmd_length_mask; + break; + case VECS: + cmd_tables = hsw_vebox_cmd_table; +@@ -1260,9 +1355,9 @@ int intel_engine_cmd_parser(struct intel + } + + /* +- * If the batch buffer contains a chained batch, return an +- * error that tells the caller to abort and dispatch the +- * workload as a non-secure batch. ++ * We don't try to handle BATCH_BUFFER_START because it adds ++ * non-trivial complexity. Instead we abort the scan and return ++ * and error to indicate that the batch is unsafe. + */ + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = -EACCES; +@@ -1342,6 +1437,7 @@ int i915_cmd_parser_get_version(struct d + * the parser enabled. + * 9. Don't whitelist or handle oacontrol specially, as ownership + * for oacontrol state is moving to i915-perf. ++ * 10. 
Support for Gen9 BCS Parsing + */ +- return 9; ++ return 10; + } +--- a/drivers/gpu/drm/i915/i915_gem_gtt.c ++++ b/drivers/gpu/drm/i915/i915_gem_gtt.c +@@ -158,7 +158,8 @@ int intel_sanitize_enable_ppgtt(struct d + if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9) + return 0; + +- if (enable_ppgtt == 1) ++ /* Full PPGTT is required by the Gen9 cmdparser */ ++ if (enable_ppgtt == 1 && INTEL_GEN(dev_priv) != 9) + return 1; + + if (enable_ppgtt == 2 && has_full_ppgtt) +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -471,6 +471,10 @@ static inline bool i915_mmio_reg_valid(i + */ + #define BCS_SWCTRL _MMIO(0x22200) + ++/* There are 16 GPR registers */ ++#define BCS_GPR(n) _MMIO(0x22600 + (n) * 8) ++#define BCS_GPR_UDW(n) _MMIO(0x22600 + (n) * 8 + 4) ++ + #define GPGPU_THREADS_DISPATCHED _MMIO(0x2290) + #define GPGPU_THREADS_DISPATCHED_UDW _MMIO(0x2290 + 4) + #define HS_INVOCATION_COUNT _MMIO(0x2300) diff --git a/debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch b/debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch new file mode 100644 index 000000000..9677432ba --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch @@ -0,0 +1,94 @@ +From: Jon Bloomfield +Date: Thu, 27 Sep 2018 10:23:17 -0700 +Subject: drm/i915/cmdparser: Use explicit goto for error paths +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 0546a29cd884fb8184731c79ab008927ca8859d0 upstream. + +In the next patch we will be adding a second valid +termination condition which will require a small +amount of refactoring to share logic with the BB_END +case. + +Refactor all error conditions to jump to a dedicated +exit path, with 'break' reserved only for a successful +parse. 
+ +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Jon Bloomfield +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 25 +++++++++++++------------ + 1 file changed, 13 insertions(+), 12 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -1337,21 +1337,15 @@ int intel_engine_cmd_parser(struct intel + do { + u32 length; + +- if (*cmd == MI_BATCH_BUFFER_END) { +- if (needs_clflush_after) { +- void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); +- drm_clflush_virt_range(ptr, +- (void *)(cmd + 1) - ptr); +- } ++ if (*cmd == MI_BATCH_BUFFER_END) + break; +- } + + desc = find_cmd(engine, *cmd, desc, &default_desc); + if (!desc) { + DRM_DEBUG_DRIVER("CMD: Unrecognized command: 0x%08X\n", + *cmd); + ret = -EINVAL; +- break; ++ goto err; + } + + /* +@@ -1361,7 +1355,7 @@ int intel_engine_cmd_parser(struct intel + */ + if (desc->cmd.value == MI_BATCH_BUFFER_START) { + ret = -EACCES; +- break; ++ goto err; + } + + if (desc->flags & CMD_DESC_FIXED) +@@ -1375,22 +1369,29 @@ int intel_engine_cmd_parser(struct intel + length, + batch_end - cmd); + ret = -EINVAL; +- break; ++ goto err; + } + + if (!check_cmd(engine, desc, cmd, length)) { + ret = -EACCES; +- break; ++ goto err; + } + + cmd += length; + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; +- break; ++ goto err; + } + } while (1); + ++ if (needs_clflush_after) { ++ void *ptr = page_mask_bits(shadow_batch_obj->mm.mapping); ++ ++ drm_clflush_virt_range(ptr, (void *)(cmd + 1) - ptr); ++ } ++ ++err: + i915_gem_object_unpin_map(shadow_batch_obj); + return ret; + } diff --git a/debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch b/debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch new file mode 100644 index 000000000..ac91adbd2 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch @@ -0,0 +1,404 @@ +From: Jon Bloomfield +Date: Thu, 20 Sep 2018 09:58:36 -0700 +Subject: drm/i915/cmdparser: Add support for backward jumps +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit f8c08d8faee5567803c8c533865296ca30286bbf upstream. + +To keep things manageable, the pre-gen9 cmdparser does not +attempt to track any form of nested BB_START's. This did not +prevent usermode from using nested starts, or even chained +batches because the cmdparser is not strictly enforced pre gen9. + +Instead, the existence of a nested BB_START would cause the batch +to be emitted in insecure mode, and any privileged capabilities +would not be available. + +For Gen9, the cmdparser becomes mandatory (for BCS at least), and +so not providing any form of nested BB_START support becomes +overly restrictive. Any such batch will simply not run. + +We make heavy use of backward jumps in igt, and it is much easier +to add support for this restricted subset of nested jumps, than to +rewrite the whole of our test suite to avoid them. + +Add the required logic to support limited backward jumps, to +instructions that have already been validated by the parser. + +Note that it's not sufficient to simply approve any BB_START +that jumps backwards in the buffer because this would allow an +attacker to embed a rogue instruction sequence within the +operand words of a harmless instruction (say LRI) and jump to +that. 
+ +We introduce a bit array to track every instr offset successfully +validated, and test the target of BB_START against this. If the +target offset hits, it is re-written to the same offset in the +shadow buffer and the BB_START cmd is allowed. + +Note: This patch deliberately ignores checkpatch issues in the +cmdtables, in order to match the style of the surrounding code. +We'll correct the entire file in one go in a later patch. + +v2: set dispatch secure late (Mika) +v3: rebase (Mika) +v4: Clear whitelist on each parse +Minor review updates (Chris) +v5: Correct backward jump batching +v6: fix compilation error due to struct eb shuffle (Mika) + +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Jon Bloomfield +Signed-off-by: Mika Kuoppala +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 151 +++++++++++++++++++-- + drivers/gpu/drm/i915/i915_drv.h | 9 +- + drivers/gpu/drm/i915/i915_gem_context.c | 5 + + drivers/gpu/drm/i915/i915_gem_context.h | 6 + + drivers/gpu/drm/i915/i915_gem_execbuffer.c | 34 +++-- + 5 files changed, 179 insertions(+), 26 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -481,6 +481,19 @@ static const struct drm_i915_cmd_descrip + .reg = { .offset = 1, .mask = 0x007FFFFC } ), + CMD( MI_LOAD_REGISTER_REG, SMI, !F, 0xFF, W, + .reg = { .offset = 1, .mask = 0x007FFFFC, .step = 1 } ), ++ ++ /* ++ * We allow BB_START but apply further checks. We just sanitize the ++ * basic fields here. ++ */ ++#define MI_BB_START_OPERAND_MASK GENMASK(SMI-1, 0) ++#define MI_BB_START_OPERAND_EXPECT (MI_BATCH_PPGTT_HSW | 1) ++ CMD( MI_BATCH_BUFFER_START_GEN8, SMI, !F, 0xFF, B, ++ .bits = {{ ++ .offset = 0, ++ .mask = MI_BB_START_OPERAND_MASK, ++ .expected = MI_BB_START_OPERAND_EXPECT, ++ }}, ), + }; + + static const struct drm_i915_cmd_descriptor noop_desc = +@@ -1292,15 +1305,113 @@ static bool check_cmd(const struct intel + return true; + } + ++static int check_bbstart(const struct i915_gem_context *ctx, ++ u32 *cmd, u32 offset, u32 length, ++ u32 batch_len, ++ u64 batch_start, ++ u64 shadow_batch_start) ++{ ++ u64 jump_offset, jump_target; ++ u32 target_cmd_offset, target_cmd_index; ++ ++ /* For igt compatibility on older platforms */ ++ if (CMDPARSER_USES_GGTT(ctx->i915)) { ++ DRM_DEBUG("CMD: Rejecting BB_START for ggtt based submission\n"); ++ return -EACCES; ++ } ++ ++ if (length != 3) { ++ DRM_DEBUG("CMD: Recursive BB_START with bad length(%u)\n", ++ length); ++ return -EINVAL; ++ } ++ ++ jump_target = *(u64*)(cmd+1); ++ jump_offset = jump_target - batch_start; ++ ++ /* ++ * Any underflow of jump_target is guaranteed to be outside the range ++ * of a u32, so >= test catches both too large and too small ++ */ ++ if (jump_offset >= batch_len) { ++ DRM_DEBUG("CMD: BB_START to 0x%llx jumps out of BB\n", ++ jump_target); ++ return -EINVAL; ++ } ++ ++ /* ++ * This cannot overflow a u32 because we already checked jump_offset ++ * is within the BB, and the batch_len is a u32 ++ */ ++ target_cmd_offset = lower_32_bits(jump_offset); ++ target_cmd_index = target_cmd_offset / sizeof(u32); ++ ++ *(u64*)(cmd + 1) = shadow_batch_start + target_cmd_offset; ++ ++ if (target_cmd_index == offset) ++ return 0; ++ ++ if (ctx->jump_whitelist_cmds <= target_cmd_index) { ++ DRM_DEBUG("CMD: Rejecting BB_START - truncated whitelist array\n"); ++ return -EINVAL; ++ } else if (!test_bit(target_cmd_index, ctx->jump_whitelist)) { ++ DRM_DEBUG("CMD: BB_START to 0x%llx not a previously executed 
cmd\n", ++ jump_target); ++ return -EINVAL; ++ } ++ ++ return 0; ++} ++ ++static void init_whitelist(struct i915_gem_context *ctx, u32 batch_len) ++{ ++ const u32 batch_cmds = DIV_ROUND_UP(batch_len, sizeof(u32)); ++ const u32 exact_size = BITS_TO_LONGS(batch_cmds); ++ u32 next_size = BITS_TO_LONGS(roundup_pow_of_two(batch_cmds)); ++ unsigned long *next_whitelist; ++ ++ if (CMDPARSER_USES_GGTT(ctx->i915)) ++ return; ++ ++ if (batch_cmds <= ctx->jump_whitelist_cmds) { ++ memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32)); ++ return; ++ } ++ ++again: ++ next_whitelist = kcalloc(next_size, sizeof(long), GFP_KERNEL); ++ if (next_whitelist) { ++ kfree(ctx->jump_whitelist); ++ ctx->jump_whitelist = next_whitelist; ++ ctx->jump_whitelist_cmds = ++ next_size * BITS_PER_BYTE * sizeof(long); ++ return; ++ } ++ ++ if (next_size > exact_size) { ++ next_size = exact_size; ++ goto again; ++ } ++ ++ DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); ++ memset(ctx->jump_whitelist, 0, ++ BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32)); ++ ++ return; ++} ++ + #define LENGTH_BIAS 2 + + /** + * i915_parse_cmds() - parse a submitted batch buffer for privilege violations ++ * @ctx: the context in which the batch is to execute + * @engine: the engine on which the batch is to execute + * @batch_obj: the batch buffer in question +- * @shadow_batch_obj: copy of the batch buffer in question ++ * @batch_start: Canonical base address of batch + * @batch_start_offset: byte offset in the batch at which execution starts + * @batch_len: length of the commands in batch_obj ++ * @shadow_batch_obj: copy of the batch buffer in question ++ * @shadow_batch_start: Canonical base address of shadow_batch_obj + * + * Parses the specified batch buffer looking for privilege violations as + * described in the overview. +@@ -1308,13 +1419,17 @@ static bool check_cmd(const struct intel + * Return: non-zero if the parser finds violations or otherwise fails; -EACCES + * if the batch appears legal but should use hardware parsing + */ +-int intel_engine_cmd_parser(struct intel_engine_cs *engine, ++ ++int intel_engine_cmd_parser(struct i915_gem_context *ctx, ++ struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, +- struct drm_i915_gem_object *shadow_batch_obj, ++ u64 batch_start, + u32 batch_start_offset, +- u32 batch_len) ++ u32 batch_len, ++ struct drm_i915_gem_object *shadow_batch_obj, ++ u64 shadow_batch_start) + { +- u32 *cmd, *batch_end; ++ u32 *cmd, *batch_end, offset = 0; + struct drm_i915_cmd_descriptor default_desc = noop_desc; + const struct drm_i915_cmd_descriptor *desc = &default_desc; + bool needs_clflush_after = false; +@@ -1328,6 +1443,8 @@ int intel_engine_cmd_parser(struct intel + return PTR_ERR(cmd); + } + ++ init_whitelist(ctx, batch_len); ++ + /* + * We use the batch length as size because the shadow object is as + * large or larger and copy_batch() will write MI_NOPs to the extra +@@ -1348,16 +1465,6 @@ int intel_engine_cmd_parser(struct intel + goto err; + } + +- /* +- * We don't try to handle BATCH_BUFFER_START because it adds +- * non-trivial complexity. Instead we abort the scan and return +- * and error to indicate that the batch is unsafe. 
+- */ +- if (desc->cmd.value == MI_BATCH_BUFFER_START) { +- ret = -EACCES; +- goto err; +- } +- + if (desc->flags & CMD_DESC_FIXED) + length = desc->length.fixed; + else +@@ -1377,7 +1484,21 @@ int intel_engine_cmd_parser(struct intel + goto err; + } + ++ if (desc->cmd.value == MI_BATCH_BUFFER_START) { ++ ret = check_bbstart(ctx, cmd, offset, length, ++ batch_len, batch_start, ++ shadow_batch_start); ++ ++ if (ret) ++ goto err; ++ break; ++ } ++ ++ if (ctx->jump_whitelist_cmds > offset) ++ set_bit(offset, ctx->jump_whitelist); ++ + cmd += length; ++ offset += length; + if (cmd >= batch_end) { + DRM_DEBUG_DRIVER("CMD: Got to the end of the buffer w/o a BBE cmd!\n"); + ret = -EINVAL; +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -3353,11 +3353,14 @@ const char *i915_cache_level_str(struct + int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv); + void intel_engine_init_cmd_parser(struct intel_engine_cs *engine); + void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine); +-int intel_engine_cmd_parser(struct intel_engine_cs *engine, ++int intel_engine_cmd_parser(struct i915_gem_context *cxt, ++ struct intel_engine_cs *engine, + struct drm_i915_gem_object *batch_obj, +- struct drm_i915_gem_object *shadow_batch_obj, ++ u64 user_batch_start, + u32 batch_start_offset, +- u32 batch_len); ++ u32 batch_len, ++ struct drm_i915_gem_object *shadow_batch_obj, ++ u64 shadow_batch_start); + + /* i915_perf.c */ + extern void i915_perf_init(struct drm_i915_private *dev_priv); +--- a/drivers/gpu/drm/i915/i915_gem_context.c ++++ b/drivers/gpu/drm/i915/i915_gem_context.c +@@ -124,6 +124,8 @@ static void i915_gem_context_free(struct + + i915_ppgtt_put(ctx->ppgtt); + ++ kfree(ctx->jump_whitelist); ++ + for (n = 0; n < ARRAY_SIZE(ctx->__engine); n++) { + struct intel_context *ce = &ctx->__engine[n]; + +@@ -339,6 +341,9 @@ __create_hw_context(struct drm_i915_priv + else + ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE; + ++ ctx->jump_whitelist = NULL; ++ ctx->jump_whitelist_cmds = 0; ++ + return ctx; + + err_pid: +--- a/drivers/gpu/drm/i915/i915_gem_context.h ++++ b/drivers/gpu/drm/i915/i915_gem_context.h +@@ -183,6 +183,12 @@ struct i915_gem_context { + /** remap_slice: Bitmask of cache lines that need remapping */ + u8 remap_slice; + ++ /** jump_whitelist: Bit array for tracking cmds during cmdparsing */ ++ unsigned long *jump_whitelist; ++ ++ /** jump_whitelist_cmds: No of cmd slots available */ ++ u32 jump_whitelist_cmds; ++ + /** handles_vma: rbtree to look up our context specific obj/vma for + * the user handle. 
(user handles are per fd, but the binding is + * per vm, which may be one per context or shared with the global GTT) +--- a/drivers/gpu/drm/i915/i915_gem_execbuffer.c ++++ b/drivers/gpu/drm/i915/i915_gem_execbuffer.c +@@ -1909,7 +1909,6 @@ shadow_batch_pin(struct i915_execbuffer + if (CMDPARSER_USES_GGTT(dev_priv)) { + flags = PIN_GLOBAL; + vm = &dev_priv->ggtt.vm; +- eb->batch_flags |= I915_DISPATCH_SECURE; + } else if (eb->vm->has_read_only) { + flags = PIN_USER; + vm = eb->vm; +@@ -1926,6 +1925,8 @@ static struct i915_vma *eb_parse(struct + { + struct drm_i915_gem_object *shadow_batch_obj; + struct i915_vma *vma; ++ u64 batch_start; ++ u64 shadow_batch_start; + int err; + + shadow_batch_obj = i915_gem_batch_pool_get(&eb->engine->batch_pool, +@@ -1933,12 +1934,27 @@ static struct i915_vma *eb_parse(struct + if (IS_ERR(shadow_batch_obj)) + return ERR_CAST(shadow_batch_obj); + +- err = intel_engine_cmd_parser(eb->engine, ++ vma = shadow_batch_pin(eb, shadow_batch_obj); ++ if (IS_ERR(vma)) ++ goto out; ++ ++ batch_start = gen8_canonical_addr(eb->batch->node.start) + ++ eb->batch_start_offset; ++ ++ shadow_batch_start = gen8_canonical_addr(vma->node.start); ++ ++ err = intel_engine_cmd_parser(eb->ctx, ++ eb->engine, + eb->batch->obj, +- shadow_batch_obj, ++ batch_start, + eb->batch_start_offset, +- eb->batch_len); ++ eb->batch_len, ++ shadow_batch_obj, ++ shadow_batch_start); ++ + if (err) { ++ i915_vma_unpin(vma); ++ + /* + * Unsafe GGTT-backed buffers can still be submitted safely + * as non-secure. +@@ -1950,12 +1966,9 @@ static struct i915_vma *eb_parse(struct + vma = NULL; + else + vma = ERR_PTR(err); +- goto out; +- } + +- vma = shadow_batch_pin(eb, shadow_batch_obj); +- if (IS_ERR(vma)) + goto out; ++ } + + eb->vma[eb->buffer_count] = i915_vma_get(vma); + eb->flags[eb->buffer_count] = +@@ -1964,7 +1977,12 @@ static struct i915_vma *eb_parse(struct + eb->buffer_count++; + eb->batch_start_offset = 0; + eb->batch = vma; ++ + /* eb->batch_len unchanged */ ++ ++ if (CMDPARSER_USES_GGTT(eb->i915)) ++ eb->batch_flags |= I915_DISPATCH_SECURE; ++ + out: + i915_gem_object_unpin_pages(shadow_batch_obj); + return vma; diff --git a/debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch b/debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch new file mode 100644 index 000000000..615e994b5 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch @@ -0,0 +1,37 @@ +From: Jon Bloomfield +Date: Thu, 20 Sep 2018 09:45:10 -0700 +Subject: drm/i915/cmdparser: Ignore Length operands during command matching +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +commit 926abff21a8f29ef159a3ac893b05c6e50e043c3 upstream. + +Some of the gen instruction macros (e.g. MI_DISPLAY_FLIP) have the +length directly encoded in them. Since these are used directly in +the tables, the Length becomes part of the comparison used for +matching during parsing. Thus, if the cmd being parsed has a +different length to that in the table, it is not matched and the +cmd is accepted via the default variable length path. 
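As a stand-alone illustration of the mismatch described above (not part of the patch), the sketch below models the table lookup as (header & mask) == value, which is what the CMD() macro in the hunk further down implies, and assumes the usual gen MI encoding of opcode << 23 with the length in the low bits; the MI_DISPLAY_FLIP encoding and lengths here are example values only:

    #include <stdio.h>
    #include <stdint.h>

    #define OPCODE_SHIFT 23
    /* Example encoding only: opcode in the high bits, length in the low bits. */
    #define MI_DISPLAY_FLIP(len) ((0x14u << OPCODE_SHIFT) | (len))

    int main(void)
    {
        uint32_t mask      = ~0u << OPCODE_SHIFT;
        uint32_t old_value = MI_DISPLAY_FLIP(2);        /* length left in the table value */
        uint32_t new_value = MI_DISPLAY_FLIP(2) & mask; /* opcode only, as after the fix  */
        uint32_t header    = MI_DISPLAY_FLIP(5);        /* same opcode, different length  */

        printf("old table: %s\n", (header & mask) == old_value ? "matched" : "missed");
        printf("new table: %s\n", (header & mask) == new_value ? "matched" : "missed");
        return 0;
    }

With the length bits left in the table value, the masked header can never compare equal; masking the opcode when the table entry is built, as the one-line CMD() change below does, makes both sides of the comparison carry opcode bits only.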
+ +Fix by masking out everything except the Opcode in the cmd tables + +Cc: Tony Luck +Cc: Dave Airlie +Cc: Takashi Iwai +Cc: Tyler Hicks +Signed-off-by: Jon Bloomfield +Reviewed-by: Chris Wilson +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -187,7 +187,7 @@ struct drm_i915_cmd_table { + #define CMD(op, opm, f, lm, fl, ...) \ + { \ + .flags = (fl) | ((f) ? CMD_DESC_FIXED : 0), \ +- .cmd = { (op), ~0u << (opm) }, \ ++ .cmd = { (op & ~0u << (opm)), ~0u << (opm) }, \ + .length = { (lm) }, \ + __VA_ARGS__ \ + } diff --git a/debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch b/debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch new file mode 100644 index 000000000..84acd8734 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch @@ -0,0 +1,72 @@ +From: Uma Shankar +Date: Tue, 7 Aug 2018 21:15:35 +0530 +Subject: drm/i915: Lower RM timeout to avoid DSI hard hangs +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0154 + +commit 1d85a299c4db57c55e0229615132c964d17aa765 upstream. + +In BXT/APL, device 2 MMIO reads from MIPI controller requires its PLL +to be turned ON. When MIPI PLL is turned off (MIPI Display is not +active or connected), and someone (host or GT engine) tries to read +MIPI registers, it causes hard hang. This is a hardware restriction +or limitation. + +Driver by itself doesn't read MIPI registers when MIPI display is off. +But any userspace application can submit unprivileged batch buffer for +execution. In that batch buffer there can be mmio reads. And these +reads are allowed even for unprivileged applications. If these +register reads are for MIPI DSI controller and MIPI display is not +active during that time, then the MMIO read operation causes system +hard hang and only way to recover is hard reboot. A genuine +process/application won't submit batch buffer like this and doesn't +cause any issue. But on a compromised system, a malign userspace +process/app can generate such batch buffer and can trigger system +hard hang (denial of service attack). + +The fix is to lower the internal MMIO timeout value to an optimum +value of 950us as recommended by hardware team. If the timeout is +beyond 1ms (which will hit for any value we choose if MMIO READ on a +DSI specific register is performed without PLL ON), it causes the +system hang. But if the timeout value is lower than it will be below +the threshold (even if timeout happens) and system will not get into +a hung state. This will avoid a system hang without losing any +programming or GT interrupts, taking the worst case of lowest CDCLK +frequency and early DC5 abort into account. 
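Condensed into a sketch, the workaround is a single write of the display internal timeout register during BXT clock-gating init; RM_TIMEOUT and MMIO_TIMEOUT_US are the macros added in the i915_reg.h hunk below, while the wrapper function name here is illustrative only:

    /* Sketch only: mirrors the addition to bxt_init_clock_gating() below. */
    #define RM_TIMEOUT          _MMIO(0x42060)   /* Display Internal Timeout Register */
    #define MMIO_TIMEOUT_US(us) ((us) << 0)

    static void bxt_lower_rm_timeout(struct drm_i915_private *dev_priv)
    {
    	/*
    	 * 950 us stays below the ~1 ms point at which a DSI register read
    	 * with the MIPI PLL off hard-hangs the machine, so the access times
    	 * out instead, without losing GT interrupts or programming.
    	 */
    	I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950));
    }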
+ +Signed-off-by: Uma Shankar +Reviewed-by: Jon Bloomfield +--- + drivers/gpu/drm/i915/i915_reg.h | 4 ++++ + drivers/gpu/drm/i915/intel_pm.c | 8 ++++++++ + 2 files changed, 12 insertions(+) + +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -7009,6 +7009,10 @@ enum { + #define SKL_CSR_DC5_DC6_COUNT _MMIO(0x8002C) + #define BXT_CSR_DC3_DC5_COUNT _MMIO(0x80038) + ++/* Display Internal Timeout Register */ ++#define RM_TIMEOUT _MMIO(0x42060) ++#define MMIO_TIMEOUT_US(us) ((us) << 0) ++ + /* interrupts */ + #define DE_MASTER_IRQ_CONTROL (1 << 31) + #define DE_SPRITEB_FLIP_DONE (1 << 29) +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -114,6 +114,14 @@ static void bxt_init_clock_gating(struct + */ + I915_WRITE(GEN9_CLKGATE_DIS_0, I915_READ(GEN9_CLKGATE_DIS_0) | + PWM1_GATING_DIS | PWM2_GATING_DIS); ++ ++ /* ++ * Lower the display internal timeout. ++ * This is needed to avoid any hard hangs when DSI port PLL ++ * is off and a MMIO access is attempted by any privilege ++ * application, using batch buffers or any other means. ++ */ ++ I915_WRITE(RM_TIMEOUT, MMIO_TIMEOUT_US(950)); + } + + static void glk_init_clock_gating(struct drm_i915_private *dev_priv) diff --git a/debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch b/debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch new file mode 100644 index 000000000..e555b72d0 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch @@ -0,0 +1,282 @@ +From: Imre Deak +Date: Mon, 9 Jul 2018 18:24:27 +0300 +Subject: drm/i915/gen8+: Add RC6 CTX corruption WA +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0154 + +commit 7e34f4e4aad3fd34c02b294a3cf2321adf5b4438 upstream. + +In some circumstances the RC6 context can get corrupted. We can detect +this and take the required action, that is disable RC6 and runtime PM. +The HW recovers from the corrupted state after a system suspend/resume +cycle, so detect the recovery and re-enable RC6 and runtime PM. + +v2: rebase (Mika) +v3: +- Move intel_suspend_gt_powersave() to the end of the GEM suspend + sequence. +- Add commit message. +v4: +- Rebased on intel_uncore_forcewake_put(i915->uncore, ...) API + change. 
+v5: rebased on gem/gt split (Mika) + +Signed-off-by: Imre Deak +Signed-off-by: Mika Kuoppala +--- + drivers/gpu/drm/i915/i915_drv.c | 3 + + drivers/gpu/drm/i915/i915_drv.h | 7 +- + drivers/gpu/drm/i915/i915_gem.c | 8 +++ + drivers/gpu/drm/i915/i915_reg.h | 2 + + drivers/gpu/drm/i915/intel_drv.h | 3 + + drivers/gpu/drm/i915/intel_pm.c | 107 ++++++++++++++++++++++++++++++- + 6 files changed, 126 insertions(+), 4 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_drv.c ++++ b/drivers/gpu/drm/i915/i915_drv.c +@@ -1621,6 +1621,7 @@ static int i915_drm_suspend_late(struct + i915_gem_suspend_late(dev_priv); + + intel_display_set_init_power(dev_priv, false); ++ i915_rc6_ctx_wa_suspend(dev_priv); + intel_uncore_suspend(dev_priv); + + /* +@@ -1847,6 +1848,8 @@ static int i915_drm_resume_early(struct + else + intel_display_set_init_power(dev_priv, true); + ++ i915_rc6_ctx_wa_resume(dev_priv); ++ + intel_engines_sanitize(dev_priv); + + enable_rpm_wakeref_asserts(dev_priv); +--- a/drivers/gpu/drm/i915/i915_drv.h ++++ b/drivers/gpu/drm/i915/i915_drv.h +@@ -801,6 +801,7 @@ struct intel_rps { + + struct intel_rc6 { + bool enabled; ++ bool ctx_corrupted; + u64 prev_hw_residency[4]; + u64 cur_residency[4]; + }; +@@ -2557,10 +2558,12 @@ intel_info(const struct drm_i915_private + /* Early gen2 have a totally busted CS tlb and require pinned batches. */ + #define HAS_BROKEN_CS_TLB(dev_priv) (IS_I830(dev_priv) || IS_I845G(dev_priv)) + ++#define NEEDS_RC6_CTX_CORRUPTION_WA(dev_priv) \ ++ (IS_BROADWELL(dev_priv) || INTEL_GEN(dev_priv) == 9) ++ + /* WaRsDisableCoarsePowerGating:skl,cnl */ + #define NEEDS_WaRsDisableCoarsePowerGating(dev_priv) \ +- (IS_CANNONLAKE(dev_priv) || \ +- IS_SKL_GT3(dev_priv) || IS_SKL_GT4(dev_priv)) ++ (IS_CANNONLAKE(dev_priv) || INTEL_GEN(dev_priv) == 9) + + #define HAS_GMBUS_IRQ(dev_priv) (INTEL_GEN(dev_priv) >= 4) + #define HAS_GMBUS_BURST_READ(dev_priv) (INTEL_GEN(dev_priv) >= 10 || \ +--- a/drivers/gpu/drm/i915/i915_gem.c ++++ b/drivers/gpu/drm/i915/i915_gem.c +@@ -174,6 +174,11 @@ static u32 __i915_gem_park(struct drm_i9 + if (INTEL_GEN(i915) >= 6) + gen6_rps_idle(i915); + ++ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) { ++ i915_rc6_ctx_wa_check(i915); ++ intel_uncore_forcewake_put(i915, FORCEWAKE_ALL); ++ } ++ + intel_display_power_put(i915, POWER_DOMAIN_GT_IRQ); + + intel_runtime_pm_put(i915); +@@ -220,6 +225,9 @@ void i915_gem_unpark(struct drm_i915_pri + */ + intel_display_power_get(i915, POWER_DOMAIN_GT_IRQ); + ++ if (NEEDS_RC6_CTX_CORRUPTION_WA(i915)) ++ intel_uncore_forcewake_get(i915, FORCEWAKE_ALL); ++ + i915->gt.awake = true; + if (unlikely(++i915->gt.epoch == 0)) /* keep 0 as invalid */ + i915->gt.epoch = 1; +--- a/drivers/gpu/drm/i915/i915_reg.h ++++ b/drivers/gpu/drm/i915/i915_reg.h +@@ -387,6 +387,8 @@ static inline bool i915_mmio_reg_valid(i + #define ECOCHK_PPGTT_WT_HSW (0x2 << 3) + #define ECOCHK_PPGTT_WB_HSW (0x3 << 3) + ++#define GEN8_RC6_CTX_INFO _MMIO(0x8504) ++ + #define GAC_ECO_BITS _MMIO(0x14090) + #define ECOBITS_SNB_BIT (1 << 13) + #define ECOBITS_PPGTT_CACHE64B (3 << 8) +--- a/drivers/gpu/drm/i915/intel_drv.h ++++ b/drivers/gpu/drm/i915/intel_drv.h +@@ -2064,6 +2064,9 @@ void intel_sanitize_gt_powersave(struct + void intel_enable_gt_powersave(struct drm_i915_private *dev_priv); + void intel_disable_gt_powersave(struct drm_i915_private *dev_priv); + void intel_suspend_gt_powersave(struct drm_i915_private *dev_priv); ++bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915); ++void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915); ++void 
i915_rc6_ctx_wa_resume(struct drm_i915_private *i915); + void gen6_rps_busy(struct drm_i915_private *dev_priv); + void gen6_rps_reset_ei(struct drm_i915_private *dev_priv); + void gen6_rps_idle(struct drm_i915_private *dev_priv); +--- a/drivers/gpu/drm/i915/intel_pm.c ++++ b/drivers/gpu/drm/i915/intel_pm.c +@@ -8196,6 +8196,95 @@ static void intel_init_emon(struct drm_i + dev_priv->ips.corr = (lcfuse & LCFUSE_HIV_MASK); + } + ++static bool i915_rc6_ctx_corrupted(struct drm_i915_private *dev_priv) ++{ ++ return !I915_READ(GEN8_RC6_CTX_INFO); ++} ++ ++static void i915_rc6_ctx_wa_init(struct drm_i915_private *i915) ++{ ++ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) ++ return; ++ ++ if (i915_rc6_ctx_corrupted(i915)) { ++ DRM_INFO("RC6 context corrupted, disabling runtime power management\n"); ++ i915->gt_pm.rc6.ctx_corrupted = true; ++ intel_runtime_pm_get(i915); ++ } ++} ++ ++static void i915_rc6_ctx_wa_cleanup(struct drm_i915_private *i915) ++{ ++ if (i915->gt_pm.rc6.ctx_corrupted) { ++ intel_runtime_pm_put(i915); ++ i915->gt_pm.rc6.ctx_corrupted = false; ++ } ++} ++ ++/** ++ * i915_rc6_ctx_wa_suspend - system suspend sequence for the RC6 CTX WA ++ * @i915: i915 device ++ * ++ * Perform any steps needed to clean up the RC6 CTX WA before system suspend. ++ */ ++void i915_rc6_ctx_wa_suspend(struct drm_i915_private *i915) ++{ ++ if (i915->gt_pm.rc6.ctx_corrupted) ++ intel_runtime_pm_put(i915); ++} ++ ++/** ++ * i915_rc6_ctx_wa_resume - system resume sequence for the RC6 CTX WA ++ * @i915: i915 device ++ * ++ * Perform any steps needed to re-init the RC6 CTX WA after system resume. ++ */ ++void i915_rc6_ctx_wa_resume(struct drm_i915_private *i915) ++{ ++ if (!i915->gt_pm.rc6.ctx_corrupted) ++ return; ++ ++ if (i915_rc6_ctx_corrupted(i915)) { ++ intel_runtime_pm_get(i915); ++ return; ++ } ++ ++ DRM_INFO("RC6 context restored, re-enabling runtime power management\n"); ++ i915->gt_pm.rc6.ctx_corrupted = false; ++} ++ ++static void intel_disable_rc6(struct drm_i915_private *dev_priv); ++ ++/** ++ * i915_rc6_ctx_wa_check - check for a new RC6 CTX corruption ++ * @i915: i915 device ++ * ++ * Check if an RC6 CTX corruption has happened since the last check and if so ++ * disable RC6 and runtime power management. ++ * ++ * Return false if no context corruption has happened since the last call of ++ * this function, true otherwise. 
++*/ ++bool i915_rc6_ctx_wa_check(struct drm_i915_private *i915) ++{ ++ if (!NEEDS_RC6_CTX_CORRUPTION_WA(i915)) ++ return false; ++ ++ if (i915->gt_pm.rc6.ctx_corrupted) ++ return false; ++ ++ if (!i915_rc6_ctx_corrupted(i915)) ++ return false; ++ ++ DRM_NOTE("RC6 context corruption, disabling runtime power management\n"); ++ ++ intel_disable_rc6(i915); ++ i915->gt_pm.rc6.ctx_corrupted = true; ++ intel_runtime_pm_get_noresume(i915); ++ ++ return true; ++} ++ + void intel_init_gt_powersave(struct drm_i915_private *dev_priv) + { + struct intel_rps *rps = &dev_priv->gt_pm.rps; +@@ -8211,6 +8300,8 @@ void intel_init_gt_powersave(struct drm_ + + mutex_lock(&dev_priv->pcu_lock); + ++ i915_rc6_ctx_wa_init(dev_priv); ++ + /* Initialize RPS limits (for userspace) */ + if (IS_CHERRYVIEW(dev_priv)) + cherryview_init_gt_powersave(dev_priv); +@@ -8257,6 +8348,8 @@ void intel_cleanup_gt_powersave(struct d + if (IS_VALLEYVIEW(dev_priv)) + valleyview_cleanup_gt_powersave(dev_priv); + ++ i915_rc6_ctx_wa_cleanup(dev_priv); ++ + if (!HAS_RC6(dev_priv)) + intel_runtime_pm_put(dev_priv); + } +@@ -8301,7 +8394,7 @@ static inline void intel_disable_llc_pst + i915->gt_pm.llc_pstate.enabled = false; + } + +-static void intel_disable_rc6(struct drm_i915_private *dev_priv) ++static void __intel_disable_rc6(struct drm_i915_private *dev_priv) + { + lockdep_assert_held(&dev_priv->pcu_lock); + +@@ -8320,6 +8413,13 @@ static void intel_disable_rc6(struct drm + dev_priv->gt_pm.rc6.enabled = false; + } + ++static void intel_disable_rc6(struct drm_i915_private *dev_priv) ++{ ++ mutex_lock(&dev_priv->pcu_lock); ++ __intel_disable_rc6(dev_priv); ++ mutex_unlock(&dev_priv->pcu_lock); ++} ++ + static void intel_disable_rps(struct drm_i915_private *dev_priv) + { + lockdep_assert_held(&dev_priv->pcu_lock); +@@ -8345,7 +8445,7 @@ void intel_disable_gt_powersave(struct d + { + mutex_lock(&dev_priv->pcu_lock); + +- intel_disable_rc6(dev_priv); ++ __intel_disable_rc6(dev_priv); + intel_disable_rps(dev_priv); + if (HAS_LLC(dev_priv)) + intel_disable_llc_pstate(dev_priv); +@@ -8372,6 +8472,9 @@ static void intel_enable_rc6(struct drm_ + if (dev_priv->gt_pm.rc6.enabled) + return; + ++ if (dev_priv->gt_pm.rc6.ctx_corrupted) ++ return; ++ + if (IS_CHERRYVIEW(dev_priv)) + cherryview_enable_rc6(dev_priv); + else if (IS_VALLEYVIEW(dev_priv)) diff --git a/debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch b/debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch new file mode 100644 index 000000000..210c58c19 --- /dev/null +++ b/debian/patches/bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch @@ -0,0 +1,44 @@ +From: Ben Hutchings +Date: Sun, 10 Nov 2019 22:08:12 +0000 +Subject: drm/i915/cmdparser: Fix jump whitelist clearing +Bug-Debian-Security: https://security-tracker.debian.org/tracker/CVE-2019-0155 + +When a jump_whitelist bitmap is reused, it needs to be cleared. +Currently this is done with memset() and the size calculation assumes +bitmaps are made of 32-bit words, not longs. So on 64-bit +architectures, only the first half of the bitmap is cleared. + +If some whitelist bits are carried over between successive batches +submitted on the same context, this will presumably allow embedding +the rogue instructions that we're trying to reject. + +Use bitmap_zero() instead, which gets the calculation right. 
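To make the size arithmetic concrete, here is a small user-space model; u32 and BITS_TO_LONGS are re-created locally purely for the demonstration:

    #include <stdio.h>

    typedef unsigned int u32;
    #define BITS_PER_LONG    (8 * sizeof(long))
    #define BITS_TO_LONGS(n) (((n) + BITS_PER_LONG - 1) / BITS_PER_LONG)

    int main(void)
    {
        unsigned int batch_cmds = 1024;            /* bits that must be cleared     */
        size_t words = BITS_TO_LONGS(batch_cmds);  /* longs backing the bitmap      */

        size_t full  = words * sizeof(long);       /* bytes bitmap_zero() clears    */
        size_t buggy = words * sizeof(u32);        /* bytes the old memset cleared  */

        printf("bitmap bytes: %zu, old memset bytes: %zu\n", full, buggy);
        return 0;
    }

On an LP64 kernel the old memset() therefore clears only half of the bitmap words; bitmap_zero() sizes the clear in bits and so covers the whole array.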
+ +Fixes: f8c08d8faee5 ("drm/i915/cmdparser: Add support for backward jumps") +Cc: stable@vger.kernel.org +Signed-off-by: Ben Hutchings +--- + drivers/gpu/drm/i915/i915_cmd_parser.c | 5 ++--- + 1 file changed, 2 insertions(+), 3 deletions(-) + +--- a/drivers/gpu/drm/i915/i915_cmd_parser.c ++++ b/drivers/gpu/drm/i915/i915_cmd_parser.c +@@ -1374,7 +1374,7 @@ static void init_whitelist(struct i915_g + return; + + if (batch_cmds <= ctx->jump_whitelist_cmds) { +- memset(ctx->jump_whitelist, 0, exact_size * sizeof(u32)); ++ bitmap_zero(ctx->jump_whitelist, batch_cmds); + return; + } + +@@ -1394,8 +1394,7 @@ again: + } + + DRM_DEBUG("CMD: Failed to extend whitelist. BB_START may be disallowed\n"); +- memset(ctx->jump_whitelist, 0, +- BITS_TO_LONGS(ctx->jump_whitelist_cmds) * sizeof(u32)); ++ bitmap_zero(ctx->jump_whitelist, ctx->jump_whitelist_cmds); + + return; + } diff --git a/debian/patches/bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch b/debian/patches/bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch new file mode 100644 index 000000000..8917da9e7 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch @@ -0,0 +1,250 @@ +From: Vineela Tummalapalli +Date: Mon, 4 Nov 2019 12:22:01 +0100 +Subject: x86/bugs: Add ITLB_MULTIHIT bug infrastructure + +commit db4d30fbb71b47e4ecb11c4efa5d8aad4b03dfae upstream + +Some processors may incur a machine check error possibly resulting in an +unrecoverable CPU lockup when an instruction fetch encounters a TLB +multi-hit in the instruction TLB. This can occur when the page size is +changed along with either the physical address or cache type. The relevant +erratum can be found here: + + https://bugzilla.kernel.org/show_bug.cgi?id=205195 + +There are other processors affected for which the erratum does not fully +disclose the impact. + +This issue affects both bare-metal x86 page tables and EPT. + +It can be mitigated by either eliminating the use of large pages or by +using careful TLB invalidations when changing the page size in the page +tables. + +Just like Spectre, Meltdown, L1TF and MDS, a new bit has been allocated in +MSR_IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) and will be set on CPUs which +are mitigated against this issue. 
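Once the infrastructure above is in place, the status can be read back from the new sysfs attribute; a minimal user-space sketch (the file only exists on kernels carrying this change, path taken from the hunks below):

    #include <stdio.h>

    int main(void)
    {
        const char *path =
            "/sys/devices/system/cpu/vulnerabilities/itlb_multihit";
        char line[128];
        FILE *f = fopen(path, "r");

        if (!f) {
            perror(path);
            return 1;
        }
        if (fgets(line, sizeof(line), f))
            printf("itlb_multihit: %s", line);
        fclose(f);
        return 0;
    }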
+ +Signed-off-by: Vineela Tummalapalli +Co-developed-by: Pawan Gupta +Signed-off-by: Pawan Gupta +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + .../ABI/testing/sysfs-devices-system-cpu | 1 + + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/msr-index.h | 7 +++ + arch/x86/kernel/cpu/bugs.c | 13 ++++ + arch/x86/kernel/cpu/common.c | 61 ++++++++++--------- + drivers/base/cpu.c | 8 +++ + include/linux/cpu.h | 2 + + 7 files changed, 65 insertions(+), 28 deletions(-) + +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -479,6 +479,7 @@ What: /sys/devices/system/cpu/vulnerabi + /sys/devices/system/cpu/vulnerabilities/l1tf + /sys/devices/system/cpu/vulnerabilities/mds + /sys/devices/system/cpu/vulnerabilities/tsx_async_abort ++ /sys/devices/system/cpu/vulnerabilities/itlb_multihit + Date: January 2018 + Contact: Linux kernel mailing list + Description: Information about CPU vulnerabilities +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -390,5 +390,6 @@ + #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ + #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ + #define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ ++#define X86_BUG_ITLB_MULTIHIT X86_BUG(23) /* CPU may incur MCE during certain page attribute changes */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -84,6 +84,13 @@ + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ ++#define ARCH_CAP_PSCHANGE_MC_NO BIT(6) /* ++ * The processor is not susceptible to a ++ * machine check error due to modifying the ++ * code page size along with either the ++ * physical address or cache type ++ * without TLB invalidation. ++ */ + #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. 
*/ + #define ARCH_CAP_TAA_NO BIT(8) /* + * Not susceptible to +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1387,6 +1387,11 @@ static ssize_t l1tf_show_state(char *buf + } + #endif + ++static ssize_t itlb_multihit_show_state(char *buf) ++{ ++ return sprintf(buf, "Processor vulnerable\n"); ++} ++ + static ssize_t mds_show_state(char *buf) + { + if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { +@@ -1490,6 +1495,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_TAA: + return tsx_async_abort_show_state(buf); + ++ case X86_BUG_ITLB_MULTIHIT: ++ return itlb_multihit_show_state(buf); ++ + default: + break; + } +@@ -1531,4 +1539,9 @@ ssize_t cpu_show_tsx_async_abort(struct + { + return cpu_show_common(dev, attr, buf, X86_BUG_TAA); + } ++ ++ssize_t cpu_show_itlb_multihit(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_ITLB_MULTIHIT); ++} + #endif +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -946,13 +946,14 @@ static void identify_cpu_without_cpuid(s + #endif + } + +-#define NO_SPECULATION BIT(0) +-#define NO_MELTDOWN BIT(1) +-#define NO_SSB BIT(2) +-#define NO_L1TF BIT(3) +-#define NO_MDS BIT(4) +-#define MSBDS_ONLY BIT(5) +-#define NO_SWAPGS BIT(6) ++#define NO_SPECULATION BIT(0) ++#define NO_MELTDOWN BIT(1) ++#define NO_SSB BIT(2) ++#define NO_L1TF BIT(3) ++#define NO_MDS BIT(4) ++#define MSBDS_ONLY BIT(5) ++#define NO_SWAPGS BIT(6) ++#define NO_ITLB_MULTIHIT BIT(7) + + #define VULNWL(_vendor, _family, _model, _whitelist) \ + { X86_VENDOR_##_vendor, _family, _model, X86_FEATURE_ANY, _whitelist } +@@ -970,26 +971,26 @@ static const __initconst struct x86_cpu_ + VULNWL(NSC, 5, X86_MODEL_ANY, NO_SPECULATION), + + /* Intel Family 6 */ +- VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION), +- VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION), +- VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION), +- VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION), +- VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION), +- +- VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), +- VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_SALTWELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_TABLET, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SALTWELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_BONNELL_MID, NO_SPECULATION | NO_ITLB_MULTIHIT), ++ ++ VULNWL_INTEL(ATOM_SILVERMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_X, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_SILVERMONT_MID, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_AIRMONT, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNL, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(XEON_PHI_KNM, NO_SSB | NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + + VULNWL_INTEL(CORE_YONAH, NO_SSB), + +- VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS), ++ 
VULNWL_INTEL(ATOM_AIRMONT_MID, NO_L1TF | MSBDS_ONLY | NO_SWAPGS | NO_ITLB_MULTIHIT), + +- VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS), +- VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS), +- VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS), ++ VULNWL_INTEL(ATOM_GOLDMONT, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_X, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_INTEL(ATOM_GOLDMONT_PLUS, NO_MDS | NO_L1TF | NO_SWAPGS | NO_ITLB_MULTIHIT), + + /* + * Technically, swapgs isn't serializing on AMD (despite it previously +@@ -1000,13 +1001,13 @@ static const __initconst struct x86_cpu_ + */ + + /* AMD Family 0xf - 0x12 */ +- VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), +- VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), +- VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), +- VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(0x0f, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x10, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x11, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), ++ VULNWL_AMD(0x12, NO_MELTDOWN | NO_SSB | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + + /* FAMILY_ANY must be last, otherwise 0x0f - 0x12 matches won't work */ +- VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS), ++ VULNWL_AMD(X86_FAMILY_ANY, NO_MELTDOWN | NO_L1TF | NO_MDS | NO_SWAPGS | NO_ITLB_MULTIHIT), + {} + }; + +@@ -1031,6 +1032,10 @@ static void __init cpu_set_bug_bits(stru + { + u64 ia32_cap = x86_read_arch_cap_msr(); + ++ /* Set ITLB_MULTIHIT bug if cpu is not in the whitelist and not mitigated */ ++ if (!cpu_matches(NO_ITLB_MULTIHIT) && !(ia32_cap & ARCH_CAP_PSCHANGE_MC_NO)) ++ setup_force_cpu_bug(X86_BUG_ITLB_MULTIHIT); ++ + if (cpu_matches(NO_SPECULATION)) + return; + +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -559,6 +559,12 @@ ssize_t __weak cpu_show_tsx_async_abort( + return sprintf(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_itlb_multihit(struct device *dev, ++ struct device_attribute *attr, char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); +@@ -566,6 +572,7 @@ static DEVICE_ATTR(spec_store_bypass, 04 + static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); + static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); + static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); ++static DEVICE_ATTR(itlb_multihit, 0444, cpu_show_itlb_multihit, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -575,6 +582,7 @@ static struct attribute *cpu_root_vulner + &dev_attr_l1tf.attr, + &dev_attr_mds.attr, + &dev_attr_tsx_async_abort.attr, ++ &dev_attr_itlb_multihit.attr, + NULL + }; + +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -62,6 +62,8 @@ extern ssize_t cpu_show_mds(struct devic + extern ssize_t cpu_show_tsx_async_abort(struct device *dev, + struct device_attribute *attr, + char *buf); ++extern ssize_t cpu_show_itlb_multihit(struct device *dev, ++ struct device_attribute *attr, char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, 
diff --git a/debian/patches/bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch b/debian/patches/bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch new file mode 100644 index 000000000..2d9aafa2e --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch @@ -0,0 +1,97 @@ +From: Tyler Hicks +Date: Mon, 4 Nov 2019 12:22:02 +0100 +Subject: cpu/speculation: Uninline and export CPU mitigations helpers + +commit 731dc9df975a5da21237a18c3384f811a7a41cc6 upstream + +A kernel module may need to check the value of the "mitigations=" kernel +command line parameter as part of its setup when the module needs +to perform software mitigations for a CPU flaw. + +Uninline and export the helper functions surrounding the cpu_mitigations +enum to allow for their usage from a module. + +Lastly, privatize the enum and cpu_mitigations variable since the value of +cpu_mitigations can be checked with the exported helper functions. + +Signed-off-by: Tyler Hicks +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + include/linux/cpu.h | 25 ++----------------------- + kernel/cpu.c | 27 ++++++++++++++++++++++++++- + 2 files changed, 28 insertions(+), 24 deletions(-) + +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -198,28 +198,7 @@ static inline int cpuhp_smt_enable(void) + static inline int cpuhp_smt_disable(enum cpuhp_smt_control ctrlval) { return 0; } + #endif + +-/* +- * These are used for a global "mitigations=" cmdline option for toggling +- * optional CPU mitigations. +- */ +-enum cpu_mitigations { +- CPU_MITIGATIONS_OFF, +- CPU_MITIGATIONS_AUTO, +- CPU_MITIGATIONS_AUTO_NOSMT, +-}; +- +-extern enum cpu_mitigations cpu_mitigations; +- +-/* mitigations=off */ +-static inline bool cpu_mitigations_off(void) +-{ +- return cpu_mitigations == CPU_MITIGATIONS_OFF; +-} +- +-/* mitigations=auto,nosmt */ +-static inline bool cpu_mitigations_auto_nosmt(void) +-{ +- return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; +-} ++extern bool cpu_mitigations_off(void); ++extern bool cpu_mitigations_auto_nosmt(void); + + #endif /* _LINUX_CPU_H_ */ +--- a/kernel/cpu.c ++++ b/kernel/cpu.c +@@ -2282,7 +2282,18 @@ void __init boot_cpu_hotplug_init(void) + this_cpu_write(cpuhp_state.state, CPUHP_ONLINE); + } + +-enum cpu_mitigations cpu_mitigations __ro_after_init = CPU_MITIGATIONS_AUTO; ++/* ++ * These are used for a global "mitigations=" cmdline option for toggling ++ * optional CPU mitigations. 
++ */ ++enum cpu_mitigations { ++ CPU_MITIGATIONS_OFF, ++ CPU_MITIGATIONS_AUTO, ++ CPU_MITIGATIONS_AUTO_NOSMT, ++}; ++ ++static enum cpu_mitigations cpu_mitigations __ro_after_init = ++ CPU_MITIGATIONS_AUTO; + + static int __init mitigations_parse_cmdline(char *arg) + { +@@ -2299,3 +2310,17 @@ static int __init mitigations_parse_cmdl + return 0; + } + early_param("mitigations", mitigations_parse_cmdline); ++ ++/* mitigations=off */ ++bool cpu_mitigations_off(void) ++{ ++ return cpu_mitigations == CPU_MITIGATIONS_OFF; ++} ++EXPORT_SYMBOL_GPL(cpu_mitigations_off); ++ ++/* mitigations=auto,nosmt */ ++bool cpu_mitigations_auto_nosmt(void) ++{ ++ return cpu_mitigations == CPU_MITIGATIONS_AUTO_NOSMT; ++} ++EXPORT_SYMBOL_GPL(cpu_mitigations_auto_nosmt); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch b/debian/patches/bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch new file mode 100644 index 000000000..79e454194 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch @@ -0,0 +1,193 @@ +From: "Gomez Iglesias, Antonio" +Date: Mon, 4 Nov 2019 12:22:03 +0100 +Subject: Documentation: Add ITLB_MULTIHIT documentation + +commit 7f00cc8d4a51074eb0ad4c3f16c15757b1ddfb7d upstream + +Add the initial ITLB_MULTIHIT documentation. + +[ tglx: Add it to the index so it gets actually built. ] + +Signed-off-by: Antonio Gomez Iglesias +Signed-off-by: Nelson D'Souza +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + Documentation/admin-guide/hw-vuln/index.rst | 1 + + .../admin-guide/hw-vuln/multihit.rst | 163 ++++++++++++++++++ + 2 files changed, 164 insertions(+) + create mode 100644 Documentation/admin-guide/hw-vuln/multihit.rst + +--- a/Documentation/admin-guide/hw-vuln/index.rst ++++ b/Documentation/admin-guide/hw-vuln/index.rst +@@ -13,3 +13,4 @@ are configurable at compile, boot or run + l1tf + mds + tsx_async_abort ++ multihit.rst +--- /dev/null ++++ b/Documentation/admin-guide/hw-vuln/multihit.rst +@@ -0,0 +1,163 @@ ++iTLB multihit ++============= ++ ++iTLB multihit is an erratum where some processors may incur a machine check ++error, possibly resulting in an unrecoverable CPU lockup, when an ++instruction fetch hits multiple entries in the instruction TLB. This can ++occur when the page size is changed along with either the physical address ++or cache type. A malicious guest running on a virtualized system can ++exploit this erratum to perform a denial of service attack. ++ ++ ++Affected processors ++------------------- ++ ++Variations of this erratum are present on most Intel Core and Xeon processor ++models. The erratum is not present on: ++ ++ - non-Intel processors ++ ++ - Some Atoms (Airmont, Bonnell, Goldmont, GoldmontPlus, Saltwell, Silvermont) ++ ++ - Intel processors that have the PSCHANGE_MC_NO bit set in the ++ IA32_ARCH_CAPABILITIES MSR. ++ ++ ++Related CVEs ++------------ ++ ++The following CVE entry is related to this issue: ++ ++ ============== ================================================= ++ CVE-2018-12207 Machine Check Error Avoidance on Page Size Change ++ ============== ================================================= ++ ++ ++Problem ++------- ++ ++Privileged software, including OS and virtual machine managers (VMM), are in ++charge of memory management. A key component in memory management is the control ++of the page tables. 
Modern processors use virtual memory, a technique that creates ++the illusion of a very large memory for processors. This virtual space is split ++into pages of a given size. Page tables translate virtual addresses to physical ++addresses. ++ ++To reduce latency when performing a virtual to physical address translation, ++processors include a structure, called TLB, that caches recent translations. ++There are separate TLBs for instruction (iTLB) and data (dTLB). ++ ++Under this errata, instructions are fetched from a linear address translated ++using a 4 KB translation cached in the iTLB. Privileged software modifies the ++paging structure so that the same linear address using large page size (2 MB, 4 ++MB, 1 GB) with a different physical address or memory type. After the page ++structure modification but before the software invalidates any iTLB entries for ++the linear address, a code fetch that happens on the same linear address may ++cause a machine-check error which can result in a system hang or shutdown. ++ ++ ++Attack scenarios ++---------------- ++ ++Attacks against the iTLB multihit erratum can be mounted from malicious ++guests in a virtualized system. ++ ++ ++iTLB multihit system information ++-------------------------------- ++ ++The Linux kernel provides a sysfs interface to enumerate the current iTLB ++multihit status of the system:whether the system is vulnerable and which ++mitigations are active. The relevant sysfs file is: ++ ++/sys/devices/system/cpu/vulnerabilities/itlb_multihit ++ ++The possible values in this file are: ++ ++.. list-table:: ++ ++ * - Not affected ++ - The processor is not vulnerable. ++ * - KVM: Mitigation: Split huge pages ++ - Software changes mitigate this issue. ++ * - KVM: Vulnerable ++ - The processor is vulnerable, but no mitigation enabled ++ ++ ++Enumeration of the erratum ++-------------------------------- ++ ++A new bit has been allocated in the IA32_ARCH_CAPABILITIES (PSCHANGE_MC_NO) msr ++and will be set on CPU's which are mitigated against this issue. ++ ++ ======================================= =========== =============================== ++ IA32_ARCH_CAPABILITIES MSR Not present Possibly vulnerable,check model ++ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '0' Likely vulnerable,check model ++ IA32_ARCH_CAPABILITIES[PSCHANGE_MC_NO] '1' Not vulnerable ++ ======================================= =========== =============================== ++ ++ ++Mitigation mechanism ++------------------------- ++ ++This erratum can be mitigated by restricting the use of large page sizes to ++non-executable pages. This forces all iTLB entries to be 4K, and removes ++the possibility of multiple hits. ++ ++In order to mitigate the vulnerability, KVM initially marks all huge pages ++as non-executable. If the guest attempts to execute in one of those pages, ++the page is broken down into 4K pages, which are then marked executable. ++ ++If EPT is disabled or not available on the host, KVM is in control of TLB ++flushes and the problematic situation cannot happen. However, the shadow ++EPT paging mechanism used by nested virtualization is vulnerable, because ++the nested guest can trigger multiple iTLB hits by modifying its own ++(non-nested) page tables. For simplicity, KVM will make large pages ++non-executable in all shadow paging modes. 
++ ++Mitigation control on the kernel command line and KVM - module parameter ++------------------------------------------------------------------------ ++ ++The KVM hypervisor mitigation mechanism for marking huge pages as ++non-executable can be controlled with a module parameter "nx_huge_pages=". ++The kernel command line allows to control the iTLB multihit mitigations at ++boot time with the option "kvm.nx_huge_pages=". ++ ++The valid arguments for these options are: ++ ++ ========== ================================================================ ++ force Mitigation is enabled. In this case, the mitigation implements ++ non-executable huge pages in Linux kernel KVM module. All huge ++ pages in the EPT are marked as non-executable. ++ If a guest attempts to execute in one of those pages, the page is ++ broken down into 4K pages, which are then marked executable. ++ ++ off Mitigation is disabled. ++ ++ auto Enable mitigation only if the platform is affected and the kernel ++ was not booted with the "mitigations=off" command line parameter. ++ This is the default option. ++ ========== ================================================================ ++ ++ ++Mitigation selection guide ++-------------------------- ++ ++1. No virtualization in use ++^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++ The system is protected by the kernel unconditionally and no further ++ action is required. ++ ++2. Virtualization with trusted guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++ If the guest comes from a trusted source, you may assume that the guest will ++ not attempt to maliciously exploit these errata and no further action is ++ required. ++ ++3. Virtualization with untrusted guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ If the guest comes from an untrusted source, the guest host kernel will need ++ to apply iTLB multihit mitigation via the kernel command line or kvm ++ module parameter. diff --git a/debian/patches/bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch b/debian/patches/bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch new file mode 100644 index 000000000..cf84618ee --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch @@ -0,0 +1,95 @@ +From: Paolo Bonzini +Date: Fri, 11 Oct 2019 11:59:48 +0200 +Subject: kvm: x86, powerpc: do not allow clearing largepages debugfs entry + +commit 833b45de69a6016c4b0cebe6765d526a31a81580 upstream + +The largepages debugfs entry is incremented/decremented as shadow +pages are created or destroyed. Clearing it will result in an +underflow, which is harmless to KVM but ugly (and could be +misinterpreted by tools that use debugfs information), so make +this particular statistic read-only. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Cc: kvm-ppc@vger.kernel.org +--- + arch/x86/kvm/x86.c | 6 +++--- + include/linux/kvm_host.h | 2 ++ + virt/kvm/kvm_main.c | 10 +++++++--- + 3 files changed, 12 insertions(+), 6 deletions(-) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -92,8 +92,8 @@ u64 __read_mostly efer_reserved_bits = ~ + static u64 __read_mostly efer_reserved_bits = ~((u64)EFER_SCE); + #endif + +-#define VM_STAT(x) offsetof(struct kvm, stat.x), KVM_STAT_VM +-#define VCPU_STAT(x) offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU ++#define VM_STAT(x, ...) offsetof(struct kvm, stat.x), KVM_STAT_VM, ## __VA_ARGS__ ++#define VCPU_STAT(x, ...) 
offsetof(struct kvm_vcpu, stat.x), KVM_STAT_VCPU, ## __VA_ARGS__ + + #define KVM_X2APIC_API_VALID_FLAGS (KVM_X2APIC_API_USE_32BIT_IDS | \ + KVM_X2APIC_API_DISABLE_BROADCAST_QUIRK) +@@ -205,7 +205,7 @@ struct kvm_stats_debugfs_item debugfs_en + { "mmu_cache_miss", VM_STAT(mmu_cache_miss) }, + { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, +- { "largepages", VM_STAT(lpages) }, ++ { "largepages", VM_STAT(lpages, .mode = 0444) }, + { "max_mmu_page_hash_collisions", + VM_STAT(max_mmu_page_hash_collisions) }, + { NULL } +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1034,6 +1034,7 @@ enum kvm_stat_kind { + + struct kvm_stat_data { + int offset; ++ int mode; + struct kvm *kvm; + }; + +@@ -1041,6 +1042,7 @@ struct kvm_stats_debugfs_item { + const char *name; + int offset; + enum kvm_stat_kind kind; ++ int mode; + }; + extern struct kvm_stats_debugfs_item debugfs_entries[]; + extern struct dentry *kvm_debugfs_dir; +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -616,8 +616,9 @@ static int kvm_create_vm_debugfs(struct + + stat_data->kvm = kvm; + stat_data->offset = p->offset; ++ stat_data->mode = p->mode ? p->mode : 0644; + kvm->debugfs_stat_data[p - debugfs_entries] = stat_data; +- debugfs_create_file(p->name, 0644, kvm->debugfs_dentry, ++ debugfs_create_file(p->name, stat_data->mode, kvm->debugfs_dentry, + stat_data, stat_fops_per_vm[p->kind]); + } + return 0; +@@ -3714,7 +3715,9 @@ static int kvm_debugfs_open(struct inode + if (!refcount_inc_not_zero(&stat_data->kvm->users_count)) + return -ENOENT; + +- if (simple_attr_open(inode, file, get, set, fmt)) { ++ if (simple_attr_open(inode, file, get, ++ stat_data->mode & S_IWUGO ? set : NULL, ++ fmt)) { + kvm_put_kvm(stat_data->kvm); + return -ENOMEM; + } +@@ -3962,7 +3965,8 @@ static void kvm_init_debug(void) + + kvm_debugfs_num_entries = 0; + for (p = debugfs_entries; p->name; ++p, kvm_debugfs_num_entries++) { +- debugfs_create_file(p->name, 0644, kvm_debugfs_dir, ++ int mode = p->mode ? p->mode : 0644; ++ debugfs_create_file(p->name, mode, kvm_debugfs_dir, + (void *)(long)p->offset, + stat_fops[p->kind]); + } diff --git a/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch b/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch new file mode 100644 index 000000000..e5b02c07d --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch @@ -0,0 +1,262 @@ +From: Junaid Shahid +Date: Thu, 3 Jan 2019 17:14:28 -0800 +Subject: kvm: Convert kvm_lock to a mutex + +commit 0d9ce162cf46c99628cc5da9510b959c7976735b upstream + +It doesn't seem as if there is any particular need for kvm_lock to be a +spinlock, so convert the lock to a mutex so that sleepable functions (in +particular cond_resched()) can be called while holding it. + +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + Documentation/virtual/kvm/locking.txt | 4 +--- + arch/s390/kvm/kvm-s390.c | 4 ++-- + arch/x86/kvm/mmu.c | 4 ++-- + arch/x86/kvm/x86.c | 14 ++++++------- + include/linux/kvm_host.h | 2 +- + virt/kvm/kvm_main.c | 30 +++++++++++++-------------- + 6 files changed, 28 insertions(+), 30 deletions(-) + +--- a/Documentation/virtual/kvm/locking.txt ++++ b/Documentation/virtual/kvm/locking.txt +@@ -15,8 +15,6 @@ The acquisition orders for mutexes are a + + On x86, vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock. 
+ +-For spinlocks, kvm_lock is taken outside kvm->mmu_lock. +- + Everything else is a leaf: no other lock is taken inside the critical + sections. + +@@ -169,7 +167,7 @@ which time it will be set using the Dirt + ------------ + + Name: kvm_lock +-Type: spinlock_t ++Type: mutex + Arch: any + Protects: - vm_list + +--- a/arch/s390/kvm/kvm-s390.c ++++ b/arch/s390/kvm/kvm-s390.c +@@ -2108,13 +2108,13 @@ int kvm_arch_init_vm(struct kvm *kvm, un + kvm->arch.sca = (struct bsca_block *) get_zeroed_page(alloc_flags); + if (!kvm->arch.sca) + goto out_err; +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + sca_offset += 16; + if (sca_offset + sizeof(struct bsca_block) > PAGE_SIZE) + sca_offset = 0; + kvm->arch.sca = (struct bsca_block *) + ((char *) kvm->arch.sca + sca_offset); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + sprintf(debug_name, "kvm-%u", current->pid); + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -5819,7 +5819,7 @@ mmu_shrink_scan(struct shrinker *shrink, + int nr_to_scan = sc->nr_to_scan; + unsigned long freed = 0; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + + list_for_each_entry(kvm, &vm_list, vm_list) { + int idx; +@@ -5869,7 +5869,7 @@ unlock: + break; + } + +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + return freed; + } + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -6498,7 +6498,7 @@ static void kvm_hyperv_tsc_notifier(void + struct kvm_vcpu *vcpu; + int cpu; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_make_mclock_inprogress_request(kvm); + +@@ -6524,7 +6524,7 @@ static void kvm_hyperv_tsc_notifier(void + + spin_unlock(&ka->pvclock_gtod_sync_lock); + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + } + #endif + +@@ -6582,17 +6582,17 @@ static int kvmclock_cpufreq_notifier(str + + smp_call_function_single(freq->cpu, tsc_khz_changed, freq, 1); + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu->cpu != freq->cpu) + continue; + kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu); +- if (vcpu->cpu != smp_processor_id()) ++ if (vcpu->cpu != raw_smp_processor_id()) + send_ipi = 1; + } + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + if (freq->old < freq->new && send_ipi) { + /* +@@ -6718,12 +6718,12 @@ static void pvclock_gtod_update_fn(struc + struct kvm_vcpu *vcpu; + int i; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) + kvm_for_each_vcpu(i, vcpu, kvm) + kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu); + atomic_set(&kvm_guest_has_master_clock, 0); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + } + + static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn); +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -141,7 +141,7 @@ static inline bool is_error_page(struct + + extern struct kmem_cache *kvm_vcpu_cache; + +-extern spinlock_t kvm_lock; ++extern struct mutex kvm_lock; + extern struct list_head vm_list; + + struct kvm_io_range { +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -92,7 +92,7 @@ EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); + * kvm->lock --> kvm->slots_lock --> kvm->irq_lock + */ + +-DEFINE_SPINLOCK(kvm_lock); ++DEFINE_MUTEX(kvm_lock); + static DEFINE_RAW_SPINLOCK(kvm_count_lock); + LIST_HEAD(vm_list); + +@@ -685,9 +685,9 @@ static struct kvm *kvm_create_vm(unsigne + if (r) + goto out_err; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + 
list_add(&kvm->vm_list, &vm_list); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + preempt_notifier_inc(); + +@@ -733,9 +733,9 @@ static void kvm_destroy_vm(struct kvm *k + kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); + kvm_destroy_vm_debugfs(kvm); + kvm_arch_sync_events(kvm); +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_del(&kvm->vm_list); +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + kvm_free_irq_routing(kvm); + for (i = 0; i < KVM_NR_BUSES; i++) { + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); +@@ -3831,13 +3831,13 @@ static int vm_stat_get(void *_offset, u6 + u64 tmp_val; + + *val = 0; +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vm_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + *val += tmp_val; + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + return 0; + } + +@@ -3850,12 +3850,12 @@ static int vm_stat_clear(void *_offset, + if (val) + return -EINVAL; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vm_stat_clear_per_vm((void *)&stat_tmp, 0); + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + return 0; + } +@@ -3870,13 +3870,13 @@ static int vcpu_stat_get(void *_offset, + u64 tmp_val; + + *val = 0; +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vcpu_stat_get_per_vm((void *)&stat_tmp, &tmp_val); + *val += tmp_val; + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + return 0; + } + +@@ -3889,12 +3889,12 @@ static int vcpu_stat_clear(void *_offset + if (val) + return -EINVAL; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + list_for_each_entry(kvm, &vm_list, vm_list) { + stat_tmp.kvm = kvm; + vcpu_stat_clear_per_vm((void *)&stat_tmp, 0); + } +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + return 0; + } +@@ -3915,7 +3915,7 @@ static void kvm_uevent_notify_change(uns + if (!kvm_dev.this_device || !kvm) + return; + +- spin_lock(&kvm_lock); ++ mutex_lock(&kvm_lock); + if (type == KVM_EVENT_CREATE_VM) { + kvm_createvm_count++; + kvm_active_vms++; +@@ -3924,7 +3924,7 @@ static void kvm_uevent_notify_change(uns + } + created = kvm_createvm_count; + active = kvm_active_vms; +- spin_unlock(&kvm_lock); ++ mutex_unlock(&kvm_lock); + + env = kzalloc(sizeof(*env), GFP_KERNEL); + if (!env) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch b/debian/patches/bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch new file mode 100644 index 000000000..09f40e362 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch @@ -0,0 +1,133 @@ +From: Junaid Shahid +Date: Thu, 3 Jan 2019 16:22:21 -0800 +Subject: kvm: mmu: Do not release the page inside mmu_set_spte() + +commit 43fdcda96e2550c6d1c46fb8a78801aa2f7276ed upstream + +Release the page at the call-site where it was originally acquired. +This makes the exit code cleaner for most call sites, since they +do not need to duplicate code between success and the failure +label. 
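The shape of that change, reduced to plain C rather than the KVM code itself: the function that acquires the resource is also the one that releases it, so the callee no longer needs cleanup on every exit path:

    #include <stdio.h>
    #include <stdlib.h>

    /* The callee no longer releases the buffer, so its error and success
     * exits are identical in that respect. */
    static int consume(const int *buf)
    {
        if (buf[0] < 0)
            return -1;           /* failure: no free() here             */
        printf("value %d\n", buf[0]);
        return 0;                /* success: no free() here either      */
    }

    int main(void)
    {
        int *buf = malloc(sizeof(*buf));
        int ret;

        if (!buf)
            return 1;
        buf[0] = 42;

        ret = consume(buf);      /* acquire ...                         */
        free(buf);               /* ... and release, both at the caller */
        return ret ? 1 : 0;
    }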
+ +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/mmu.c | 18 +++++++----------- + arch/x86/kvm/paging_tmpl.h | 8 +++----- + 2 files changed, 10 insertions(+), 16 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3001,8 +3001,6 @@ static int mmu_set_spte(struct kvm_vcpu + } + } + +- kvm_release_pfn_clean(pfn); +- + return ret; + } + +@@ -3037,9 +3035,11 @@ static int direct_pte_prefetch_many(stru + if (ret <= 0) + return -1; + +- for (i = 0; i < ret; i++, gfn++, start++) ++ for (i = 0; i < ret; i++, gfn++, start++) { + mmu_set_spte(vcpu, start, access, 0, sp->role.level, gfn, + page_to_pfn(pages[i]), true, true); ++ put_page(pages[i]); ++ } + + return 0; + } +@@ -3445,6 +3445,7 @@ static int nonpaging_map(struct kvm_vcpu + if (handle_abnormal_pfn(vcpu, v, gfn, pfn, ACC_ALL, &r)) + return r; + ++ r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; +@@ -3453,14 +3454,11 @@ static int nonpaging_map(struct kvm_vcpu + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- spin_unlock(&vcpu->kvm->mmu_lock); +- +- return r; + + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +- return RET_PF_RETRY; ++ return r; + } + + static void mmu_free_root_page(struct kvm *kvm, hpa_t *root_hpa, +@@ -4082,6 +4080,7 @@ static int tdp_page_fault(struct kvm_vcp + if (handle_abnormal_pfn(vcpu, 0, gfn, pfn, ACC_ALL, &r)) + return r; + ++ r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; +@@ -4090,14 +4089,11 @@ static int tdp_page_fault(struct kvm_vcp + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); + r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- spin_unlock(&vcpu->kvm->mmu_lock); +- +- return r; + + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +- return RET_PF_RETRY; ++ return r; + } + + static void nonpaging_init_context(struct kvm_vcpu *vcpu, +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -522,6 +522,7 @@ FNAME(prefetch_gpte)(struct kvm_vcpu *vc + mmu_set_spte(vcpu, spte, pte_access, 0, PT_PAGE_TABLE_LEVEL, gfn, pfn, + true, true); + ++ kvm_release_pfn_clean(pfn); + return true; + } + +@@ -673,7 +674,6 @@ static int FNAME(fetch)(struct kvm_vcpu + return ret; + + out_gpte_changed: +- kvm_release_pfn_clean(pfn); + return RET_PF_RETRY; + } + +@@ -821,6 +821,7 @@ static int FNAME(page_fault)(struct kvm_ + walker.pte_access &= ~ACC_EXEC_MASK; + } + ++ r = RET_PF_RETRY; + spin_lock(&vcpu->kvm->mmu_lock); + if (mmu_notifier_retry(vcpu->kvm, mmu_seq)) + goto out_unlock; +@@ -834,14 +835,11 @@ static int FNAME(page_fault)(struct kvm_ + level, pfn, map_writable, prefault); + ++vcpu->stat.pf_fixed; + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); +- spin_unlock(&vcpu->kvm->mmu_lock); +- +- return r; + + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +- return RET_PF_RETRY; ++ return r; + } + + static gpa_t FNAME(get_level1_sp_gpa)(struct kvm_mmu_page *sp) diff --git a/debian/patches/bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch b/debian/patches/bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch new file mode 100644 index 
000000000..4803be6a9 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch @@ -0,0 +1,168 @@ +From: Paolo Bonzini +Date: Mon, 24 Jun 2019 13:06:21 +0200 +Subject: KVM: x86: make FNAME(fetch) and __direct_map more similar + +commit 3fcf2d1bdeb6a513523cb2c77012a6b047aa859c upstream + +These two functions are basically doing the same thing through +kvm_mmu_get_page, link_shadow_page and mmu_set_spte; yet, for historical +reasons, their code looks very different. This patch tries to take the +best of each and make them very similar, so that it is easy to understand +changes that apply to both of them. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/mmu.c | 53 ++++++++++++++++++-------------------- + arch/x86/kvm/paging_tmpl.h | 30 ++++++++++----------- + 2 files changed, 39 insertions(+), 44 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3087,40 +3087,39 @@ static void direct_pte_prefetch(struct k + __direct_pte_prefetch(vcpu, sp, sptep); + } + +-static int __direct_map(struct kvm_vcpu *vcpu, int write, int map_writable, +- int level, gfn_t gfn, kvm_pfn_t pfn, bool prefault) ++static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, ++ int map_writable, int level, kvm_pfn_t pfn, ++ bool prefault) + { +- struct kvm_shadow_walk_iterator iterator; ++ struct kvm_shadow_walk_iterator it; + struct kvm_mmu_page *sp; +- int emulate = 0; +- gfn_t pseudo_gfn; ++ int ret; ++ gfn_t gfn = gpa >> PAGE_SHIFT; ++ gfn_t base_gfn = gfn; + + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) +- return 0; ++ return RET_PF_RETRY; + +- for_each_shadow_entry(vcpu, (u64)gfn << PAGE_SHIFT, iterator) { +- if (iterator.level == level) { +- emulate = mmu_set_spte(vcpu, iterator.sptep, ACC_ALL, +- write, level, gfn, pfn, prefault, +- map_writable); +- direct_pte_prefetch(vcpu, iterator.sptep); +- ++vcpu->stat.pf_fixed; ++ for_each_shadow_entry(vcpu, gpa, it) { ++ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); ++ if (it.level == level) + break; +- } + +- drop_large_spte(vcpu, iterator.sptep); +- if (!is_shadow_present_pte(*iterator.sptep)) { +- u64 base_addr = iterator.addr; ++ drop_large_spte(vcpu, it.sptep); ++ if (!is_shadow_present_pte(*it.sptep)) { ++ sp = kvm_mmu_get_page(vcpu, base_gfn, it.addr, ++ it.level - 1, true, ACC_ALL); + +- base_addr &= PT64_LVL_ADDR_MASK(iterator.level); +- pseudo_gfn = base_addr >> PAGE_SHIFT; +- sp = kvm_mmu_get_page(vcpu, pseudo_gfn, iterator.addr, +- iterator.level - 1, 1, ACC_ALL); +- +- link_shadow_page(vcpu, iterator.sptep, sp); ++ link_shadow_page(vcpu, it.sptep, sp); + } + } +- return emulate; ++ ++ ret = mmu_set_spte(vcpu, it.sptep, ACC_ALL, ++ write, level, base_gfn, pfn, prefault, ++ map_writable); ++ direct_pte_prefetch(vcpu, it.sptep); ++ ++vcpu->stat.pf_fixed; ++ return ret; + } + + static void kvm_send_hwpoison_signal(unsigned long address, struct task_struct *tsk) +@@ -3453,8 +3452,7 @@ static int nonpaging_map(struct kvm_vcpu + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); +- r = __direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- ++ r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +@@ -4088,8 +4086,7 @@ static int tdp_page_fault(struct kvm_vcp + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); +- r = 
__direct_map(vcpu, write, map_writable, level, gfn, pfn, prefault); +- ++ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -602,6 +602,7 @@ static int FNAME(fetch)(struct kvm_vcpu + struct kvm_shadow_walk_iterator it; + unsigned direct_access, access = gw->pt_access; + int top_level, ret; ++ gfn_t base_gfn; + + direct_access = gw->pte_access; + +@@ -646,31 +647,29 @@ static int FNAME(fetch)(struct kvm_vcpu + link_shadow_page(vcpu, it.sptep, sp); + } + +- for (; +- shadow_walk_okay(&it) && it.level > hlevel; +- shadow_walk_next(&it)) { +- gfn_t direct_gfn; ++ base_gfn = gw->gfn; + ++ for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { + clear_sp_write_flooding_count(it.sptep); ++ base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); ++ if (it.level == hlevel) ++ break; ++ + validate_direct_spte(vcpu, it.sptep, direct_access); + + drop_large_spte(vcpu, it.sptep); + +- if (is_shadow_present_pte(*it.sptep)) +- continue; +- +- direct_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); +- +- sp = kvm_mmu_get_page(vcpu, direct_gfn, addr, it.level-1, +- true, direct_access); +- link_shadow_page(vcpu, it.sptep, sp); ++ if (!is_shadow_present_pte(*it.sptep)) { ++ sp = kvm_mmu_get_page(vcpu, base_gfn, addr, ++ it.level - 1, true, direct_access); ++ link_shadow_page(vcpu, it.sptep, sp); ++ } + } + +- clear_sp_write_flooding_count(it.sptep); + ret = mmu_set_spte(vcpu, it.sptep, gw->pte_access, write_fault, +- it.level, gw->gfn, pfn, prefault, map_writable); ++ it.level, base_gfn, pfn, prefault, map_writable); + FNAME(pte_prefetch)(vcpu, gw, it.sptep); +- ++ ++vcpu->stat.pf_fixed; + return ret; + + out_gpte_changed: +@@ -833,7 +832,6 @@ static int FNAME(page_fault)(struct kvm_ + transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, + level, pfn, map_writable, prefault); +- ++vcpu->stat.pf_fixed; + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); + + out_unlock: diff --git a/debian/patches/bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch b/debian/patches/bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch new file mode 100644 index 000000000..da7810bd7 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch @@ -0,0 +1,70 @@ +From: Paolo Bonzini +Date: Sun, 23 Jun 2019 19:15:49 +0200 +Subject: KVM: x86: remove now unneeded hugepage gfn adjustment + +commit d679b32611c0102ce33b9e1a4e4b94854ed1812a upstream + +After the previous patch, the low bits of the gfn are masked in +both FNAME(fetch) and __direct_map, so we do not need to clear them +in transparent_hugepage_adjust. 
+ +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/mmu.c | 9 +++------ + arch/x86/kvm/paging_tmpl.h | 2 +- + 2 files changed, 4 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -3155,11 +3155,10 @@ static int kvm_handle_bad_page(struct kv + } + + static void transparent_hugepage_adjust(struct kvm_vcpu *vcpu, +- gfn_t *gfnp, kvm_pfn_t *pfnp, ++ gfn_t gfn, kvm_pfn_t *pfnp, + int *levelp) + { + kvm_pfn_t pfn = *pfnp; +- gfn_t gfn = *gfnp; + int level = *levelp; + + /* +@@ -3186,8 +3185,6 @@ static void transparent_hugepage_adjust( + mask = KVM_PAGES_PER_HPAGE(level) - 1; + VM_BUG_ON((gfn & mask) != (pfn & mask)); + if (pfn & mask) { +- gfn &= ~mask; +- *gfnp = gfn; + kvm_release_pfn_clean(pfn); + pfn &= ~mask; + kvm_get_pfn(pfn); +@@ -3451,7 +3448,7 @@ static int nonpaging_map(struct kvm_vcpu + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (likely(!force_pt_level)) +- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); ++ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); +@@ -4085,7 +4082,7 @@ static int tdp_page_fault(struct kvm_vcp + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (likely(!force_pt_level)) +- transparent_hugepage_adjust(vcpu, &gfn, &pfn, &level); ++ transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); + r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -829,7 +829,7 @@ static int FNAME(page_fault)(struct kvm_ + if (make_mmu_pages_available(vcpu) < 0) + goto out_unlock; + if (!force_pt_level) +- transparent_hugepage_adjust(vcpu, &walker.gfn, &pfn, &level); ++ transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, + level, pfn, map_writable, prefault); + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch b/debian/patches/bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch new file mode 100644 index 000000000..c25a411fa --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch @@ -0,0 +1,39 @@ +From: Paolo Bonzini +Date: Sun, 30 Jun 2019 08:36:21 -0400 +Subject: KVM: x86: change kvm_mmu_page_get_gfn BUG_ON to WARN_ON + +commit e9f2a760b158551bfbef6db31d2cae45ab8072e5 upstream + +Note that in such a case it is quite likely that KVM will BUG_ON +in __pte_list_remove when the VM is closed. However, there is no +immediate risk of memory corruption in the host so a WARN_ON is +enough and it lets you gather traces for debugging. 
+ +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/mmu.c | 12 +++++++++--- + 1 file changed, 9 insertions(+), 3 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -1027,10 +1027,16 @@ static gfn_t kvm_mmu_page_get_gfn(struct + + static void kvm_mmu_page_set_gfn(struct kvm_mmu_page *sp, int index, gfn_t gfn) + { +- if (sp->role.direct) +- BUG_ON(gfn != kvm_mmu_page_get_gfn(sp, index)); +- else ++ if (!sp->role.direct) { + sp->gfns[index] = gfn; ++ return; ++ } ++ ++ if (WARN_ON(gfn != kvm_mmu_page_get_gfn(sp, index))) ++ pr_err_ratelimited("gfn mismatch under direct page %llx " ++ "(expected %llx, got %llx)\n", ++ sp->gfn, ++ kvm_mmu_page_get_gfn(sp, index), gfn); + } + + /* diff --git a/debian/patches/bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch b/debian/patches/bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch new file mode 100644 index 000000000..7d0d7e42a --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch @@ -0,0 +1,142 @@ +From: Paolo Bonzini +Date: Thu, 4 Jul 2019 05:14:13 -0400 +Subject: KVM: x86: add tracepoints around __direct_map and FNAME(fetch) + +commit 335e192a3fa415e1202c8b9ecdaaecd643f823cc upstream + +These are useful in debugging shadow paging. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/mmu.c | 14 ++++----- + arch/x86/kvm/mmutrace.h | 59 ++++++++++++++++++++++++++++++++++++++ + arch/x86/kvm/paging_tmpl.h | 2 ++ + 3 files changed, 68 insertions(+), 7 deletions(-) + +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -140,9 +140,6 @@ module_param(dbg, bool, 0644); + + #include + +-#define CREATE_TRACE_POINTS +-#include "mmutrace.h" +- + #define SPTE_HOST_WRITEABLE (1ULL << PT_FIRST_AVAIL_BITS_SHIFT) + #define SPTE_MMU_WRITEABLE (1ULL << (PT_FIRST_AVAIL_BITS_SHIFT + 1)) + +@@ -261,9 +258,14 @@ static u64 __read_mostly shadow_nonprese + + + static void mmu_spte_set(u64 *sptep, u64 spte); ++static bool is_executable_pte(u64 spte); + static union kvm_mmu_page_role + kvm_mmu_calc_root_page_role(struct kvm_vcpu *vcpu); + ++#define CREATE_TRACE_POINTS ++#include "mmutrace.h" ++ ++ + void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask, u64 mmio_value) + { + BUG_ON((mmio_mask & mmio_value) != mmio_value); +@@ -2992,10 +2994,7 @@ static int mmu_set_spte(struct kvm_vcpu + ret = RET_PF_EMULATE; + + pgprintk("%s: setting spte %llx\n", __func__, *sptep); +- pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", +- is_large_pte(*sptep)? "2MB" : "4kB", +- *sptep & PT_WRITABLE_MASK ? 
"RW" : "R", gfn, +- *sptep, sptep); ++ trace_kvm_mmu_set_spte(level, gfn, sptep); + if (!was_rmapped && is_large_pte(*sptep)) + ++vcpu->kvm->stat.lpages; + +@@ -3106,6 +3105,7 @@ static int __direct_map(struct kvm_vcpu + if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) + return RET_PF_RETRY; + ++ trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) +--- a/arch/x86/kvm/mmutrace.h ++++ b/arch/x86/kvm/mmutrace.h +@@ -325,6 +325,65 @@ TRACE_EVENT( + __entry->kvm_gen == __entry->spte_gen + ) + ); ++ ++TRACE_EVENT( ++ kvm_mmu_set_spte, ++ TP_PROTO(int level, gfn_t gfn, u64 *sptep), ++ TP_ARGS(level, gfn, sptep), ++ ++ TP_STRUCT__entry( ++ __field(u64, gfn) ++ __field(u64, spte) ++ __field(u64, sptep) ++ __field(u8, level) ++ /* These depend on page entry type, so compute them now. */ ++ __field(bool, r) ++ __field(bool, x) ++ __field(u8, u) ++ ), ++ ++ TP_fast_assign( ++ __entry->gfn = gfn; ++ __entry->spte = *sptep; ++ __entry->sptep = virt_to_phys(sptep); ++ __entry->level = level; ++ __entry->r = shadow_present_mask || (__entry->spte & PT_PRESENT_MASK); ++ __entry->x = is_executable_pte(__entry->spte); ++ __entry->u = shadow_user_mask ? !!(__entry->spte & shadow_user_mask) : -1; ++ ), ++ ++ TP_printk("gfn %llx spte %llx (%s%s%s%s) level %d at %llx", ++ __entry->gfn, __entry->spte, ++ __entry->r ? "r" : "-", ++ __entry->spte & PT_WRITABLE_MASK ? "w" : "-", ++ __entry->x ? "x" : "-", ++ __entry->u == -1 ? "" : (__entry->u ? "u" : "-"), ++ __entry->level, __entry->sptep ++ ) ++); ++ ++TRACE_EVENT( ++ kvm_mmu_spte_requested, ++ TP_PROTO(gpa_t addr, int level, kvm_pfn_t pfn), ++ TP_ARGS(addr, level, pfn), ++ ++ TP_STRUCT__entry( ++ __field(u64, gfn) ++ __field(u64, pfn) ++ __field(u8, level) ++ ), ++ ++ TP_fast_assign( ++ __entry->gfn = addr >> PAGE_SHIFT; ++ __entry->pfn = pfn | (__entry->gfn & (KVM_PAGES_PER_HPAGE(level) - 1)); ++ __entry->level = level; ++ ), ++ ++ TP_printk("gfn %llx pfn %llx level %d", ++ __entry->gfn, __entry->pfn, __entry->level ++ ) ++); ++ + #endif /* _TRACE_KVMMMU_H */ + + #undef TRACE_INCLUDE_PATH +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -649,6 +649,8 @@ static int FNAME(fetch)(struct kvm_vcpu + + base_gfn = gw->gfn; + ++ trace_kvm_mmu_spte_requested(addr, gw->level, pfn); ++ + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { + clear_sp_write_flooding_count(it.sptep); + base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); diff --git a/debian/patches/bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch b/debian/patches/bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch new file mode 100644 index 000000000..ee63fbe1e --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch @@ -0,0 +1,67 @@ +From: Paolo Bonzini +Date: Sun, 27 Oct 2019 09:36:37 +0100 +Subject: KVM: vmx, svm: always run with EFER.NXE=1 when shadow paging is + active + +commit 9167ab79936206118cc60e47dcb926c3489f3bd5 upstream + +VMX already does so if the host has SMEP, in order to support the combination of +CR0.WP=1 and CR4.SMEP=1. However, it is perfectly safe to always do so, and in +fact VMX also ends up running with EFER.NXE=1 on old processors that lack the +"load EFER" controls, because it may help avoiding a slow MSR write. 
+ +SVM does not have similar code, but it should since recent AMD processors do +support SMEP. So this patch makes the code for the two vendors simpler and +more similar, while fixing an issue with CR0.WP=1 and CR4.SMEP=1 on AMD. + +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Cc: Joerg Roedel +Cc: stable@vger.kernel.org +--- + arch/x86/kvm/svm.c | 10 ++++++++-- + arch/x86/kvm/vmx.c | 14 +++----------- + 2 files changed, 11 insertions(+), 13 deletions(-) + +--- a/arch/x86/kvm/svm.c ++++ b/arch/x86/kvm/svm.c +@@ -736,8 +736,14 @@ static int get_npt_level(struct kvm_vcpu + static void svm_set_efer(struct kvm_vcpu *vcpu, u64 efer) + { + vcpu->arch.efer = efer; +- if (!npt_enabled && !(efer & EFER_LMA)) +- efer &= ~EFER_LME; ++ ++ if (!npt_enabled) { ++ /* Shadow paging assumes NX to be available. */ ++ efer |= EFER_NX; ++ ++ if (!(efer & EFER_LMA)) ++ efer &= ~EFER_LME; ++ } + + to_svm(vcpu)->vmcb->save.efer = efer | EFER_SVME; + mark_dirty(to_svm(vcpu)->vmcb, VMCB_CR); +--- a/arch/x86/kvm/vmx.c ++++ b/arch/x86/kvm/vmx.c +@@ -2785,17 +2785,9 @@ static bool update_transition_efer(struc + u64 guest_efer = vmx->vcpu.arch.efer; + u64 ignore_bits = 0; + +- if (!enable_ept) { +- /* +- * NX is needed to handle CR0.WP=1, CR4.SMEP=1. Testing +- * host CPUID is more efficient than testing guest CPUID +- * or CR4. Host SMEP is anyway a requirement for guest SMEP. +- */ +- if (boot_cpu_has(X86_FEATURE_SMEP)) +- guest_efer |= EFER_NX; +- else if (!(guest_efer & EFER_NX)) +- ignore_bits |= EFER_NX; +- } ++ /* Shadow paging assumes NX to be available. */ ++ if (!enable_ept) ++ guest_efer |= EFER_NX; + + /* + * LMA and LME handled by hardware; SCE meaningless outside long mode. diff --git a/debian/patches/bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch b/debian/patches/bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch new file mode 100644 index 000000000..d97596d8b --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch @@ -0,0 +1,492 @@ +From: Paolo Bonzini +Date: Mon, 4 Nov 2019 12:22:02 +0100 +Subject: kvm: mmu: ITLB_MULTIHIT mitigation + +commit b8e8c8303ff28c61046a4d0f6ea99aea609a7dc0 upstream + +With some Intel processors, putting the same virtual address in the TLB +as both a 4 KiB and 2 MiB page can confuse the instruction fetch unit +and cause the processor to issue a machine check resulting in a CPU lockup. + +Unfortunately when EPT page tables use huge pages, it is possible for a +malicious guest to cause this situation. + +Add a knob to mark huge pages as non-executable. When the nx_huge_pages +parameter is enabled (and we are using EPT), all huge pages are marked as +NX. If the guest attempts to execute in one of those pages, the page is +broken down into 4K pages, which are then marked executable. + +This is not an issue for shadow paging (except nested EPT), because then +the host is in control of TLB flushes and the problematic situation cannot +happen. With nested EPT, again the nested guest can cause problems shadow +and direct EPT is treated in the same way. 
+ +[ tglx: Fixup default to auto and massage wording a bit ] + +Originally-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + .../admin-guide/kernel-parameters.txt | 19 +++ + arch/x86/include/asm/kvm_host.h | 2 + + arch/x86/kernel/cpu/bugs.c | 13 +- + arch/x86/kvm/mmu.c | 141 +++++++++++++++++- + arch/x86/kvm/paging_tmpl.h | 29 +++- + arch/x86/kvm/x86.c | 9 ++ + 6 files changed, 200 insertions(+), 13 deletions(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -1956,6 +1956,19 @@ + KVM MMU at runtime. + Default is 0 (off) + ++ kvm.nx_huge_pages= ++ [KVM] Controls the software workaround for the ++ X86_BUG_ITLB_MULTIHIT bug. ++ force : Always deploy workaround. ++ off : Never deploy workaround. ++ auto : Deploy workaround based on the presence of ++ X86_BUG_ITLB_MULTIHIT. ++ ++ Default is 'auto'. ++ ++ If the software workaround is enabled for the host, ++ guests do need not to enable it for nested guests. ++ + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. + Default is 1 (enabled) + +@@ -2522,6 +2535,12 @@ + l1tf=off [X86] + mds=off [X86] + tsx_async_abort=off [X86] ++ kvm.nx_huge_pages=off [X86] ++ ++ Exceptions: ++ This does not have any effect on ++ kvm.nx_huge_pages when ++ kvm.nx_huge_pages=force. + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -293,6 +293,7 @@ struct kvm_mmu_page { + /* hold the gfn of each spte inside spt */ + gfn_t *gfns; + bool unsync; ++ bool lpage_disallowed; /* Can't be replaced by an equiv large page */ + int root_count; /* Currently serving as active root */ + unsigned int unsync_children; + struct kvm_rmap_head parent_ptes; /* rmap pointers to parent sptes */ +@@ -887,6 +888,7 @@ struct kvm_vm_stat { + ulong mmu_unsync; + ulong remote_tlb_flush; + ulong lpages; ++ ulong nx_lpage_splits; + ulong max_mmu_page_hash_collisions; + }; + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1225,6 +1225,9 @@ void x86_spec_ctrl_setup_ap(void) + x86_amd_ssb_disable(); + } + ++bool itlb_multihit_kvm_mitigation; ++EXPORT_SYMBOL_GPL(itlb_multihit_kvm_mitigation); ++ + #undef pr_fmt + #define pr_fmt(fmt) "L1TF: " fmt + +@@ -1380,17 +1383,25 @@ static ssize_t l1tf_show_state(char *buf + l1tf_vmx_states[l1tf_vmx_mitigation], + sched_smt_active() ? 
"vulnerable" : "disabled"); + } ++ ++static ssize_t itlb_multihit_show_state(char *buf) ++{ ++ if (itlb_multihit_kvm_mitigation) ++ return sprintf(buf, "KVM: Mitigation: Split huge pages\n"); ++ else ++ return sprintf(buf, "KVM: Vulnerable\n"); ++} + #else + static ssize_t l1tf_show_state(char *buf) + { + return sprintf(buf, "%s\n", L1TF_DEFAULT_MSG); + } +-#endif + + static ssize_t itlb_multihit_show_state(char *buf) + { + return sprintf(buf, "Processor vulnerable\n"); + } ++#endif + + static ssize_t mds_show_state(char *buf) + { +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -49,6 +49,20 @@ + #include + #include "trace.h" + ++extern bool itlb_multihit_kvm_mitigation; ++ ++static int __read_mostly nx_huge_pages = -1; ++ ++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); ++ ++static struct kernel_param_ops nx_huge_pages_ops = { ++ .set = set_nx_huge_pages, ++ .get = param_get_bool, ++}; ++ ++module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); ++__MODULE_PARM_TYPE(nx_huge_pages, "bool"); ++ + /* + * When setting this variable to true it enables Two-Dimensional-Paging + * where the hardware walks 2 page tables: +@@ -285,6 +299,11 @@ static inline bool spte_ad_enabled(u64 s + return !(spte & shadow_acc_track_value); + } + ++static bool is_nx_huge_page_enabled(void) ++{ ++ return READ_ONCE(nx_huge_pages); ++} ++ + static inline u64 spte_shadow_accessed_mask(u64 spte) + { + MMU_WARN_ON((spte & shadow_mmio_mask) == shadow_mmio_value); +@@ -1097,6 +1116,15 @@ static void account_shadowed(struct kvm + kvm_mmu_gfn_disallow_lpage(slot, gfn); + } + ++static void account_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) ++{ ++ if (sp->lpage_disallowed) ++ return; ++ ++ ++kvm->stat.nx_lpage_splits; ++ sp->lpage_disallowed = true; ++} ++ + static void unaccount_shadowed(struct kvm *kvm, struct kvm_mmu_page *sp) + { + struct kvm_memslots *slots; +@@ -1114,6 +1142,12 @@ static void unaccount_shadowed(struct kv + kvm_mmu_gfn_allow_lpage(slot, gfn); + } + ++static void unaccount_huge_nx_page(struct kvm *kvm, struct kvm_mmu_page *sp) ++{ ++ --kvm->stat.nx_lpage_splits; ++ sp->lpage_disallowed = false; ++} ++ + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, + struct kvm_memory_slot *slot) + { +@@ -2666,6 +2700,9 @@ static int kvm_mmu_prepare_zap_page(stru + kvm_reload_remote_mmus(kvm); + } + ++ if (sp->lpage_disallowed) ++ unaccount_huge_nx_page(kvm, sp); ++ + sp->role.invalid = 1; + return ret; + } +@@ -2874,6 +2911,11 @@ static int set_spte(struct kvm_vcpu *vcp + if (!speculative) + spte |= spte_shadow_accessed_mask(spte); + ++ if (level > PT_PAGE_TABLE_LEVEL && (pte_access & ACC_EXEC_MASK) && ++ is_nx_huge_page_enabled()) { ++ pte_access &= ~ACC_EXEC_MASK; ++ } ++ + if (pte_access & ACC_EXEC_MASK) + spte |= shadow_x_mask; + else +@@ -3092,9 +3134,32 @@ static void direct_pte_prefetch(struct k + __direct_pte_prefetch(vcpu, sp, sptep); + } + ++static void disallowed_hugepage_adjust(struct kvm_shadow_walk_iterator it, ++ gfn_t gfn, kvm_pfn_t *pfnp, int *levelp) ++{ ++ int level = *levelp; ++ u64 spte = *it.sptep; ++ ++ if (it.level == level && level > PT_PAGE_TABLE_LEVEL && ++ is_nx_huge_page_enabled() && ++ is_shadow_present_pte(spte) && ++ !is_large_pte(spte)) { ++ /* ++ * A small SPTE exists for this pfn, but FNAME(fetch) ++ * and __direct_map would like to create a large PTE ++ * instead: just force them to go down another level, ++ * patching back for them into pfn the next 9 bits of ++ * the address. 
++ */ ++ u64 page_mask = KVM_PAGES_PER_HPAGE(level) - KVM_PAGES_PER_HPAGE(level - 1); ++ *pfnp |= gfn & page_mask; ++ (*levelp)--; ++ } ++} ++ + static int __direct_map(struct kvm_vcpu *vcpu, gpa_t gpa, int write, + int map_writable, int level, kvm_pfn_t pfn, +- bool prefault) ++ bool prefault, bool lpage_disallowed) + { + struct kvm_shadow_walk_iterator it; + struct kvm_mmu_page *sp; +@@ -3107,6 +3172,12 @@ static int __direct_map(struct kvm_vcpu + + trace_kvm_mmu_spte_requested(gpa, level, pfn); + for_each_shadow_entry(vcpu, gpa, it) { ++ /* ++ * We cannot overwrite existing page tables with an NX ++ * large page, as the leaf could be executable. ++ */ ++ disallowed_hugepage_adjust(it, gfn, &pfn, &level); ++ + base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == level) + break; +@@ -3117,6 +3188,8 @@ static int __direct_map(struct kvm_vcpu + it.level - 1, true, ACC_ALL); + + link_shadow_page(vcpu, it.sptep, sp); ++ if (lpage_disallowed) ++ account_huge_nx_page(vcpu->kvm, sp); + } + } + +@@ -3417,11 +3490,14 @@ static int nonpaging_map(struct kvm_vcpu + { + int r; + int level; +- bool force_pt_level = false; ++ bool force_pt_level; + kvm_pfn_t pfn; + unsigned long mmu_seq; + bool map_writable, write = error_code & PFERR_WRITE_MASK; ++ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && ++ is_nx_huge_page_enabled(); + ++ force_pt_level = lpage_disallowed; + level = mapping_level(vcpu, gfn, &force_pt_level); + if (likely(!force_pt_level)) { + /* +@@ -3455,7 +3531,8 @@ static int nonpaging_map(struct kvm_vcpu + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); +- r = __direct_map(vcpu, v, write, map_writable, level, pfn, prefault); ++ r = __direct_map(vcpu, v, write, map_writable, level, pfn, ++ prefault, false); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +@@ -4049,6 +4126,8 @@ static int tdp_page_fault(struct kvm_vcp + unsigned long mmu_seq; + int write = error_code & PFERR_WRITE_MASK; + bool map_writable; ++ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && ++ is_nx_huge_page_enabled(); + + MMU_WARN_ON(!VALID_PAGE(vcpu->arch.mmu.root_hpa)); + +@@ -4059,8 +4138,9 @@ static int tdp_page_fault(struct kvm_vcp + if (r) + return r; + +- force_pt_level = !check_hugepage_cache_consistency(vcpu, gfn, +- PT_DIRECTORY_LEVEL); ++ force_pt_level = ++ lpage_disallowed || ++ !check_hugepage_cache_consistency(vcpu, gfn, PT_DIRECTORY_LEVEL); + level = mapping_level(vcpu, gfn, &force_pt_level); + if (likely(!force_pt_level)) { + if (level > PT_DIRECTORY_LEVEL && +@@ -4089,7 +4169,8 @@ static int tdp_page_fault(struct kvm_vcp + goto out_unlock; + if (likely(!force_pt_level)) + transparent_hugepage_adjust(vcpu, gfn, &pfn, &level); +- r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, prefault); ++ r = __direct_map(vcpu, gpa, write, map_writable, level, pfn, ++ prefault, lpage_disallowed); + out_unlock: + spin_unlock(&vcpu->kvm->mmu_lock); + kvm_release_pfn_clean(pfn); +@@ -5887,10 +5968,58 @@ static void mmu_destroy_caches(void) + kmem_cache_destroy(mmu_page_header_cache); + } + ++static bool get_nx_auto_mode(void) ++{ ++ /* Return true when CPU has the bug, and mitigations are ON */ ++ return boot_cpu_has_bug(X86_BUG_ITLB_MULTIHIT) && !cpu_mitigations_off(); ++} ++ ++static void __set_nx_huge_pages(bool val) ++{ ++ nx_huge_pages = itlb_multihit_kvm_mitigation = val; ++} ++ ++static int set_nx_huge_pages(const char *val, const struct kernel_param *kp) ++{ ++ bool old_val = 
nx_huge_pages; ++ bool new_val; ++ ++ /* In "auto" mode deploy workaround only if CPU has the bug. */ ++ if (sysfs_streq(val, "off")) ++ new_val = 0; ++ else if (sysfs_streq(val, "force")) ++ new_val = 1; ++ else if (sysfs_streq(val, "auto")) ++ new_val = get_nx_auto_mode(); ++ else if (strtobool(val, &new_val) < 0) ++ return -EINVAL; ++ ++ __set_nx_huge_pages(new_val); ++ ++ if (new_val != old_val) { ++ struct kvm *kvm; ++ int idx; ++ ++ mutex_lock(&kvm_lock); ++ ++ list_for_each_entry(kvm, &vm_list, vm_list) { ++ idx = srcu_read_lock(&kvm->srcu); ++ kvm_mmu_invalidate_zap_all_pages(kvm); ++ srcu_read_unlock(&kvm->srcu, idx); ++ } ++ mutex_unlock(&kvm_lock); ++ } ++ ++ return 0; ++} ++ + int kvm_mmu_module_init(void) + { + int ret = -ENOMEM; + ++ if (nx_huge_pages == -1) ++ __set_nx_huge_pages(get_nx_auto_mode()); ++ + kvm_mmu_reset_all_pte_masks(); + + pte_list_desc_cache = kmem_cache_create("pte_list_desc", +--- a/arch/x86/kvm/paging_tmpl.h ++++ b/arch/x86/kvm/paging_tmpl.h +@@ -596,13 +596,14 @@ static void FNAME(pte_prefetch)(struct k + static int FNAME(fetch)(struct kvm_vcpu *vcpu, gva_t addr, + struct guest_walker *gw, + int write_fault, int hlevel, +- kvm_pfn_t pfn, bool map_writable, bool prefault) ++ kvm_pfn_t pfn, bool map_writable, bool prefault, ++ bool lpage_disallowed) + { + struct kvm_mmu_page *sp = NULL; + struct kvm_shadow_walk_iterator it; + unsigned direct_access, access = gw->pt_access; + int top_level, ret; +- gfn_t base_gfn; ++ gfn_t gfn, base_gfn; + + direct_access = gw->pte_access; + +@@ -647,13 +648,25 @@ static int FNAME(fetch)(struct kvm_vcpu + link_shadow_page(vcpu, it.sptep, sp); + } + +- base_gfn = gw->gfn; ++ /* ++ * FNAME(page_fault) might have clobbered the bottom bits of ++ * gw->gfn, restore them from the virtual address. ++ */ ++ gfn = gw->gfn | ((addr & PT_LVL_OFFSET_MASK(gw->level)) >> PAGE_SHIFT); ++ base_gfn = gfn; + + trace_kvm_mmu_spte_requested(addr, gw->level, pfn); + + for (; shadow_walk_okay(&it); shadow_walk_next(&it)) { + clear_sp_write_flooding_count(it.sptep); +- base_gfn = gw->gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); ++ ++ /* ++ * We cannot overwrite existing page tables with an NX ++ * large page, as the leaf could be executable. 
++ */ ++ disallowed_hugepage_adjust(it, gfn, &pfn, &hlevel); ++ ++ base_gfn = gfn & ~(KVM_PAGES_PER_HPAGE(it.level) - 1); + if (it.level == hlevel) + break; + +@@ -665,6 +678,8 @@ static int FNAME(fetch)(struct kvm_vcpu + sp = kvm_mmu_get_page(vcpu, base_gfn, addr, + it.level - 1, true, direct_access); + link_shadow_page(vcpu, it.sptep, sp); ++ if (lpage_disallowed) ++ account_huge_nx_page(vcpu->kvm, sp); + } + } + +@@ -741,9 +756,11 @@ static int FNAME(page_fault)(struct kvm_ + int r; + kvm_pfn_t pfn; + int level = PT_PAGE_TABLE_LEVEL; +- bool force_pt_level = false; + unsigned long mmu_seq; + bool map_writable, is_self_change_mapping; ++ bool lpage_disallowed = (error_code & PFERR_FETCH_MASK) && ++ is_nx_huge_page_enabled(); ++ bool force_pt_level = lpage_disallowed; + + pgprintk("%s: addr %lx err %x\n", __func__, addr, error_code); + +@@ -833,7 +850,7 @@ static int FNAME(page_fault)(struct kvm_ + if (!force_pt_level) + transparent_hugepage_adjust(vcpu, walker.gfn, &pfn, &level); + r = FNAME(fetch)(vcpu, addr, &walker, write_fault, +- level, pfn, map_writable, prefault); ++ level, pfn, map_writable, prefault, lpage_disallowed); + kvm_mmu_audit(vcpu, AUDIT_POST_PAGE_FAULT); + + out_unlock: +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -206,6 +206,7 @@ struct kvm_stats_debugfs_item debugfs_en + { "mmu_unsync", VM_STAT(mmu_unsync) }, + { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, + { "largepages", VM_STAT(lpages, .mode = 0444) }, ++ { "nx_largepages_splitted", VM_STAT(nx_lpage_splits, .mode = 0444) }, + { "max_mmu_page_hash_collisions", + VM_STAT(max_mmu_page_hash_collisions) }, + { NULL } +@@ -1116,6 +1117,14 @@ u64 kvm_get_arch_capabilities(void) + rdmsrl_safe(MSR_IA32_ARCH_CAPABILITIES, &data); + + /* ++ * If nx_huge_pages is enabled, KVM's shadow paging will ensure that ++ * the nested hypervisor runs with NX huge pages. If it is not, ++ * L1 is anyway vulnerable to ITLB_MULTIHIT explots from other ++ * L1 guests, so it need not worry about its own (L2) guests. ++ */ ++ data |= ARCH_CAP_PSCHANGE_MC_NO; ++ ++ /* + * If we're doing cache flushes (either "always" or "cond") + * we will do one whenever the guest does a vmlaunch/vmresume. + * If an outer hypervisor is doing the cache flush for us diff --git a/debian/patches/bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch b/debian/patches/bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch new file mode 100644 index 000000000..7396e3992 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch @@ -0,0 +1,130 @@ +From: Junaid Shahid +Date: Fri, 1 Nov 2019 00:14:08 +0100 +Subject: kvm: Add helper function for creating VM worker threads + +commit c57c80467f90e5504c8df9ad3555d2c78800bf94 upstream + +Add a function to create a kernel thread associated with a given VM. In +particular, it ensures that the worker thread inherits the priority and +cgroups of the calling thread. 
+ +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + include/linux/kvm_host.h | 6 +++ + virt/kvm/kvm_main.c | 84 ++++++++++++++++++++++++++++++++++++++++ + 2 files changed, 90 insertions(+) + +--- a/include/linux/kvm_host.h ++++ b/include/linux/kvm_host.h +@@ -1305,4 +1305,10 @@ static inline int kvm_arch_vcpu_run_pid_ + } + #endif /* CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE */ + ++typedef int (*kvm_vm_thread_fn_t)(struct kvm *kvm, uintptr_t data); ++ ++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, ++ uintptr_t data, const char *name, ++ struct task_struct **thread_ptr); ++ + #endif +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -51,6 +51,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -4142,3 +4143,86 @@ void kvm_exit(void) + kvm_vfio_ops_exit(); + } + EXPORT_SYMBOL_GPL(kvm_exit); ++ ++struct kvm_vm_worker_thread_context { ++ struct kvm *kvm; ++ struct task_struct *parent; ++ struct completion init_done; ++ kvm_vm_thread_fn_t thread_fn; ++ uintptr_t data; ++ int err; ++}; ++ ++static int kvm_vm_worker_thread(void *context) ++{ ++ /* ++ * The init_context is allocated on the stack of the parent thread, so ++ * we have to locally copy anything that is needed beyond initialization ++ */ ++ struct kvm_vm_worker_thread_context *init_context = context; ++ struct kvm *kvm = init_context->kvm; ++ kvm_vm_thread_fn_t thread_fn = init_context->thread_fn; ++ uintptr_t data = init_context->data; ++ int err; ++ ++ err = kthread_park(current); ++ /* kthread_park(current) is never supposed to return an error */ ++ WARN_ON(err != 0); ++ if (err) ++ goto init_complete; ++ ++ err = cgroup_attach_task_all(init_context->parent, current); ++ if (err) { ++ kvm_err("%s: cgroup_attach_task_all failed with err %d\n", ++ __func__, err); ++ goto init_complete; ++ } ++ ++ set_user_nice(current, task_nice(init_context->parent)); ++ ++init_complete: ++ init_context->err = err; ++ complete(&init_context->init_done); ++ init_context = NULL; ++ ++ if (err) ++ return err; ++ ++ /* Wait to be woken up by the spawner before proceeding. 
*/ ++ kthread_parkme(); ++ ++ if (!kthread_should_stop()) ++ err = thread_fn(kvm, data); ++ ++ return err; ++} ++ ++int kvm_vm_create_worker_thread(struct kvm *kvm, kvm_vm_thread_fn_t thread_fn, ++ uintptr_t data, const char *name, ++ struct task_struct **thread_ptr) ++{ ++ struct kvm_vm_worker_thread_context init_context = {}; ++ struct task_struct *thread; ++ ++ *thread_ptr = NULL; ++ init_context.kvm = kvm; ++ init_context.parent = current; ++ init_context.thread_fn = thread_fn; ++ init_context.data = data; ++ init_completion(&init_context.init_done); ++ ++ thread = kthread_run(kvm_vm_worker_thread, &init_context, ++ "%s-%d", name, task_pid_nr(current)); ++ if (IS_ERR(thread)) ++ return PTR_ERR(thread); ++ ++ /* kthread_run is never supposed to return NULL */ ++ WARN_ON(thread == NULL); ++ ++ wait_for_completion(&init_context.init_done); ++ ++ if (!init_context.err) ++ *thread_ptr = thread; ++ ++ return init_context.err; ++} diff --git a/debian/patches/bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch b/debian/patches/bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch new file mode 100644 index 000000000..af180b791 --- /dev/null +++ b/debian/patches/bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch @@ -0,0 +1,359 @@ +From: Junaid Shahid +Date: Fri, 1 Nov 2019 00:14:14 +0100 +Subject: kvm: x86: mmu: Recovery of shattered NX large pages + +commit 1aa9b9572b10529c2e64e2b8f44025d86e124308 upstream + +The page table pages corresponding to broken down large pages are zapped in +FIFO order, so that the large page can potentially be recovered, if it is +not longer being used for execution. This removes the performance penalty +for walking deeper EPT page tables. + +By default, one large page will last about one hour once the guest +reaches a steady state. + +Signed-off-by: Junaid Shahid +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + .../admin-guide/kernel-parameters.txt | 6 + + arch/x86/include/asm/kvm_host.h | 4 + + arch/x86/kvm/mmu.c | 129 ++++++++++++++++++ + arch/x86/kvm/mmu.h | 4 + + arch/x86/kvm/x86.c | 11 ++ + virt/kvm/kvm_main.c | 30 +++- + 6 files changed, 183 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -1969,6 +1969,12 @@ + If the software workaround is enabled for the host, + guests do need not to enable it for nested guests. + ++ kvm.nx_huge_pages_recovery_ratio= ++ [KVM] Controls how many 4KiB pages are periodically zapped ++ back to huge pages. 0 disables the recovery, otherwise if ++ the value is N KVM will zap 1/Nth of the 4KiB pages every ++ minute. The default is 60. ++ + kvm-amd.nested= [KVM,AMD] Allow nested virtualization in KVM/SVM. 
+ Default is 1 (enabled) + +--- a/arch/x86/include/asm/kvm_host.h ++++ b/arch/x86/include/asm/kvm_host.h +@@ -281,6 +281,7 @@ struct kvm_rmap_head { + struct kvm_mmu_page { + struct list_head link; + struct hlist_node hash_link; ++ struct list_head lpage_disallowed_link; + + /* + * The following two entries are used to key the shadow page in the +@@ -805,6 +806,7 @@ struct kvm_arch { + */ + struct list_head active_mmu_pages; + struct list_head zapped_obsolete_pages; ++ struct list_head lpage_disallowed_mmu_pages; + struct kvm_page_track_notifier_node mmu_sp_tracker; + struct kvm_page_track_notifier_head track_notifier_head; + +@@ -875,6 +877,8 @@ struct kvm_arch { + bool x2apic_broadcast_quirk_disabled; + + bool guest_can_read_msr_platform_info; ++ ++ struct task_struct *nx_lpage_recovery_thread; + }; + + struct kvm_vm_stat { +--- a/arch/x86/kvm/mmu.c ++++ b/arch/x86/kvm/mmu.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + #include + #include +@@ -52,16 +53,26 @@ + extern bool itlb_multihit_kvm_mitigation; + + static int __read_mostly nx_huge_pages = -1; ++static uint __read_mostly nx_huge_pages_recovery_ratio = 60; + + static int set_nx_huge_pages(const char *val, const struct kernel_param *kp); ++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp); + + static struct kernel_param_ops nx_huge_pages_ops = { + .set = set_nx_huge_pages, + .get = param_get_bool, + }; + ++static struct kernel_param_ops nx_huge_pages_recovery_ratio_ops = { ++ .set = set_nx_huge_pages_recovery_ratio, ++ .get = param_get_uint, ++}; ++ + module_param_cb(nx_huge_pages, &nx_huge_pages_ops, &nx_huge_pages, 0644); + __MODULE_PARM_TYPE(nx_huge_pages, "bool"); ++module_param_cb(nx_huge_pages_recovery_ratio, &nx_huge_pages_recovery_ratio_ops, ++ &nx_huge_pages_recovery_ratio, 0644); ++__MODULE_PARM_TYPE(nx_huge_pages_recovery_ratio, "uint"); + + /* + * When setting this variable to true it enables Two-Dimensional-Paging +@@ -1122,6 +1133,8 @@ static void account_huge_nx_page(struct + return; + + ++kvm->stat.nx_lpage_splits; ++ list_add_tail(&sp->lpage_disallowed_link, ++ &kvm->arch.lpage_disallowed_mmu_pages); + sp->lpage_disallowed = true; + } + +@@ -1146,6 +1159,7 @@ static void unaccount_huge_nx_page(struc + { + --kvm->stat.nx_lpage_splits; + sp->lpage_disallowed = false; ++ list_del(&sp->lpage_disallowed_link); + } + + static bool __mmu_gfn_lpage_is_disallowed(gfn_t gfn, int level, +@@ -6006,6 +6020,8 @@ static int set_nx_huge_pages(const char + idx = srcu_read_lock(&kvm->srcu); + kvm_mmu_invalidate_zap_all_pages(kvm); + srcu_read_unlock(&kvm->srcu, idx); ++ ++ wake_up_process(kvm->arch.nx_lpage_recovery_thread); + } + mutex_unlock(&kvm_lock); + } +@@ -6087,3 +6103,116 @@ void kvm_mmu_module_exit(void) + unregister_shrinker(&mmu_shrinker); + mmu_audit_disable(); + } ++ ++static int set_nx_huge_pages_recovery_ratio(const char *val, const struct kernel_param *kp) ++{ ++ unsigned int old_val; ++ int err; ++ ++ old_val = nx_huge_pages_recovery_ratio; ++ err = param_set_uint(val, kp); ++ if (err) ++ return err; ++ ++ if (READ_ONCE(nx_huge_pages) && ++ !old_val && nx_huge_pages_recovery_ratio) { ++ struct kvm *kvm; ++ ++ mutex_lock(&kvm_lock); ++ ++ list_for_each_entry(kvm, &vm_list, vm_list) ++ wake_up_process(kvm->arch.nx_lpage_recovery_thread); ++ ++ mutex_unlock(&kvm_lock); ++ } ++ ++ return err; ++} ++ ++static void kvm_recover_nx_lpages(struct kvm *kvm) ++{ ++ int rcu_idx; ++ struct kvm_mmu_page *sp; ++ unsigned int ratio; ++ LIST_HEAD(invalid_list); ++ ulong 
to_zap; ++ ++ rcu_idx = srcu_read_lock(&kvm->srcu); ++ spin_lock(&kvm->mmu_lock); ++ ++ ratio = READ_ONCE(nx_huge_pages_recovery_ratio); ++ to_zap = ratio ? DIV_ROUND_UP(kvm->stat.nx_lpage_splits, ratio) : 0; ++ while (to_zap && !list_empty(&kvm->arch.lpage_disallowed_mmu_pages)) { ++ /* ++ * We use a separate list instead of just using active_mmu_pages ++ * because the number of lpage_disallowed pages is expected to ++ * be relatively small compared to the total. ++ */ ++ sp = list_first_entry(&kvm->arch.lpage_disallowed_mmu_pages, ++ struct kvm_mmu_page, ++ lpage_disallowed_link); ++ WARN_ON_ONCE(!sp->lpage_disallowed); ++ kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list); ++ WARN_ON_ONCE(sp->lpage_disallowed); ++ ++ if (!--to_zap || need_resched() || spin_needbreak(&kvm->mmu_lock)) { ++ kvm_mmu_commit_zap_page(kvm, &invalid_list); ++ if (to_zap) ++ cond_resched_lock(&kvm->mmu_lock); ++ } ++ } ++ ++ spin_unlock(&kvm->mmu_lock); ++ srcu_read_unlock(&kvm->srcu, rcu_idx); ++} ++ ++static long get_nx_lpage_recovery_timeout(u64 start_time) ++{ ++ return READ_ONCE(nx_huge_pages) && READ_ONCE(nx_huge_pages_recovery_ratio) ++ ? start_time + 60 * HZ - get_jiffies_64() ++ : MAX_SCHEDULE_TIMEOUT; ++} ++ ++static int kvm_nx_lpage_recovery_worker(struct kvm *kvm, uintptr_t data) ++{ ++ u64 start_time; ++ long remaining_time; ++ ++ while (true) { ++ start_time = get_jiffies_64(); ++ remaining_time = get_nx_lpage_recovery_timeout(start_time); ++ ++ set_current_state(TASK_INTERRUPTIBLE); ++ while (!kthread_should_stop() && remaining_time > 0) { ++ schedule_timeout(remaining_time); ++ remaining_time = get_nx_lpage_recovery_timeout(start_time); ++ set_current_state(TASK_INTERRUPTIBLE); ++ } ++ ++ set_current_state(TASK_RUNNING); ++ ++ if (kthread_should_stop()) ++ return 0; ++ ++ kvm_recover_nx_lpages(kvm); ++ } ++} ++ ++int kvm_mmu_post_init_vm(struct kvm *kvm) ++{ ++ int err; ++ ++ err = kvm_vm_create_worker_thread(kvm, kvm_nx_lpage_recovery_worker, 0, ++ "kvm-nx-lpage-recovery", ++ &kvm->arch.nx_lpage_recovery_thread); ++ if (!err) ++ kthread_unpark(kvm->arch.nx_lpage_recovery_thread); ++ ++ return err; ++} ++ ++void kvm_mmu_pre_destroy_vm(struct kvm *kvm) ++{ ++ if (kvm->arch.nx_lpage_recovery_thread) ++ kthread_stop(kvm->arch.nx_lpage_recovery_thread); ++} +--- a/arch/x86/kvm/mmu.h ++++ b/arch/x86/kvm/mmu.h +@@ -216,4 +216,8 @@ void kvm_mmu_gfn_allow_lpage(struct kvm_ + bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm, + struct kvm_memory_slot *slot, u64 gfn); + int kvm_arch_write_log_dirty(struct kvm_vcpu *vcpu); ++ ++int kvm_mmu_post_init_vm(struct kvm *kvm); ++void kvm_mmu_pre_destroy_vm(struct kvm *kvm); ++ + #endif +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -8960,6 +8960,7 @@ int kvm_arch_init_vm(struct kvm *kvm, un + INIT_HLIST_HEAD(&kvm->arch.mask_notifier_list); + INIT_LIST_HEAD(&kvm->arch.active_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.zapped_obsolete_pages); ++ INIT_LIST_HEAD(&kvm->arch.lpage_disallowed_mmu_pages); + INIT_LIST_HEAD(&kvm->arch.assigned_dev_head); + atomic_set(&kvm->arch.noncoherent_dma_count, 0); + +@@ -8991,6 +8992,11 @@ int kvm_arch_init_vm(struct kvm *kvm, un + return 0; + } + ++int kvm_arch_post_init_vm(struct kvm *kvm) ++{ ++ return kvm_mmu_post_init_vm(kvm); ++} ++ + static void kvm_unload_vcpu_mmu(struct kvm_vcpu *vcpu) + { + vcpu_load(vcpu); +@@ -9092,6 +9098,11 @@ int x86_set_memory_region(struct kvm *kv + } + EXPORT_SYMBOL_GPL(x86_set_memory_region); + ++void kvm_arch_pre_destroy_vm(struct kvm *kvm) ++{ ++ kvm_mmu_pre_destroy_vm(kvm); ++} ++ + void 
kvm_arch_destroy_vm(struct kvm *kvm) + { + if (current->mm == kvm->mm) { +--- a/virt/kvm/kvm_main.c ++++ b/virt/kvm/kvm_main.c +@@ -625,6 +625,23 @@ static int kvm_create_vm_debugfs(struct + return 0; + } + ++/* ++ * Called after the VM is otherwise initialized, but just before adding it to ++ * the vm_list. ++ */ ++int __weak kvm_arch_post_init_vm(struct kvm *kvm) ++{ ++ return 0; ++} ++ ++/* ++ * Called just after removing the VM from the vm_list, but before doing any ++ * other destruction. ++ */ ++void __weak kvm_arch_pre_destroy_vm(struct kvm *kvm) ++{ ++} ++ + static struct kvm *kvm_create_vm(unsigned long type) + { + int r, i; +@@ -679,11 +696,15 @@ static struct kvm *kvm_create_vm(unsigne + rcu_assign_pointer(kvm->buses[i], + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); + if (!kvm->buses[i]) +- goto out_err; ++ goto out_err_no_mmu_notifier; + } + + r = kvm_init_mmu_notifier(kvm); + if (r) ++ goto out_err_no_mmu_notifier; ++ ++ r = kvm_arch_post_init_vm(kvm); ++ if (r) + goto out_err; + + mutex_lock(&kvm_lock); +@@ -695,6 +716,11 @@ static struct kvm *kvm_create_vm(unsigne + return kvm; + + out_err: ++#if defined(CONFIG_MMU_NOTIFIER) && defined(KVM_ARCH_WANT_MMU_NOTIFIER) ++ if (kvm->mmu_notifier.ops) ++ mmu_notifier_unregister(&kvm->mmu_notifier, current->mm); ++#endif ++out_err_no_mmu_notifier: + cleanup_srcu_struct(&kvm->irq_srcu); + out_err_no_irq_srcu: + cleanup_srcu_struct(&kvm->srcu); +@@ -737,6 +763,8 @@ static void kvm_destroy_vm(struct kvm *k + mutex_lock(&kvm_lock); + list_del(&kvm->vm_list); + mutex_unlock(&kvm_lock); ++ kvm_arch_pre_destroy_vm(kvm); ++ + kvm_free_irq_routing(kvm); + for (i = 0; i < KVM_NR_BUSES; i++) { + struct kvm_io_bus *bus = kvm_get_bus(kvm, i); diff --git a/debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch b/debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch new file mode 100644 index 000000000..605dbae1d --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch @@ -0,0 +1,58 @@ +From: Paolo Bonzini +Date: Mon, 19 Aug 2019 17:24:07 +0200 +Subject: KVM: x86: use Intel speculation bugs and features as derived in + generic x86 code + +commit 0c54914d0c52a15db9954a76ce80fee32cf318f4 upstream + +Similar to AMD bits, set the Intel bits from the vendor-independent +feature and bug flags, because KVM_GET_SUPPORTED_CPUID does not care +about the vendor and they should be set on AMD processors as well. + +Suggested-by: Jim Mattson +Reviewed-by: Jim Mattson +Signed-off-by: Paolo Bonzini +Signed-off-by: Thomas Gleixner +--- + arch/x86/kvm/cpuid.c | 8 ++++++++ + arch/x86/kvm/x86.c | 8 ++++++++ + 2 files changed, 16 insertions(+) + +--- a/arch/x86/kvm/cpuid.c ++++ b/arch/x86/kvm/cpuid.c +@@ -501,8 +501,16 @@ static inline int __do_cpuid_ent(struct + /* PKU is not yet implemented for shadow paging. */ + if (!tdp_enabled || !boot_cpu_has(X86_FEATURE_OSPKE)) + entry->ecx &= ~F(PKU); ++ + entry->edx &= kvm_cpuid_7_0_edx_x86_features; + cpuid_mask(&entry->edx, CPUID_7_EDX); ++ if (boot_cpu_has(X86_FEATURE_IBPB) && ++ boot_cpu_has(X86_FEATURE_IBRS)) ++ entry->edx |= F(SPEC_CTRL); ++ if (boot_cpu_has(X86_FEATURE_STIBP)) ++ entry->edx |= F(INTEL_STIBP); ++ if (boot_cpu_has(X86_FEATURE_SSBD)) ++ entry->edx |= F(SPEC_CTRL_SSBD); + /* + * We emulate ARCH_CAPABILITIES in software even + * if the host doesn't support it. 
+--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1127,8 +1127,16 @@ u64 kvm_get_arch_capabilities(void) + if (l1tf_vmx_mitigation != VMENTER_L1D_FLUSH_NEVER) + data |= ARCH_CAP_SKIP_VMENTRY_L1DFLUSH; + ++ if (!boot_cpu_has_bug(X86_BUG_CPU_MELTDOWN)) ++ data |= ARCH_CAP_RDCL_NO; ++ if (!boot_cpu_has_bug(X86_BUG_SPEC_STORE_BYPASS)) ++ data |= ARCH_CAP_SSB_NO; ++ if (!boot_cpu_has_bug(X86_BUG_MDS)) ++ data |= ARCH_CAP_MDS_NO; ++ + return data; + } ++ + EXPORT_SYMBOL_GPL(kvm_get_arch_capabilities); + + static int kvm_get_msr_feature(struct kvm_msr_entry *msr) diff --git a/debian/patches/bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch b/debian/patches/bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch new file mode 100644 index 000000000..ca6ffcec0 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch @@ -0,0 +1,79 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 10:45:50 +0200 +Subject: x86/msr: Add the IA32_TSX_CTRL MSR + +commit c2955f270a84762343000f103e0640d29c7a96f3 upstream + +Transactional Synchronization Extensions (TSX) may be used on certain +processors as part of a speculative side channel attack. A microcode +update for existing processors that are vulnerable to this attack will +add a new MSR - IA32_TSX_CTRL to allow the system administrator the +option to disable TSX as one of the possible mitigations. + +The CPUs which get this new MSR after a microcode upgrade are the ones +which do not set MSR_IA32_ARCH_CAPABILITIES.MDS_NO (bit 5) because those +CPUs have CPUID.MD_CLEAR, i.e., the VERW implementation which clears all +CPU buffers takes care of the TAA case as well. + + [ Note that future processors that are not vulnerable will also + support the IA32_TSX_CTRL MSR. ] + +Add defines for the new IA32_TSX_CTRL MSR and its bits. + +TSX has two sub-features: + +1. Restricted Transactional Memory (RTM) is an explicitly-used feature + where new instructions begin and end TSX transactions. +2. Hardware Lock Elision (HLE) is implicitly used when certain kinds of + "old" style locks are used by software. + +Bit 7 of the IA32_ARCH_CAPABILITIES indicates the presence of the +IA32_TSX_CTRL MSR. + +There are two control bits in IA32_TSX_CTRL MSR: + + Bit 0: When set, it disables the Restricted Transactional Memory (RTM) + sub-feature of TSX (will force all transactions to abort on the + XBEGIN instruction). + + Bit 1: When set, it disables the enumeration of the RTM and HLE feature + (i.e. it will make CPUID(EAX=7).EBX{bit4} and + CPUID(EAX=7).EBX{bit11} read as 0). + +The other TSX sub-feature, Hardware Lock Elision (HLE), is +unconditionally disabled by the new microcode but still enumerated +as present by CPUID(EAX=7).EBX{bit4}, unless disabled by +IA32_TSX_CTRL_MSR[1] - TSX_CTRL_CPUID_CLEAR. + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Neelima Krishnan +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Reviewed-by: Josh Poimboeuf +--- + arch/x86/include/asm/msr-index.h | 5 +++++ + 1 file changed, 5 insertions(+) + +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -84,6 +84,7 @@ + * Microarchitectural Data + * Sampling (MDS) vulnerabilities. + */ ++#define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. 
*/ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +@@ -94,6 +95,10 @@ + #define MSR_IA32_BBL_CR_CTL 0x00000119 + #define MSR_IA32_BBL_CR_CTL3 0x0000011e + ++#define MSR_IA32_TSX_CTRL 0x00000122 ++#define TSX_CTRL_RTM_DISABLE BIT(0) /* Disable RTM feature */ ++#define TSX_CTRL_CPUID_CLEAR BIT(1) /* Disable TSX enumeration */ ++ + #define MSR_IA32_SYSENTER_CS 0x00000174 + #define MSR_IA32_SYSENTER_ESP 0x00000175 + #define MSR_IA32_SYSENTER_EIP 0x00000176 diff --git a/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch b/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch new file mode 100644 index 000000000..29cd36f2c --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch @@ -0,0 +1,62 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 10:52:35 +0200 +Subject: x86/cpu: Add a helper function x86_read_arch_cap_msr() + +commit 286836a70433fb64131d2590f4bf512097c255e1 upstream + +Add a helper function to read the IA32_ARCH_CAPABILITIES MSR. + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Neelima Krishnan +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Reviewed-by: Josh Poimboeuf +--- + arch/x86/kernel/cpu/common.c | 15 +++++++++++---- + arch/x86/kernel/cpu/cpu.h | 2 ++ + 2 files changed, 13 insertions(+), 4 deletions(-) + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1017,19 +1017,26 @@ static bool __init cpu_matches(unsigned + return m && !!(m->driver_data & which); + } + +-static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ++u64 x86_read_arch_cap_msr(void) + { + u64 ia32_cap = 0; + ++ if (boot_cpu_has(X86_FEATURE_ARCH_CAPABILITIES)) ++ rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); ++ ++ return ia32_cap; ++} ++ ++static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c) ++{ ++ u64 ia32_cap = x86_read_arch_cap_msr(); ++ + if (cpu_matches(NO_SPECULATION)) + return; + + setup_force_cpu_bug(X86_BUG_SPECTRE_V1); + setup_force_cpu_bug(X86_BUG_SPECTRE_V2); + +- if (cpu_has(c, X86_FEATURE_ARCH_CAPABILITIES)) +- rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap); +- + if (!cpu_matches(NO_SSB) && !(ia32_cap & ARCH_CAP_SSB_NO) && + !cpu_has(c, X86_FEATURE_AMD_SSB_NO)) + setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS); +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -65,4 +65,6 @@ unsigned int aperfmperf_get_khz(int cpu) + + extern void x86_spec_ctrl_setup_ap(void); + ++extern u64 x86_read_arch_cap_msr(void); ++ + #endif /* ARCH_X86_CPU_H */ diff --git a/debian/patches/bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch b/debian/patches/bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch new file mode 100644 index 000000000..87373788c --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch @@ -0,0 +1,257 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 11:01:53 +0200 +Subject: x86/cpu: Add a "tsx=" cmdline option with TSX disabled by default + +commit 95c5824f75f3ba4c9e8e5a4b1a623c95390ac266 upstream + +Add a kernel cmdline parameter "tsx" to control the Transactional +Synchronization Extensions (TSX) feature. On CPUs that support TSX +control, use "tsx=on|off" to enable or disable TSX. Not specifying this +option is equivalent to "tsx=off". 
This is because on certain processors +TSX may be used as a part of a speculative side channel attack. + +Carve out the TSX controlling functionality into a separate compilation +unit because TSX is a CPU feature while the TSX async abort control +machinery will go to cpu/bugs.c. + + [ bp: - Massage, shorten and clear the arg buffer. + - Clarifications of the tsx= possible options - Josh. + - Expand on TSX_CTRL availability - Pawan. ] + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf +--- + .../admin-guide/kernel-parameters.txt | 26 ++++ + arch/x86/kernel/cpu/Makefile | 2 +- + arch/x86/kernel/cpu/common.c | 1 + + arch/x86/kernel/cpu/cpu.h | 16 +++ + arch/x86/kernel/cpu/intel.c | 5 + + arch/x86/kernel/cpu/tsx.c | 125 ++++++++++++++++++ + 6 files changed, 174 insertions(+), 1 deletion(-) + create mode 100644 arch/x86/kernel/cpu/tsx.c + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4685,6 +4685,32 @@ + marks the TSC unconditionally unstable at bootup and + avoids any further wobbles once the TSC watchdog notices. + ++ tsx= [X86] Control Transactional Synchronization ++ Extensions (TSX) feature in Intel processors that ++ support TSX control. ++ ++ This parameter controls the TSX feature. The options are: ++ ++ on - Enable TSX on the system. Although there are ++ mitigations for all known security vulnerabilities, ++ TSX has been known to be an accelerator for ++ several previous speculation-related CVEs, and ++ so there may be unknown security risks associated ++ with leaving it enabled. ++ ++ off - Disable TSX on the system. (Note that this ++ option takes effect only on newer CPUs which are ++ not vulnerable to MDS, i.e., have ++ MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 and which get ++ the new IA32_TSX_CTRL MSR through a microcode ++ update. This new MSR allows for the reliable ++ deactivation of the TSX functionality.) ++ ++ Not specifying this option is equivalent to tsx=off. ++ ++ See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst ++ for more details. 
++ + turbografx.map[2|3]= [HW,JOY] + TurboGraFX parallel port interface + Format: +--- a/arch/x86/kernel/cpu/Makefile ++++ b/arch/x86/kernel/cpu/Makefile +@@ -28,7 +28,7 @@ obj-y += cpuid-deps.o + obj-$(CONFIG_PROC_FS) += proc.o + obj-$(CONFIG_X86_FEATURE_NAMES) += capflags.o powerflags.o + +-obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o ++obj-$(CONFIG_CPU_SUP_INTEL) += intel.o intel_pconfig.o tsx.o + obj-$(CONFIG_CPU_SUP_AMD) += amd.o + obj-$(CONFIG_CPU_SUP_CYRIX_32) += cyrix.o + obj-$(CONFIG_CPU_SUP_CENTAUR) += centaur.o +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1482,6 +1482,7 @@ void __init identify_boot_cpu(void) + enable_sep_cpu(); + #endif + cpu_detect_tlb(&boot_cpu_data); ++ tsx_init(); + } + + void identify_secondary_cpu(struct cpuinfo_x86 *c) +--- a/arch/x86/kernel/cpu/cpu.h ++++ b/arch/x86/kernel/cpu/cpu.h +@@ -45,6 +45,22 @@ struct _tlb_table { + extern const struct cpu_dev *const __x86_cpu_dev_start[], + *const __x86_cpu_dev_end[]; + ++#ifdef CONFIG_CPU_SUP_INTEL ++enum tsx_ctrl_states { ++ TSX_CTRL_ENABLE, ++ TSX_CTRL_DISABLE, ++ TSX_CTRL_NOT_SUPPORTED, ++}; ++ ++extern __ro_after_init enum tsx_ctrl_states tsx_ctrl_state; ++ ++extern void __init tsx_init(void); ++extern void tsx_enable(void); ++extern void tsx_disable(void); ++#else ++static inline void tsx_init(void) { } ++#endif /* CONFIG_CPU_SUP_INTEL */ ++ + extern void get_cpu_cap(struct cpuinfo_x86 *c); + extern void get_cpu_address_sizes(struct cpuinfo_x86 *c); + extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c); +--- a/arch/x86/kernel/cpu/intel.c ++++ b/arch/x86/kernel/cpu/intel.c +@@ -766,6 +766,11 @@ static void init_intel(struct cpuinfo_x8 + init_intel_energy_perf(c); + + init_intel_misc_features(c); ++ ++ if (tsx_ctrl_state == TSX_CTRL_ENABLE) ++ tsx_enable(); ++ if (tsx_ctrl_state == TSX_CTRL_DISABLE) ++ tsx_disable(); + } + + #ifdef CONFIG_X86_32 +--- /dev/null ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -0,0 +1,125 @@ ++// SPDX-License-Identifier: GPL-2.0 ++/* ++ * Intel Transactional Synchronization Extensions (TSX) control. ++ * ++ * Copyright (C) 2019 Intel Corporation ++ * ++ * Author: ++ * Pawan Gupta ++ */ ++ ++#include ++ ++#include ++ ++#include "cpu.h" ++ ++enum tsx_ctrl_states tsx_ctrl_state __ro_after_init = TSX_CTRL_NOT_SUPPORTED; ++ ++void tsx_disable(void) ++{ ++ u64 tsx; ++ ++ rdmsrl(MSR_IA32_TSX_CTRL, tsx); ++ ++ /* Force all transactions to immediately abort */ ++ tsx |= TSX_CTRL_RTM_DISABLE; ++ ++ /* ++ * Ensure TSX support is not enumerated in CPUID. ++ * This is visible to userspace and will ensure they ++ * do not waste resources trying TSX transactions that ++ * will always abort. ++ */ ++ tsx |= TSX_CTRL_CPUID_CLEAR; ++ ++ wrmsrl(MSR_IA32_TSX_CTRL, tsx); ++} ++ ++void tsx_enable(void) ++{ ++ u64 tsx; ++ ++ rdmsrl(MSR_IA32_TSX_CTRL, tsx); ++ ++ /* Enable the RTM feature in the cpu */ ++ tsx &= ~TSX_CTRL_RTM_DISABLE; ++ ++ /* ++ * Ensure TSX support is enumerated in CPUID. ++ * This is visible to userspace and will ensure they ++ * can enumerate and use the TSX feature. ++ */ ++ tsx &= ~TSX_CTRL_CPUID_CLEAR; ++ ++ wrmsrl(MSR_IA32_TSX_CTRL, tsx); ++} ++ ++static bool __init tsx_ctrl_is_supported(void) ++{ ++ u64 ia32_cap = x86_read_arch_cap_msr(); ++ ++ /* ++ * TSX is controlled via MSR_IA32_TSX_CTRL. However, support for this ++ * MSR is enumerated by ARCH_CAP_TSX_MSR bit in MSR_IA32_ARCH_CAPABILITIES. 
++ * ++ * TSX control (aka MSR_IA32_TSX_CTRL) is only available after a ++ * microcode update on CPUs that have their MSR_IA32_ARCH_CAPABILITIES ++ * bit MDS_NO=1. CPUs with MDS_NO=0 are not planned to get ++ * MSR_IA32_TSX_CTRL support even after a microcode update. Thus, ++ * tsx= cmdline requests will do nothing on CPUs without ++ * MSR_IA32_TSX_CTRL support. ++ */ ++ return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); ++} ++ ++void __init tsx_init(void) ++{ ++ char arg[4] = {}; ++ int ret; ++ ++ if (!tsx_ctrl_is_supported()) ++ return; ++ ++ ret = cmdline_find_option(boot_command_line, "tsx", arg, sizeof(arg)); ++ if (ret >= 0) { ++ if (!strcmp(arg, "on")) { ++ tsx_ctrl_state = TSX_CTRL_ENABLE; ++ } else if (!strcmp(arg, "off")) { ++ tsx_ctrl_state = TSX_CTRL_DISABLE; ++ } else { ++ tsx_ctrl_state = TSX_CTRL_DISABLE; ++ pr_err("tsx: invalid option, defaulting to off\n"); ++ } ++ } else { ++ /* tsx= not provided, defaulting to off */ ++ tsx_ctrl_state = TSX_CTRL_DISABLE; ++ } ++ ++ if (tsx_ctrl_state == TSX_CTRL_DISABLE) { ++ tsx_disable(); ++ ++ /* ++ * tsx_disable() will change the state of the ++ * RTM CPUID bit. Clear it here since it is now ++ * expected to be not set. ++ */ ++ setup_clear_cpu_cap(X86_FEATURE_RTM); ++ } else if (tsx_ctrl_state == TSX_CTRL_ENABLE) { ++ ++ /* ++ * HW defaults TSX to be enabled at bootup. ++ * We may still need the TSX enable support ++ * during init for special cases like ++ * kexec after TSX is disabled. ++ */ ++ tsx_enable(); ++ ++ /* ++ * tsx_enable() will change the state of the ++ * RTM CPUID bit. Force it here since it is now ++ * expected to be set. ++ */ ++ setup_force_cpu_cap(X86_FEATURE_RTM); ++ } ++} diff --git a/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch b/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch new file mode 100644 index 000000000..91ece79d8 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch @@ -0,0 +1,299 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 11:30:45 +0200 +Subject: x86/speculation/taa: Add mitigation for TSX Async Abort + +commit 1b42f017415b46c317e71d41c34ec088417a1883 upstream + +TSX Async Abort (TAA) is a side channel vulnerability to the internal +buffers in some Intel processors similar to Microachitectural Data +Sampling (MDS). In this case, certain loads may speculatively pass +invalid data to dependent operations when an asynchronous abort +condition is pending in a TSX transaction. + +This includes loads with no fault or assist condition. Such loads may +speculatively expose stale data from the uarch data structures as in +MDS. Scope of exposure is within the same-thread and cross-thread. This +issue affects all current processors that support TSX, but do not have +ARCH_CAP_TAA_NO (bit 8) set in MSR_IA32_ARCH_CAPABILITIES. + +On CPUs which have their IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0, +CPUID.MD_CLEAR=1 and the MDS mitigation is clearing the CPU buffers +using VERW or L1D_FLUSH, there is no additional mitigation needed for +TAA. On affected CPUs with MDS_NO=1 this issue can be mitigated by +disabling the Transactional Synchronization Extensions (TSX) feature. + +A new MSR IA32_TSX_CTRL in future and current processors after a +microcode update can be used to control the TSX feature. There are two +bits in that MSR: + +* TSX_CTRL_RTM_DISABLE disables the TSX sub-feature Restricted +Transactional Memory (RTM). 
+ +* TSX_CTRL_CPUID_CLEAR clears the RTM enumeration in CPUID. The other +TSX sub-feature, Hardware Lock Elision (HLE), is unconditionally +disabled with updated microcode but still enumerated as present by +CPUID(EAX=7).EBX{bit4}. + +The second mitigation approach is similar to MDS which is clearing the +affected CPU buffers on return to user space and when entering a guest. +Relevant microcode update is required for the mitigation to work. More +details on this approach can be found here: + + https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html + +The TSX feature can be controlled by the "tsx" command line parameter. +If it is force-enabled then "Clear CPU buffers" (MDS mitigation) is +deployed. The effective mitigation state can be read from sysfs. + + [ bp: + - massage + comments cleanup + - s/TAA_MITIGATION_TSX_DISABLE/TAA_MITIGATION_TSX_DISABLED/g - Josh. + - remove partial TAA mitigation in update_mds_branch_idle() - Josh. + - s/tsx_async_abort_cmdline/tsx_async_abort_parse_cmdline/g + ] + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf +--- + arch/x86/include/asm/cpufeatures.h | 1 + + arch/x86/include/asm/msr-index.h | 4 + + arch/x86/include/asm/nospec-branch.h | 4 +- + arch/x86/include/asm/processor.h | 7 ++ + arch/x86/kernel/cpu/bugs.c | 110 +++++++++++++++++++++++++++ + arch/x86/kernel/cpu/common.c | 15 ++++ + 6 files changed, 139 insertions(+), 2 deletions(-) + +--- a/arch/x86/include/asm/cpufeatures.h ++++ b/arch/x86/include/asm/cpufeatures.h +@@ -389,5 +389,6 @@ + #define X86_BUG_MDS X86_BUG(19) /* CPU is affected by Microarchitectural data sampling */ + #define X86_BUG_MSBDS_ONLY X86_BUG(20) /* CPU is only affected by the MSDBS variant of BUG_MDS */ + #define X86_BUG_SWAPGS X86_BUG(21) /* CPU is affected by speculation through SWAPGS */ ++#define X86_BUG_TAA X86_BUG(22) /* CPU is affected by TSX Async Abort(TAA) */ + + #endif /* _ASM_X86_CPUFEATURES_H */ +--- a/arch/x86/include/asm/msr-index.h ++++ b/arch/x86/include/asm/msr-index.h +@@ -85,6 +85,10 @@ + * Sampling (MDS) vulnerabilities. + */ + #define ARCH_CAP_TSX_CTRL_MSR BIT(7) /* MSR for TSX control is available. */ ++#define ARCH_CAP_TAA_NO BIT(8) /* ++ * Not susceptible to ++ * TSX Async Abort (TAA) vulnerabilities. 
++ */ + + #define MSR_IA32_FLUSH_CMD 0x0000010b + #define L1D_FLUSH BIT(0) /* +--- a/arch/x86/include/asm/nospec-branch.h ++++ b/arch/x86/include/asm/nospec-branch.h +@@ -323,7 +323,7 @@ DECLARE_STATIC_KEY_FALSE(mds_idle_clear) + #include + + /** +- * mds_clear_cpu_buffers - Mitigation for MDS vulnerability ++ * mds_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * This uses the otherwise unused and obsolete VERW instruction in + * combination with microcode which triggers a CPU buffer flush when the +@@ -346,7 +346,7 @@ static inline void mds_clear_cpu_buffers + } + + /** +- * mds_user_clear_cpu_buffers - Mitigation for MDS vulnerability ++ * mds_user_clear_cpu_buffers - Mitigation for MDS and TAA vulnerability + * + * Clear CPU buffers if the corresponding static key is enabled + */ +--- a/arch/x86/include/asm/processor.h ++++ b/arch/x86/include/asm/processor.h +@@ -1003,4 +1003,11 @@ enum mds_mitigations { + MDS_MITIGATION_VMWERV, + }; + ++enum taa_mitigations { ++ TAA_MITIGATION_OFF, ++ TAA_MITIGATION_UCODE_NEEDED, ++ TAA_MITIGATION_VERW, ++ TAA_MITIGATION_TSX_DISABLED, ++}; ++ + #endif /* _ASM_X86_PROCESSOR_H */ +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -32,11 +32,14 @@ + #include + #include + ++#include "cpu.h" ++ + static void __init spectre_v1_select_mitigation(void); + static void __init spectre_v2_select_mitigation(void); + static void __init ssb_select_mitigation(void); + static void __init l1tf_select_mitigation(void); + static void __init mds_select_mitigation(void); ++static void __init taa_select_mitigation(void); + + /* The base value of the SPEC_CTRL MSR that always has to be preserved. */ + u64 x86_spec_ctrl_base; +@@ -103,6 +106,7 @@ void __init check_bugs(void) + ssb_select_mitigation(); + l1tf_select_mitigation(); + mds_select_mitigation(); ++ taa_select_mitigation(); + + arch_smt_update(); + +@@ -267,6 +271,100 @@ static int __init mds_cmdline(char *str) + early_param("mds", mds_cmdline); + + #undef pr_fmt ++#define pr_fmt(fmt) "TAA: " fmt ++ ++/* Default mitigation for TAA-affected CPUs */ ++static enum taa_mitigations taa_mitigation __ro_after_init = TAA_MITIGATION_VERW; ++static bool taa_nosmt __ro_after_init; ++ ++static const char * const taa_strings[] = { ++ [TAA_MITIGATION_OFF] = "Vulnerable", ++ [TAA_MITIGATION_UCODE_NEEDED] = "Vulnerable: Clear CPU buffers attempted, no microcode", ++ [TAA_MITIGATION_VERW] = "Mitigation: Clear CPU buffers", ++ [TAA_MITIGATION_TSX_DISABLED] = "Mitigation: TSX disabled", ++}; ++ ++static void __init taa_select_mitigation(void) ++{ ++ u64 ia32_cap; ++ ++ if (!boot_cpu_has_bug(X86_BUG_TAA)) { ++ taa_mitigation = TAA_MITIGATION_OFF; ++ return; ++ } ++ ++ /* TSX previously disabled by tsx=off */ ++ if (!boot_cpu_has(X86_FEATURE_RTM)) { ++ taa_mitigation = TAA_MITIGATION_TSX_DISABLED; ++ goto out; ++ } ++ ++ if (cpu_mitigations_off()) { ++ taa_mitigation = TAA_MITIGATION_OFF; ++ return; ++ } ++ ++ /* TAA mitigation is turned off on the cmdline (tsx_async_abort=off) */ ++ if (taa_mitigation == TAA_MITIGATION_OFF) ++ goto out; ++ ++ if (boot_cpu_has(X86_FEATURE_MD_CLEAR)) ++ taa_mitigation = TAA_MITIGATION_VERW; ++ else ++ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; ++ ++ /* ++ * VERW doesn't clear the CPU buffers when MD_CLEAR=1 and MDS_NO=1. ++ * A microcode update fixes this behavior to clear CPU buffers. It also ++ * adds support for MSR_IA32_TSX_CTRL which is enumerated by the ++ * ARCH_CAP_TSX_CTRL_MSR bit. 
++ * ++ * On MDS_NO=1 CPUs if ARCH_CAP_TSX_CTRL_MSR is not set, microcode ++ * update is required. ++ */ ++ ia32_cap = x86_read_arch_cap_msr(); ++ if ( (ia32_cap & ARCH_CAP_MDS_NO) && ++ !(ia32_cap & ARCH_CAP_TSX_CTRL_MSR)) ++ taa_mitigation = TAA_MITIGATION_UCODE_NEEDED; ++ ++ /* ++ * TSX is enabled, select alternate mitigation for TAA which is ++ * the same as MDS. Enable MDS static branch to clear CPU buffers. ++ * ++ * For guests that can't determine whether the correct microcode is ++ * present on host, enable the mitigation for UCODE_NEEDED as well. ++ */ ++ static_branch_enable(&mds_user_clear); ++ ++ if (taa_nosmt || cpu_mitigations_auto_nosmt()) ++ cpu_smt_disable(false); ++ ++out: ++ pr_info("%s\n", taa_strings[taa_mitigation]); ++} ++ ++static int __init tsx_async_abort_parse_cmdline(char *str) ++{ ++ if (!boot_cpu_has_bug(X86_BUG_TAA)) ++ return 0; ++ ++ if (!str) ++ return -EINVAL; ++ ++ if (!strcmp(str, "off")) { ++ taa_mitigation = TAA_MITIGATION_OFF; ++ } else if (!strcmp(str, "full")) { ++ taa_mitigation = TAA_MITIGATION_VERW; ++ } else if (!strcmp(str, "full,nosmt")) { ++ taa_mitigation = TAA_MITIGATION_VERW; ++ taa_nosmt = true; ++ } ++ ++ return 0; ++} ++early_param("tsx_async_abort", tsx_async_abort_parse_cmdline); ++ ++#undef pr_fmt + #define pr_fmt(fmt) "Spectre V1 : " fmt + + enum spectre_v1_mitigation { +@@ -772,6 +870,7 @@ static void update_mds_branch_idle(void) + } + + #define MDS_MSG_SMT "MDS CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/mds.html for more details.\n" ++#define TAA_MSG_SMT "TAA CPU bug present and SMT on, data leak possible. See https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html for more details.\n" + + void arch_smt_update(void) + { +@@ -804,6 +903,17 @@ void arch_smt_update(void) + break; + } + ++ switch (taa_mitigation) { ++ case TAA_MITIGATION_VERW: ++ case TAA_MITIGATION_UCODE_NEEDED: ++ if (sched_smt_active()) ++ pr_warn_once(TAA_MSG_SMT); ++ break; ++ case TAA_MITIGATION_TSX_DISABLED: ++ case TAA_MITIGATION_OFF: ++ break; ++ } ++ + mutex_unlock(&spec_ctrl_mutex); + } + +--- a/arch/x86/kernel/cpu/common.c ++++ b/arch/x86/kernel/cpu/common.c +@@ -1053,6 +1053,21 @@ static void __init cpu_set_bug_bits(stru + if (!cpu_matches(NO_SWAPGS)) + setup_force_cpu_bug(X86_BUG_SWAPGS); + ++ /* ++ * When the CPU is not mitigated for TAA (TAA_NO=0) set TAA bug when: ++ * - TSX is supported or ++ * - TSX_CTRL is present ++ * ++ * TSX_CTRL check is needed for cases when TSX could be disabled before ++ * the kernel boot e.g. kexec. ++ * TSX_CTRL check alone is not sufficient for cases when the microcode ++ * update is not present or running as guest that don't get TSX_CTRL. 
++ */ ++ if (!(ia32_cap & ARCH_CAP_TAA_NO) && ++ (cpu_has(c, X86_FEATURE_RTM) || ++ (ia32_cap & ARCH_CAP_TSX_CTRL_MSR))) ++ setup_force_cpu_bug(X86_BUG_TAA); ++ + if (cpu_matches(NO_MELTDOWN)) + return; + diff --git a/debian/patches/bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch b/debian/patches/bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch new file mode 100644 index 000000000..7f1c4f982 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch @@ -0,0 +1,114 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:19:51 +0200 +Subject: x86/speculation/taa: Add sysfs reporting for TSX Async Abort + +commit 6608b45ac5ecb56f9e171252229c39580cc85f0f upstream + +Add the sysfs reporting file for TSX Async Abort. It exposes the +vulnerability and the mitigation state similar to the existing files for +the other hardware vulnerabilities. + +Sysfs file path is: +/sys/devices/system/cpu/vulnerabilities/tsx_async_abort + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Neelima Krishnan +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Reviewed-by: Greg Kroah-Hartman +Reviewed-by: Josh Poimboeuf +--- + arch/x86/kernel/cpu/bugs.c | 23 +++++++++++++++++++++++ + drivers/base/cpu.c | 9 +++++++++ + include/linux/cpu.h | 3 +++ + 3 files changed, 35 insertions(+) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -1408,6 +1408,21 @@ static ssize_t mds_show_state(char *buf) + sched_smt_active() ? "vulnerable" : "disabled"); + } + ++static ssize_t tsx_async_abort_show_state(char *buf) ++{ ++ if ((taa_mitigation == TAA_MITIGATION_TSX_DISABLED) || ++ (taa_mitigation == TAA_MITIGATION_OFF)) ++ return sprintf(buf, "%s\n", taa_strings[taa_mitigation]); ++ ++ if (boot_cpu_has(X86_FEATURE_HYPERVISOR)) { ++ return sprintf(buf, "%s; SMT Host state unknown\n", ++ taa_strings[taa_mitigation]); ++ } ++ ++ return sprintf(buf, "%s; SMT %s\n", taa_strings[taa_mitigation], ++ sched_smt_active() ? 
"vulnerable" : "disabled"); ++} ++ + static char *stibp_state(void) + { + if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) +@@ -1476,6 +1491,9 @@ static ssize_t cpu_show_common(struct de + case X86_BUG_MDS: + return mds_show_state(buf); + ++ case X86_BUG_TAA: ++ return tsx_async_abort_show_state(buf); ++ + default: + break; + } +@@ -1512,4 +1530,9 @@ ssize_t cpu_show_mds(struct device *dev, + { + return cpu_show_common(dev, attr, buf, X86_BUG_MDS); + } ++ ++ssize_t cpu_show_tsx_async_abort(struct device *dev, struct device_attribute *attr, char *buf) ++{ ++ return cpu_show_common(dev, attr, buf, X86_BUG_TAA); ++} + #endif +--- a/drivers/base/cpu.c ++++ b/drivers/base/cpu.c +@@ -552,12 +552,20 @@ ssize_t __weak cpu_show_mds(struct devic + return sprintf(buf, "Not affected\n"); + } + ++ssize_t __weak cpu_show_tsx_async_abort(struct device *dev, ++ struct device_attribute *attr, ++ char *buf) ++{ ++ return sprintf(buf, "Not affected\n"); ++} ++ + static DEVICE_ATTR(meltdown, 0444, cpu_show_meltdown, NULL); + static DEVICE_ATTR(spectre_v1, 0444, cpu_show_spectre_v1, NULL); + static DEVICE_ATTR(spectre_v2, 0444, cpu_show_spectre_v2, NULL); + static DEVICE_ATTR(spec_store_bypass, 0444, cpu_show_spec_store_bypass, NULL); + static DEVICE_ATTR(l1tf, 0444, cpu_show_l1tf, NULL); + static DEVICE_ATTR(mds, 0444, cpu_show_mds, NULL); ++static DEVICE_ATTR(tsx_async_abort, 0444, cpu_show_tsx_async_abort, NULL); + + static struct attribute *cpu_root_vulnerabilities_attrs[] = { + &dev_attr_meltdown.attr, +@@ -566,6 +574,7 @@ static struct attribute *cpu_root_vulner + &dev_attr_spec_store_bypass.attr, + &dev_attr_l1tf.attr, + &dev_attr_mds.attr, ++ &dev_attr_tsx_async_abort.attr, + NULL + }; + +--- a/include/linux/cpu.h ++++ b/include/linux/cpu.h +@@ -59,6 +59,9 @@ extern ssize_t cpu_show_l1tf(struct devi + struct device_attribute *attr, char *buf); + extern ssize_t cpu_show_mds(struct device *dev, + struct device_attribute *attr, char *buf); ++extern ssize_t cpu_show_tsx_async_abort(struct device *dev, ++ struct device_attribute *attr, ++ char *buf); + + extern __printf(4, 5) + struct device *cpu_device_create(struct device *parent, void *drvdata, diff --git a/debian/patches/bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch b/debian/patches/bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch new file mode 100644 index 000000000..869858647 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch @@ -0,0 +1,60 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:23:33 +0200 +Subject: kvm/x86: Export MDS_NO=0 to guests when TSX is enabled + +commit e1d38b63acd843cfdd4222bf19a26700fd5c699e upstream + +Export the IA32_ARCH_CAPABILITIES MSR bit MDS_NO=0 to guests on TSX +Async Abort(TAA) affected hosts that have TSX enabled and updated +microcode. This is required so that the guests don't complain, + + "Vulnerable: Clear CPU buffers attempted, no microcode" + +when the host has the updated microcode to clear CPU buffers. + +Microcode update also adds support for MSR_IA32_TSX_CTRL which is +enumerated by the ARCH_CAP_TSX_CTRL bit in IA32_ARCH_CAPABILITIES MSR. +Guests can't do this check themselves when the ARCH_CAP_TSX_CTRL bit is +not exported to the guests. + +In this case export MDS_NO=0 to the guests. When guests have +CPUID.MD_CLEAR=1, they deploy MDS mitigation which also mitigates TAA. 
+ +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Tested-by: Neelima Krishnan +Reviewed-by: Tony Luck +Reviewed-by: Josh Poimboeuf +--- + arch/x86/kvm/x86.c | 19 +++++++++++++++++++ + 1 file changed, 19 insertions(+) + +--- a/arch/x86/kvm/x86.c ++++ b/arch/x86/kvm/x86.c +@@ -1134,6 +1134,25 @@ u64 kvm_get_arch_capabilities(void) + if (!boot_cpu_has_bug(X86_BUG_MDS)) + data |= ARCH_CAP_MDS_NO; + ++ /* ++ * On TAA affected systems, export MDS_NO=0 when: ++ * - TSX is enabled on the host, i.e. X86_FEATURE_RTM=1. ++ * - Updated microcode is present. This is detected by ++ * the presence of ARCH_CAP_TSX_CTRL_MSR and ensures ++ * that VERW clears CPU buffers. ++ * ++ * When MDS_NO=0 is exported, guests deploy clear CPU buffer ++ * mitigation and don't complain: ++ * ++ * "Vulnerable: Clear CPU buffers attempted, no microcode" ++ * ++ * If TSX is disabled on the system, guests are also mitigated against ++ * TAA and clear CPU buffer mitigation is not required for guests. ++ */ ++ if (boot_cpu_has_bug(X86_BUG_TAA) && boot_cpu_has(X86_FEATURE_RTM) && ++ (data & ARCH_CAP_TSX_CTRL_MSR)) ++ data &= ~ARCH_CAP_MDS_NO; ++ + return data; + } + diff --git a/debian/patches/bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch b/debian/patches/bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch new file mode 100644 index 000000000..1f668f4cb --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch @@ -0,0 +1,60 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:28:57 +0200 +Subject: x86/tsx: Add "auto" option to the tsx= cmdline parameter + +commit 7531a3596e3272d1f6841e0d601a614555dc6b65 upstream + +Platforms which are not affected by X86_BUG_TAA may want the TSX feature +enabled. Add "auto" option to the TSX cmdline parameter. When tsx=auto +disable TSX when X86_BUG_TAA is present, otherwise enable TSX. + +More details on X86_BUG_TAA can be found here: +https://www.kernel.org/doc/html/latest/admin-guide/hw-vuln/tsx_async_abort.html + + [ bp: Extend the arg buffer to accommodate "auto\0". ] + +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Reviewed-by: Tony Luck +Reviewed-by: Josh Poimboeuf +--- + Documentation/admin-guide/kernel-parameters.txt | 3 +++ + arch/x86/kernel/cpu/tsx.c | 7 ++++++- + 2 files changed, 9 insertions(+), 1 deletion(-) + +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -4706,6 +4706,9 @@ + update. This new MSR allows for the reliable + deactivation of the TSX functionality.) + ++ auto - Disable TSX if X86_BUG_TAA is present, ++ otherwise enable TSX on the system. ++ + Not specifying this option is equivalent to tsx=off. 
+ + See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst +--- a/arch/x86/kernel/cpu/tsx.c ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -75,7 +75,7 @@ static bool __init tsx_ctrl_is_supported + + void __init tsx_init(void) + { +- char arg[4] = {}; ++ char arg[5] = {}; + int ret; + + if (!tsx_ctrl_is_supported()) +@@ -87,6 +87,11 @@ void __init tsx_init(void) + tsx_ctrl_state = TSX_CTRL_ENABLE; + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; ++ } else if (!strcmp(arg, "auto")) { ++ if (boot_cpu_has_bug(X86_BUG_TAA)) ++ tsx_ctrl_state = TSX_CTRL_DISABLE; ++ else ++ tsx_ctrl_state = TSX_CTRL_ENABLE; + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); diff --git a/debian/patches/bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch b/debian/patches/bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch new file mode 100644 index 000000000..5b474ca9b --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch @@ -0,0 +1,516 @@ +From: Pawan Gupta +Date: Wed, 23 Oct 2019 12:32:55 +0200 +Subject: x86/speculation/taa: Add documentation for TSX Async Abort + +commit a7a248c593e4fd7a67c50b5f5318fe42a0db335e upstream + +Add the documenation for TSX Async Abort. Include the description of +the issue, how to check the mitigation state, control the mitigation, +guidance for system administrators. + + [ bp: Add proper SPDX tags, touch ups by Josh and me. ] + +Co-developed-by: Antonio Gomez Iglesias + +Signed-off-by: Pawan Gupta +Signed-off-by: Antonio Gomez Iglesias +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Reviewed-by: Mark Gross +Reviewed-by: Tony Luck +Reviewed-by: Josh Poimboeuf +--- + .../ABI/testing/sysfs-devices-system-cpu | 1 + + Documentation/admin-guide/hw-vuln/index.rst | 1 + + .../admin-guide/hw-vuln/tsx_async_abort.rst | 276 ++++++++++++++++++ + .../admin-guide/kernel-parameters.txt | 38 +++ + Documentation/x86/index.rst | 1 + + Documentation/x86/tsx_async_abort.rst | 117 ++++++++ + 6 files changed, 434 insertions(+) + create mode 100644 Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + create mode 100644 Documentation/x86/tsx_async_abort.rst + +--- a/Documentation/ABI/testing/sysfs-devices-system-cpu ++++ b/Documentation/ABI/testing/sysfs-devices-system-cpu +@@ -478,6 +478,7 @@ What: /sys/devices/system/cpu/vulnerabi + /sys/devices/system/cpu/vulnerabilities/spec_store_bypass + /sys/devices/system/cpu/vulnerabilities/l1tf + /sys/devices/system/cpu/vulnerabilities/mds ++ /sys/devices/system/cpu/vulnerabilities/tsx_async_abort + Date: January 2018 + Contact: Linux kernel mailing list + Description: Information about CPU vulnerabilities +--- a/Documentation/admin-guide/hw-vuln/index.rst ++++ b/Documentation/admin-guide/hw-vuln/index.rst +@@ -12,3 +12,4 @@ are configurable at compile, boot or run + spectre + l1tf + mds ++ tsx_async_abort +--- /dev/null ++++ b/Documentation/admin-guide/hw-vuln/tsx_async_abort.rst +@@ -0,0 +1,276 @@ ++.. SPDX-License-Identifier: GPL-2.0 ++ ++TAA - TSX Asynchronous Abort ++====================================== ++ ++TAA is a hardware vulnerability that allows unprivileged speculative access to ++data which is available in various CPU internal buffers by using asynchronous ++aborts within an Intel TSX transactional region. 
++ ++Affected processors ++------------------- ++ ++This vulnerability only affects Intel processors that support Intel ++Transactional Synchronization Extensions (TSX) when the TAA_NO bit (bit 8) ++is 0 in the IA32_ARCH_CAPABILITIES MSR. On processors where the MDS_NO bit ++(bit 5) is 0 in the IA32_ARCH_CAPABILITIES MSR, the existing MDS mitigations ++also mitigate against TAA. ++ ++Whether a processor is affected or not can be read out from the TAA ++vulnerability file in sysfs. See :ref:`tsx_async_abort_sys_info`. ++ ++Related CVEs ++------------ ++ ++The following CVE entry is related to this TAA issue: ++ ++ ============== ===== =================================================== ++ CVE-2019-11135 TAA TSX Asynchronous Abort (TAA) condition on some ++ microprocessors utilizing speculative execution may ++ allow an authenticated user to potentially enable ++ information disclosure via a side channel with ++ local access. ++ ============== ===== =================================================== ++ ++Problem ++------- ++ ++When performing store, load or L1 refill operations, processors write ++data into temporary microarchitectural structures (buffers). The data in ++those buffers can be forwarded to load operations as an optimization. ++ ++Intel TSX is an extension to the x86 instruction set architecture that adds ++hardware transactional memory support to improve performance of multi-threaded ++software. TSX lets the processor expose and exploit concurrency hidden in an ++application due to dynamically avoiding unnecessary synchronization. ++ ++TSX supports atomic memory transactions that are either committed (success) or ++aborted. During an abort, operations that happened within the transactional region ++are rolled back. An asynchronous abort takes place, among other options, when a ++different thread accesses a cache line that is also used within the transactional ++region when that access might lead to a data race. ++ ++Immediately after an uncompleted asynchronous abort, certain speculatively ++executed loads may read data from those internal buffers and pass it to dependent ++operations. This can be then used to infer the value via a cache side channel ++attack. ++ ++Because the buffers are potentially shared between Hyper-Threads cross ++Hyper-Thread attacks are possible. ++ ++The victim of a malicious actor does not need to make use of TSX. Only the ++attacker needs to begin a TSX transaction and raise an asynchronous abort ++which in turn potenitally leaks data stored in the buffers. ++ ++More detailed technical information is available in the TAA specific x86 ++architecture section: :ref:`Documentation/x86/tsx_async_abort.rst `. ++ ++ ++Attack scenarios ++---------------- ++ ++Attacks against the TAA vulnerability can be implemented from unprivileged ++applications running on hosts or guests. ++ ++As for MDS, the attacker has no control over the memory addresses that can ++be leaked. Only the victim is responsible for bringing data to the CPU. As ++a result, the malicious actor has to sample as much data as possible and ++then postprocess it to try to infer any useful information from it. ++ ++A potential attacker only has read access to the data. Also, there is no direct ++privilege escalation by using this technique. ++ ++ ++.. _tsx_async_abort_sys_info: ++ ++TAA system information ++----------------------- ++ ++The Linux kernel provides a sysfs interface to enumerate the current TAA status ++of mitigated systems. 
The relevant sysfs file is: ++ ++/sys/devices/system/cpu/vulnerabilities/tsx_async_abort ++ ++The possible values in this file are: ++ ++.. list-table:: ++ ++ * - 'Vulnerable' ++ - The CPU is affected by this vulnerability and the microcode and kernel mitigation are not applied. ++ * - 'Vulnerable: Clear CPU buffers attempted, no microcode' ++ - The system tries to clear the buffers but the microcode might not support the operation. ++ * - 'Mitigation: Clear CPU buffers' ++ - The microcode has been updated to clear the buffers. TSX is still enabled. ++ * - 'Mitigation: TSX disabled' ++ - TSX is disabled. ++ * - 'Not affected' ++ - The CPU is not affected by this issue. ++ ++.. _ucode_needed: ++ ++Best effort mitigation mode ++^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++If the processor is vulnerable, but the availability of the microcode-based ++mitigation mechanism is not advertised via CPUID the kernel selects a best ++effort mitigation mode. This mode invokes the mitigation instructions ++without a guarantee that they clear the CPU buffers. ++ ++This is done to address virtualization scenarios where the host has the ++microcode update applied, but the hypervisor is not yet updated to expose the ++CPUID to the guest. If the host has updated microcode the protection takes ++effect; otherwise a few CPU cycles are wasted pointlessly. ++ ++The state in the tsx_async_abort sysfs file reflects this situation ++accordingly. ++ ++ ++Mitigation mechanism ++-------------------- ++ ++The kernel detects the affected CPUs and the presence of the microcode which is ++required. If a CPU is affected and the microcode is available, then the kernel ++enables the mitigation by default. ++ ++ ++The mitigation can be controlled at boot time via a kernel command line option. ++See :ref:`taa_mitigation_control_command_line`. ++ ++.. _virt_mechanism: ++ ++Virtualization mitigation ++^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++Affected systems where the host has TAA microcode and TAA is mitigated by ++having disabled TSX previously, are not vulnerable regardless of the status ++of the VMs. ++ ++In all other cases, if the host either does not have the TAA microcode or ++the kernel is not mitigated, the system might be vulnerable. ++ ++ ++.. _taa_mitigation_control_command_line: ++ ++Mitigation control on the kernel command line ++--------------------------------------------- ++ ++The kernel command line allows to control the TAA mitigations at boot time with ++the option "tsx_async_abort=". The valid arguments for this option are: ++ ++ ============ ============================================================= ++ off This option disables the TAA mitigation on affected platforms. ++ If the system has TSX enabled (see next parameter) and the CPU ++ is affected, the system is vulnerable. ++ ++ full TAA mitigation is enabled. If TSX is enabled, on an affected ++ system it will clear CPU buffers on ring transitions. On ++ systems which are MDS-affected and deploy MDS mitigation, ++ TAA is also mitigated. Specifying this option on those ++ systems will have no effect. ++ ++ full,nosmt The same as tsx_async_abort=full, with SMT disabled on ++ vulnerable CPUs that have TSX enabled. This is the complete ++ mitigation. When TSX is disabled, SMT is not disabled because ++ CPU is not vulnerable to cross-thread TAA attacks. ++ ============ ============================================================= ++ ++Not specifying this option is equivalent to "tsx_async_abort=full". 
++ ++The kernel command line also allows to control the TSX feature using the ++parameter "tsx=" on CPUs which support TSX control. MSR_IA32_TSX_CTRL is used ++to control the TSX feature and the enumeration of the TSX feature bits (RTM ++and HLE) in CPUID. ++ ++The valid options are: ++ ++ ============ ============================================================= ++ off Disables TSX on the system. ++ ++ Note that this option takes effect only on newer CPUs which are ++ not vulnerable to MDS, i.e., have MSR_IA32_ARCH_CAPABILITIES.MDS_NO=1 ++ and which get the new IA32_TSX_CTRL MSR through a microcode ++ update. This new MSR allows for the reliable deactivation of ++ the TSX functionality. ++ ++ on Enables TSX. ++ ++ Although there are mitigations for all known security ++ vulnerabilities, TSX has been known to be an accelerator for ++ several previous speculation-related CVEs, and so there may be ++ unknown security risks associated with leaving it enabled. ++ ++ auto Disables TSX if X86_BUG_TAA is present, otherwise enables TSX ++ on the system. ++ ============ ============================================================= ++ ++Not specifying this option is equivalent to "tsx=off". ++ ++The following combinations of the "tsx_async_abort" and "tsx" are possible. For ++affected platforms tsx=auto is equivalent to tsx=off and the result will be: ++ ++ ========= ========================== ========================================= ++ tsx=on tsx_async_abort=full The system will use VERW to clear CPU ++ buffers. Cross-thread attacks are still ++ possible on SMT machines. ++ tsx=on tsx_async_abort=full,nosmt As above, cross-thread attacks on SMT ++ mitigated. ++ tsx=on tsx_async_abort=off The system is vulnerable. ++ tsx=off tsx_async_abort=full TSX might be disabled if microcode ++ provides a TSX control MSR. If so, ++ system is not vulnerable. ++ tsx=off tsx_async_abort=full,nosmt Ditto ++ tsx=off tsx_async_abort=off ditto ++ ========= ========================== ========================================= ++ ++ ++For unaffected platforms "tsx=on" and "tsx_async_abort=full" does not clear CPU ++buffers. For platforms without TSX control (MSR_IA32_ARCH_CAPABILITIES.MDS_NO=0) ++"tsx" command line argument has no effect. ++ ++For the affected platforms below table indicates the mitigation status for the ++combinations of CPUID bit MD_CLEAR and IA32_ARCH_CAPABILITIES MSR bits MDS_NO ++and TSX_CTRL_MSR. ++ ++ ======= ========= ============= ======================================== ++ MDS_NO MD_CLEAR TSX_CTRL_MSR Status ++ ======= ========= ============= ======================================== ++ 0 0 0 Vulnerable (needs microcode) ++ 0 1 0 MDS and TAA mitigated via VERW ++ 1 1 0 MDS fixed, TAA vulnerable if TSX enabled ++ because MD_CLEAR has no meaning and ++ VERW is not guaranteed to clear buffers ++ 1 X 1 MDS fixed, TAA can be mitigated by ++ VERW or TSX_CTRL_MSR ++ ======= ========= ============= ======================================== ++ ++Mitigation selection guide ++-------------------------- ++ ++1. Trusted userspace and guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++If all user space applications are from a trusted source and do not execute ++untrusted code which is supplied externally, then the mitigation can be ++disabled. The same applies to virtualized environments with trusted guests. ++ ++ ++2. 
Untrusted userspace and guests ++^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ++ ++If there are untrusted applications or guests on the system, enabling TSX ++might allow a malicious actor to leak data from the host or from other ++processes running on the same physical core. ++ ++If the microcode is available and the TSX is disabled on the host, attacks ++are prevented in a virtualized environment as well, even if the VMs do not ++explicitly enable the mitigation. ++ ++ ++.. _taa_default_mitigations: ++ ++Default mitigations ++------------------- ++ ++The kernel's default action for vulnerable processors is: ++ ++ - Deploy TSX disable mitigation (tsx_async_abort=full tsx=off). +--- a/Documentation/admin-guide/kernel-parameters.txt ++++ b/Documentation/admin-guide/kernel-parameters.txt +@@ -2521,6 +2521,7 @@ + spec_store_bypass_disable=off [X86,PPC] + l1tf=off [X86] + mds=off [X86] ++ tsx_async_abort=off [X86] + + auto (default) + Mitigate all CPU vulnerabilities, but leave SMT +@@ -2536,6 +2537,7 @@ + be fully mitigated, even if it means losing SMT. + Equivalent to: l1tf=flush,nosmt [X86] + mds=full,nosmt [X86] ++ tsx_async_abort=full,nosmt [X86] + + mminit_loglevel= + [KNL] When CONFIG_DEBUG_MEMORY_INIT is set, this +@@ -4714,6 +4716,42 @@ + See Documentation/admin-guide/hw-vuln/tsx_async_abort.rst + for more details. + ++ tsx_async_abort= [X86,INTEL] Control mitigation for the TSX Async ++ Abort (TAA) vulnerability. ++ ++ Similar to Micro-architectural Data Sampling (MDS) ++ certain CPUs that support Transactional ++ Synchronization Extensions (TSX) are vulnerable to an ++ exploit against CPU internal buffers which can forward ++ information to a disclosure gadget under certain ++ conditions. ++ ++ In vulnerable processors, the speculatively forwarded ++ data can be used in a cache side channel attack, to ++ access data to which the attacker does not have direct ++ access. ++ ++ This parameter controls the TAA mitigation. The ++ options are: ++ ++ full - Enable TAA mitigation on vulnerable CPUs ++ if TSX is enabled. ++ ++ full,nosmt - Enable TAA mitigation and disable SMT on ++ vulnerable CPUs. If TSX is disabled, SMT ++ is not disabled because CPU is not ++ vulnerable to cross-thread TAA attacks. ++ off - Unconditionally disable TAA mitigation ++ ++ Not specifying this option is equivalent to ++ tsx_async_abort=full. On CPUs which are MDS affected ++ and deploy MDS mitigation, TAA mitigation is not ++ required and doesn't provide any additional ++ mitigation. ++ ++ For details see: ++ Documentation/admin-guide/hw-vuln/tsx_async_abort.rst ++ + turbografx.map[2|3]= [HW,JOY] + TurboGraFX parallel port interface + Format: +--- a/Documentation/x86/index.rst ++++ b/Documentation/x86/index.rst +@@ -6,3 +6,4 @@ x86 architecture specifics + :maxdepth: 1 + + mds ++ tsx_async_abort +--- /dev/null ++++ b/Documentation/x86/tsx_async_abort.rst +@@ -0,0 +1,117 @@ ++.. SPDX-License-Identifier: GPL-2.0 ++ ++TSX Async Abort (TAA) mitigation ++================================ ++ ++.. _tsx_async_abort: ++ ++Overview ++-------- ++ ++TSX Async Abort (TAA) is a side channel attack on internal buffers in some ++Intel processors similar to Microachitectural Data Sampling (MDS). In this ++case certain loads may speculatively pass invalid data to dependent operations ++when an asynchronous abort condition is pending in a Transactional ++Synchronization Extensions (TSX) transaction. This includes loads with no ++fault or assist condition. 
Such loads may speculatively expose stale data from ++the same uarch data structures as in MDS, with same scope of exposure i.e. ++same-thread and cross-thread. This issue affects all current processors that ++support TSX. ++ ++Mitigation strategy ++------------------- ++ ++a) TSX disable - one of the mitigations is to disable TSX. A new MSR ++IA32_TSX_CTRL will be available in future and current processors after ++microcode update which can be used to disable TSX. In addition, it ++controls the enumeration of the TSX feature bits (RTM and HLE) in CPUID. ++ ++b) Clear CPU buffers - similar to MDS, clearing the CPU buffers mitigates this ++vulnerability. More details on this approach can be found in ++:ref:`Documentation/admin-guide/hw-vuln/mds.rst `. ++ ++Kernel internal mitigation modes ++-------------------------------- ++ ++ ============= ============================================================ ++ off Mitigation is disabled. Either the CPU is not affected or ++ tsx_async_abort=off is supplied on the kernel command line. ++ ++ tsx disabled Mitigation is enabled. TSX feature is disabled by default at ++ bootup on processors that support TSX control. ++ ++ verw Mitigation is enabled. CPU is affected and MD_CLEAR is ++ advertised in CPUID. ++ ++ ucode needed Mitigation is enabled. CPU is affected and MD_CLEAR is not ++ advertised in CPUID. That is mainly for virtualization ++ scenarios where the host has the updated microcode but the ++ hypervisor does not expose MD_CLEAR in CPUID. It's a best ++ effort approach without guarantee. ++ ============= ============================================================ ++ ++If the CPU is affected and the "tsx_async_abort" kernel command line parameter is ++not provided then the kernel selects an appropriate mitigation depending on the ++status of RTM and MD_CLEAR CPUID bits. ++ ++Below tables indicate the impact of tsx=on|off|auto cmdline options on state of ++TAA mitigation, VERW behavior and TSX feature for various combinations of ++MSR_IA32_ARCH_CAPABILITIES bits. ++ ++1. "tsx=off" ++ ++========= ========= ============ ============ ============== =================== ====================== ++MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=off ++---------------------------------- ------------------------------------------------------------------------- ++TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation ++ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full ++========= ========= ============ ============ ============== =================== ====================== ++ 0 0 0 HW default Yes Same as MDS Same as MDS ++ 0 0 1 Invalid case Invalid case Invalid case Invalid case ++ 0 1 0 HW default No Need ucode update Need ucode update ++ 0 1 1 Disabled Yes TSX disabled TSX disabled ++ 1 X 1 Disabled X None needed None needed ++========= ========= ============ ============ ============== =================== ====================== ++ ++2. 
"tsx=on" ++ ++========= ========= ============ ============ ============== =================== ====================== ++MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=on ++---------------------------------- ------------------------------------------------------------------------- ++TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation ++ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full ++========= ========= ============ ============ ============== =================== ====================== ++ 0 0 0 HW default Yes Same as MDS Same as MDS ++ 0 0 1 Invalid case Invalid case Invalid case Invalid case ++ 0 1 0 HW default No Need ucode update Need ucode update ++ 0 1 1 Enabled Yes None Same as MDS ++ 1 X 1 Enabled X None needed None needed ++========= ========= ============ ============ ============== =================== ====================== ++ ++3. "tsx=auto" ++ ++========= ========= ============ ============ ============== =================== ====================== ++MSR_IA32_ARCH_CAPABILITIES bits Result with cmdline tsx=auto ++---------------------------------- ------------------------------------------------------------------------- ++TAA_NO MDS_NO TSX_CTRL_MSR TSX state VERW can clear TAA mitigation TAA mitigation ++ after bootup CPU buffers tsx_async_abort=off tsx_async_abort=full ++========= ========= ============ ============ ============== =================== ====================== ++ 0 0 0 HW default Yes Same as MDS Same as MDS ++ 0 0 1 Invalid case Invalid case Invalid case Invalid case ++ 0 1 0 HW default No Need ucode update Need ucode update ++ 0 1 1 Disabled Yes TSX disabled TSX disabled ++ 1 X 1 Enabled X None needed None needed ++========= ========= ============ ============ ============== =================== ====================== ++ ++In the tables, TSX_CTRL_MSR is a new bit in MSR_IA32_ARCH_CAPABILITIES that ++indicates whether MSR_IA32_TSX_CTRL is supported. ++ ++There are two control bits in IA32_TSX_CTRL MSR: ++ ++ Bit 0: When set it disables the Restricted Transactional Memory (RTM) ++ sub-feature of TSX (will force all transactions to abort on the ++ XBEGIN instruction). ++ ++ Bit 1: When set it disables the enumeration of the RTM and HLE feature ++ (i.e. it will make CPUID(EAX=7).EBX{bit4} and ++ CPUID(EAX=7).EBX{bit11} read as 0). diff --git a/debian/patches/bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch b/debian/patches/bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch new file mode 100644 index 000000000..ae61df728 --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch @@ -0,0 +1,131 @@ +From: Michal Hocko +Date: Wed, 23 Oct 2019 12:35:50 +0200 +Subject: x86/tsx: Add config options to set tsx=on|off|auto + +commit db616173d787395787ecc93eef075fa975227b10 upstream + +There is a general consensus that TSX usage is not largely spread while +the history shows there is a non trivial space for side channel attacks +possible. Therefore the tsx is disabled by default even on platforms +that might have a safe implementation of TSX according to the current +knowledge. This is a fair trade off to make. + +There are, however, workloads that really do benefit from using TSX and +updating to a newer kernel with TSX disabled might introduce a +noticeable regressions. This would be especially a problem for Linux +distributions which will provide TAA mitigations. 
+ +Introduce config options X86_INTEL_TSX_MODE_OFF, X86_INTEL_TSX_MODE_ON +and X86_INTEL_TSX_MODE_AUTO to control the TSX feature. The config +setting can be overridden by the tsx cmdline options. + + [ bp: Text cleanups from Josh. ] + +Suggested-by: Borislav Petkov +Signed-off-by: Michal Hocko +Signed-off-by: Pawan Gupta +Signed-off-by: Borislav Petkov +Signed-off-by: Thomas Gleixner +Reviewed-by: Josh Poimboeuf +--- + arch/x86/Kconfig | 45 +++++++++++++++++++++++++++++++++++++++ + arch/x86/kernel/cpu/tsx.c | 22 +++++++++++++------ + 2 files changed, 61 insertions(+), 6 deletions(-) + +--- a/arch/x86/Kconfig ++++ b/arch/x86/Kconfig +@@ -1903,6 +1903,51 @@ config X86_INTEL_MEMORY_PROTECTION_KEYS + + If unsure, say y. + ++choice ++ prompt "TSX enable mode" ++ depends on CPU_SUP_INTEL ++ default X86_INTEL_TSX_MODE_OFF ++ help ++ Intel's TSX (Transactional Synchronization Extensions) feature ++ allows to optimize locking protocols through lock elision which ++ can lead to a noticeable performance boost. ++ ++ On the other hand it has been shown that TSX can be exploited ++ to form side channel attacks (e.g. TAA) and chances are there ++ will be more of those attacks discovered in the future. ++ ++ Therefore TSX is not enabled by default (aka tsx=off). An admin ++ might override this decision by tsx=on the command line parameter. ++ Even with TSX enabled, the kernel will attempt to enable the best ++ possible TAA mitigation setting depending on the microcode available ++ for the particular machine. ++ ++ This option allows to set the default tsx mode between tsx=on, =off ++ and =auto. See Documentation/admin-guide/kernel-parameters.txt for more ++ details. ++ ++ Say off if not sure, auto if TSX is in use but it should be used on safe ++ platforms or on if TSX is in use and the security aspect of tsx is not ++ relevant. ++ ++config X86_INTEL_TSX_MODE_OFF ++ bool "off" ++ help ++ TSX is disabled if possible - equals to tsx=off command line parameter. ++ ++config X86_INTEL_TSX_MODE_ON ++ bool "on" ++ help ++ TSX is always enabled on TSX capable HW - equals the tsx=on command ++ line parameter. ++ ++config X86_INTEL_TSX_MODE_AUTO ++ bool "auto" ++ help ++ TSX is enabled on TSX capable HW that is believed to be safe against ++ side channel attacks- equals the tsx=auto command line parameter. 
++endchoice ++ + config EFI + bool "EFI runtime service support" + depends on ACPI +--- a/arch/x86/kernel/cpu/tsx.c ++++ b/arch/x86/kernel/cpu/tsx.c +@@ -73,6 +73,14 @@ static bool __init tsx_ctrl_is_supported + return !!(ia32_cap & ARCH_CAP_TSX_CTRL_MSR); + } + ++static enum tsx_ctrl_states x86_get_tsx_auto_mode(void) ++{ ++ if (boot_cpu_has_bug(X86_BUG_TAA)) ++ return TSX_CTRL_DISABLE; ++ ++ return TSX_CTRL_ENABLE; ++} ++ + void __init tsx_init(void) + { + char arg[5] = {}; +@@ -88,17 +96,19 @@ void __init tsx_init(void) + } else if (!strcmp(arg, "off")) { + tsx_ctrl_state = TSX_CTRL_DISABLE; + } else if (!strcmp(arg, "auto")) { +- if (boot_cpu_has_bug(X86_BUG_TAA)) +- tsx_ctrl_state = TSX_CTRL_DISABLE; +- else +- tsx_ctrl_state = TSX_CTRL_ENABLE; ++ tsx_ctrl_state = x86_get_tsx_auto_mode(); + } else { + tsx_ctrl_state = TSX_CTRL_DISABLE; + pr_err("tsx: invalid option, defaulting to off\n"); + } + } else { +- /* tsx= not provided, defaulting to off */ +- tsx_ctrl_state = TSX_CTRL_DISABLE; ++ /* tsx= not provided */ ++ if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_AUTO)) ++ tsx_ctrl_state = x86_get_tsx_auto_mode(); ++ else if (IS_ENABLED(CONFIG_X86_INTEL_TSX_MODE_OFF)) ++ tsx_ctrl_state = TSX_CTRL_DISABLE; ++ else ++ tsx_ctrl_state = TSX_CTRL_ENABLE; + } + + if (tsx_ctrl_state == TSX_CTRL_DISABLE) { diff --git a/debian/patches/bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch b/debian/patches/bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch new file mode 100644 index 000000000..7b58d708b --- /dev/null +++ b/debian/patches/bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch @@ -0,0 +1,44 @@ +From: Josh Poimboeuf +Date: Wed, 6 Nov 2019 20:26:46 -0600 +Subject: x86/speculation/taa: Fix printing of TAA_MSG_SMT on IBRS_ALL CPUs + +commit 012206a822a8b6ac09125bfaa210a95b9eb8f1c1 upstream + +For new IBRS_ALL CPUs, the Enhanced IBRS check at the beginning of +cpu_bugs_smt_update() causes the function to return early, unintentionally +skipping the MDS and TAA logic. + +This is not a problem for MDS, because there appears to be no overlap +between IBRS_ALL and MDS-affected CPUs. So the MDS mitigation would be +disabled and nothing would need to be done in this function anyway. + +But for TAA, the TAA_MSG_SMT string will never get printed on Cascade +Lake and newer. + +The check is superfluous anyway: when 'spectre_v2_enabled' is +SPECTRE_V2_IBRS_ENHANCED, 'spectre_v2_user' is always +SPECTRE_V2_USER_NONE, and so the 'spectre_v2_user' switch statement +handles it appropriately by doing nothing. So just remove the check. + +Fixes: 1b42f017415b ("x86/speculation/taa: Add mitigation for TSX Async Abort") +Signed-off-by: Josh Poimboeuf +Signed-off-by: Thomas Gleixner +Reviewed-by: Tyler Hicks +Reviewed-by: Borislav Petkov +--- + arch/x86/kernel/cpu/bugs.c | 4 ---- + 1 file changed, 4 deletions(-) + +--- a/arch/x86/kernel/cpu/bugs.c ++++ b/arch/x86/kernel/cpu/bugs.c +@@ -874,10 +874,6 @@ static void update_mds_branch_idle(void) + + void arch_smt_update(void) + { +- /* Enhanced IBRS implies STIBP. No update required. 
*/ +- if (spectre_v2_enabled == SPECTRE_V2_IBRS_ENHANCED) +- return; +- + mutex_lock(&spec_ctrl_mutex); + + switch (spectre_v2_user) { diff --git a/debian/patches/series b/debian/patches/series index 27b16c88b..d59fd8b2d 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -159,16 +159,6 @@ features/all/db-mok-keyring/0003-MODSIGN-checking-the-blacklisted-hash-before-lo features/all/db-mok-keyring/0004-MODSIGN-check-the-attributes-of-db-and-mok.patch features/all/db-mok-keyring/modsign-make-shash-allocation-failure-fatal.patch -# Security fixes -debian/i386-686-pae-pci-set-pci-nobios-by-default.patch -debian/ntfs-mark-it-as-broken.patch -bugfix/all/netfilter-conntrack-use-consistent-ct-id-hash-calcul.patch -bugfix/all/ALSA-usb-audio-Fix-an-OOB-bug-in-parse_audio_mixer_unit.patch -bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.patch -bugfix/all/vhost-make-sure-log_num-in_num.patch -bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch -bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch - # Fix exported symbol versions bugfix/all/module-disable-matching-missing-version-crc.patch @@ -304,5 +294,53 @@ features/arm64/arm64-dts-broadcom-Add-reference-to-RPi-3-A-Plus.patch features/arm/ARM-dts-bcm283x-Correct-vchiq-compatible-string.patch features/arm/staging-vc04_services-Use-correct-cache-line-size.patch +# Security fixes +debian/i386-686-pae-pci-set-pci-nobios-by-default.patch +debian/ntfs-mark-it-as-broken.patch +bugfix/all/netfilter-conntrack-use-consistent-ct-id-hash-calcul.patch +bugfix/all/ALSA-usb-audio-Fix-an-OOB-bug-in-parse_audio_mixer_unit.patch +bugfix/all/ALSA-usb-audio-Fix-a-stack-buffer-overflow-bug-in-check_input_term.patch +bugfix/all/vhost-make-sure-log_num-in_num.patch +bugfix/x86/x86-ptrace-fix-up-botched-merge-of-spectrev1-fix.patch +bugfix/all/KVM-coalesced_mmio-add-bounds-checking.patch +bugfix/x86/taa/0001-KVM-x86-use-Intel-speculation-bugs-and-features-as-d.patch +bugfix/x86/taa/0002-x86-msr-Add-the-IA32_TSX_CTRL-MSR.patch +bugfix/x86/taa/0003-x86-cpu-Add-a-helper-function-x86_read_arch_cap_msr.patch +bugfix/x86/taa/0004-x86-cpu-Add-a-tsx-cmdline-option-with-TSX-disabled-b.patch +bugfix/x86/taa/0005-x86-speculation-taa-Add-mitigation-for-TSX-Async-Abo.patch +bugfix/x86/taa/0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch +bugfix/x86/taa/0007-kvm-x86-Export-MDS_NO-0-to-guests-when-TSX-is-enable.patch +bugfix/x86/taa/0008-x86-tsx-Add-auto-option-to-the-tsx-cmdline-parameter.patch +bugfix/x86/taa/0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch +bugfix/x86/taa/0010-x86-tsx-Add-config-options-to-set-tsx-on-off-auto.patch +bugfix/x86/taa/0015-x86-speculation-taa-Fix-printing-of-TAA_MSG_SMT-on-I.patch +bugfix/x86/itlb_multihit/0011-x86-bugs-Add-ITLB_MULTIHIT-bug-infrastructure.patch +bugfix/x86/itlb_multihit/0013-cpu-speculation-Uninline-and-export-CPU-mitigations-.patch +bugfix/x86/itlb_multihit/0014-Documentation-Add-ITLB_MULTIHIT-documentation.patch +bugfix/x86/itlb_multihit/0016-kvm-x86-powerpc-do-not-allow-clearing-largepages-deb.patch +bugfix/x86/itlb_multihit/0017-kvm-Convert-kvm_lock-to-a-mutex.patch +bugfix/x86/itlb_multihit/0018-kvm-mmu-Do-not-release-the-page-inside-mmu_set_spte.patch +bugfix/x86/itlb_multihit/0019-KVM-x86-make-FNAME-fetch-and-__direct_map-more-simil.patch +bugfix/x86/itlb_multihit/0020-KVM-x86-remove-now-unneeded-hugepage-gfn-adjustment.patch +bugfix/x86/itlb_multihit/0021-KVM-x86-change-kvm_mmu_page_get_gfn-BUG_ON-to-WARN_O.patch 
+bugfix/x86/itlb_multihit/0022-KVM-x86-add-tracepoints-around-__direct_map-and-FNAM.patch +bugfix/x86/itlb_multihit/0023-KVM-vmx-svm-always-run-with-EFER.NXE-1-when-shadow-p.patch +bugfix/x86/itlb_multihit/0024-kvm-mmu-ITLB_MULTIHIT-mitigation.patch +bugfix/x86/itlb_multihit/0025-kvm-Add-helper-function-for-creating-VM-worker-threa.patch +bugfix/x86/itlb_multihit/0026-kvm-x86-mmu-Recovery-of-shattered-NX-large-pages.patch +bugfix/x86/i915/0001-drm-i915-Rename-gen7-cmdparser-tables.patch +bugfix/x86/i915/0002-drm-i915-Disable-Secure-Batches-for-gen6.patch +bugfix/x86/i915/0003-drm-i915-Remove-Master-tables-from-cmdparser.patch +bugfix/x86/i915/0004-drm-i915-Add-support-for-mandatory-cmdparsing.patch +bugfix/x86/i915/0005-drm-i915-Support-ro-ppgtt-mapped-cmdparser-shadow-bu.patch +bugfix/x86/i915/0006-drm-i915-Allow-parsing-of-unsized-batches.patch +bugfix/x86/i915/0007-drm-i915-Add-gen9-BCS-cmdparsing.patch +bugfix/x86/i915/0008-drm-i915-cmdparser-Use-explicit-goto-for-error-paths.patch +bugfix/x86/i915/0009-drm-i915-cmdparser-Add-support-for-backward-jumps.patch +bugfix/x86/i915/0010-drm-i915-cmdparser-Ignore-Length-operands-during-com.patch +bugfix/x86/i915/0011-drm-i915-Lower-RM-timeout-to-avoid-DSI-hard-hangs.patch +bugfix/x86/i915/0012-drm-i915-gen8-Add-RC6-CTX-corruption-WA.patch +bugfix/x86/i915/drm-i915-cmdparser-fix-jump-whitelist-clearing.patch + # ABI maintenance debian/abi/powerpc-avoid-abi-change-for-disabling-tm.patch
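
A minimal user-space sketch (illustrative only, not part of the patch series) for checking the new vulnerability report added by 0006-x86-speculation-taa-Add-sysfs-reporting-for-TSX-Asyn.patch. The sysfs path is the one created by that patch; the rest of the program is an assumption of this sketch.

/*
 * Illustrative only: print the TAA mitigation state exposed by the
 * tsx_async_abort sysfs file introduced in patch 0006. On kernels
 * without these patches the file does not exist and fopen() fails.
 */
#include <stdio.h>

int main(void)
{
	const char *path =
		"/sys/devices/system/cpu/vulnerabilities/tsx_async_abort";
	char line[256];
	FILE *f = fopen(path, "r");

	if (!f) {
		perror(path);
		return 1;
	}
	if (fgets(line, sizeof(line), f))
		printf("tsx_async_abort: %s", line);
	fclose(f);
	return 0;
}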
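
Similarly, a hedged sketch for verifying that booting with tsx=off (which sets TSX_CTRL_CPUID_CLEAR) hides the TSX sub-features from CPUID. The bit positions (CPUID(EAX=7).EBX bit 4 = HLE, bit 11 = RTM) are the ones quoted in the documentation added by 0009-x86-speculation-taa-Add-documentation-for-TSX-Async-.patch; the use of the GCC/clang <cpuid.h> helper is an assumption of this sketch, not something the patches require.

/*
 * Illustrative only: query CPUID leaf 7, subleaf 0 and report whether
 * HLE (EBX bit 4) and RTM (EBX bit 11) are still enumerated. After
 * tsx=off on a CPU with MSR_IA32_TSX_CTRL, both should read "no".
 */
#include <stdio.h>
#include <cpuid.h>

int main(void)
{
	unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0;

	if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx)) {
		fprintf(stderr, "CPUID leaf 7 not supported\n");
		return 1;
	}
	printf("HLE enumerated: %s\n", (ebx & (1u << 4))  ? "yes" : "no");
	printf("RTM enumerated: %s\n", (ebx & (1u << 11)) ? "yes" : "no");
	return 0;
}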