From 5b76884dc265e8166262bc834bdad7af13f57881 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Mon, 15 Feb 2016 22:56:18 +0000 Subject: [PATCH] [amd64] efi: Build our own page table structure This avoids adding W+X pages to the default page table, which is not only bad for security but also now triggers a warning on boot. --- debian/changelog | 2 + ...-build-our-own-page-table-structures.patch | 319 ++++++++++++++++++ ...ge-table-switching-code-into-efi_cal.patch | 215 ++++++++++++ ...into-the-identity-page-table-for-mix.patch | 71 ++++ ...parate-efi-page-tables-in-kexec-path.patch | 83 +++++ ...n-the-_end-symbol-to-avoid-pfn-conve.patch | 53 +++ ...e-cpa-pfn-only-contains-page-frame-n.patch | 144 ++++++++ debian/patches/series | 6 + 8 files changed, 893 insertions(+) create mode 100644 debian/patches/bugfix/x86/x86-efi-build-our-own-page-table-structures.patch create mode 100644 debian/patches/bugfix/x86/x86-efi-hoist-page-table-switching-code-into-efi_cal.patch create mode 100644 debian/patches/bugfix/x86/x86-efi-map-ram-into-the-identity-page-table-for-mix.patch create mode 100644 debian/patches/bugfix/x86/x86-efi-setup-separate-efi-page-tables-in-kexec-path.patch create mode 100644 debian/patches/bugfix/x86/x86-mm-page-align-the-_end-symbol-to-avoid-pfn-conve.patch create mode 100644 debian/patches/bugfix/x86/x86-mm-pat-ensure-cpa-pfn-only-contains-page-frame-n.patch diff --git a/debian/changelog b/debian/changelog index dd26cde3b..25f873941 100644 --- a/debian/changelog +++ b/debian/changelog @@ -31,6 +31,8 @@ linux (4.4.1-1) UNRELEASED; urgency=medium * udeb: Move most USB wireless drivers from nic-usb-modules to nic-wireless-modules * udeb: Really add virtio_input to virtio-modules (not input-modules) + * [x86] Fix issues resulting in W+X pages: + - [amd64] efi: Build our own page table structure [ Roger Shimizu ] * Enable TTY_PRINTK as module (Closes: #814540). diff --git a/debian/patches/bugfix/x86/x86-efi-build-our-own-page-table-structures.patch b/debian/patches/bugfix/x86/x86-efi-build-our-own-page-table-structures.patch new file mode 100644 index 000000000..2c61994ef --- /dev/null +++ b/debian/patches/bugfix/x86/x86-efi-build-our-own-page-table-structures.patch @@ -0,0 +1,319 @@ +From: Matt Fleming +Date: Fri, 27 Nov 2015 21:09:34 +0000 +Subject: [5/5] x86/efi: Build our own page table structures +Origin: https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/commit?id=67a9108ed4313b85a9c53406d80dc1ae3f8c3e36 + +With commit e1a58320a38d ("x86/mm: Warn on W^X mappings") all +users booting on 64-bit UEFI machines see the following warning, + + ------------[ cut here ]------------ + WARNING: CPU: 7 PID: 1 at arch/x86/mm/dump_pagetables.c:225 note_page+0x5dc/0x780() + x86/mm: Found insecure W+X mapping at address ffff88000005f000/0xffff88000005f000 + ... + x86/mm: Checked W+X mappings: FAILED, 165660 W+X pages found. + ... + +This is caused by mapping EFI regions with RWX permissions. +There isn't much we can do to restrict the permissions for these +regions due to the way the firmware toolchains mix code and +data, but we can at least isolate these mappings so that they do +not appear in the regular kernel page tables. + +In commit d2f7cbe7b26a ("x86/efi: Runtime services virtual +mapping") we started using 'trampoline_pgd' to map the EFI +regions because there was an existing identity mapping there +which we use during the SetVirtualAddressMap() call and for +broken firmware that accesses those addresses. + +But 'trampoline_pgd' shares some PGD entries with +'swapper_pg_dir' and does not provide the isolation we require. +Notably the virtual address for __START_KERNEL_map and +MODULES_START are mapped by the same PGD entry so we need to be +more careful when copying changes over in +efi_sync_low_kernel_mappings(). + +This patch doesn't go the full mile, we still want to share some +PGD entries with 'swapper_pg_dir'. Having completely separate +page tables brings its own issues such as synchronising new +mappings after memory hotplug and module loading. Sharing also +keeps memory usage down. + +Signed-off-by: Matt Fleming +Reviewed-by: Borislav Petkov +Acked-by: Borislav Petkov +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Andy Lutomirski +Cc: Ard Biesheuvel +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Jones +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sai Praneeth Prakhya +Cc: Stephen Smalley +Cc: Thomas Gleixner +Cc: Toshi Kani +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/1448658575-17029-6-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +--- + arch/x86/include/asm/efi.h | 1 + + arch/x86/platform/efi/efi.c | 39 ++++++----------- + arch/x86/platform/efi/efi_32.c | 5 +++ + arch/x86/platform/efi/efi_64.c | 97 +++++++++++++++++++++++++++++++++++------- + 4 files changed, 102 insertions(+), 40 deletions(-) + +diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h +index 347eeacb06a8..8fd9e637629a 100644 +--- a/arch/x86/include/asm/efi.h ++++ b/arch/x86/include/asm/efi.h +@@ -136,6 +136,7 @@ extern void __init efi_memory_uc(u64 addr, unsigned long size); + extern void __init efi_map_region(efi_memory_desc_t *md); + extern void __init efi_map_region_fixed(efi_memory_desc_t *md); + extern void efi_sync_low_kernel_mappings(void); ++extern int __init efi_alloc_page_tables(void); + extern int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages); + extern void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages); + extern void __init old_map_region(efi_memory_desc_t *md); +diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c +index ad285404ea7f..3c1f3cd7b2ba 100644 +--- a/arch/x86/platform/efi/efi.c ++++ b/arch/x86/platform/efi/efi.c +@@ -869,7 +869,7 @@ static void __init kexec_enter_virtual_mode(void) + * This function will switch the EFI runtime services to virtual mode. + * Essentially, we look through the EFI memmap and map every region that + * has the runtime attribute bit set in its memory descriptor into the +- * ->trampoline_pgd page table using a top-down VA allocation scheme. ++ * efi_pgd page table. + * + * The old method which used to update that memory descriptor with the + * virtual address obtained from ioremap() is still supported when the +@@ -879,8 +879,8 @@ static void __init kexec_enter_virtual_mode(void) + * + * The new method does a pagetable switch in a preemption-safe manner + * so that we're in a different address space when calling a runtime +- * function. For function arguments passing we do copy the PGDs of the +- * kernel page table into ->trampoline_pgd prior to each call. ++ * function. For function arguments passing we do copy the PUDs of the ++ * kernel page table into efi_pgd prior to each call. + * + * Specially for kexec boot, efi runtime maps in previous kernel should + * be passed in via setup_data. In that case runtime ranges will be mapped +@@ -895,6 +895,12 @@ static void __init __efi_enter_virtual_mode(void) + + efi.systab = NULL; + ++ if (efi_alloc_page_tables()) { ++ pr_err("Failed to allocate EFI page tables\n"); ++ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); ++ return; ++ } ++ + efi_merge_regions(); + new_memmap = efi_map_regions(&count, &pg_shift); + if (!new_memmap) { +@@ -954,28 +960,11 @@ static void __init __efi_enter_virtual_mode(void) + efi_runtime_mkexec(); + + /* +- * We mapped the descriptor array into the EFI pagetable above but we're +- * not unmapping it here. Here's why: +- * +- * We're copying select PGDs from the kernel page table to the EFI page +- * table and when we do so and make changes to those PGDs like unmapping +- * stuff from them, those changes appear in the kernel page table and we +- * go boom. +- * +- * From setup_real_mode(): +- * +- * ... +- * trampoline_pgd[0] = init_level4_pgt[pgd_index(__PAGE_OFFSET)].pgd; +- * +- * In this particular case, our allocation is in PGD 0 of the EFI page +- * table but we've copied that PGD from PGD[272] of the EFI page table: +- * +- * pgd_index(__PAGE_OFFSET = 0xffff880000000000) = 272 +- * +- * where the direct memory mapping in kernel space is. +- * +- * new_memmap's VA comes from that direct mapping and thus clearing it, +- * it would get cleared in the kernel page table too. ++ * We mapped the descriptor array into the EFI pagetable above ++ * but we're not unmapping it here because if we're running in ++ * EFI mixed mode we need all of memory to be accessible when ++ * we pass parameters to the EFI runtime services in the ++ * thunking code. + * + * efi_cleanup_page_tables(__pa(new_memmap), 1 << pg_shift); + */ +diff --git a/arch/x86/platform/efi/efi_32.c b/arch/x86/platform/efi/efi_32.c +index ed5b67338294..58d669bc8250 100644 +--- a/arch/x86/platform/efi/efi_32.c ++++ b/arch/x86/platform/efi/efi_32.c +@@ -38,6 +38,11 @@ + * say 0 - 3G. + */ + ++int __init efi_alloc_page_tables(void) ++{ ++ return 0; ++} ++ + void efi_sync_low_kernel_mappings(void) {} + void __init efi_dump_pagetable(void) {} + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c +index b19cdac959b2..4897f518760f 100644 +--- a/arch/x86/platform/efi/efi_64.c ++++ b/arch/x86/platform/efi/efi_64.c +@@ -40,6 +40,7 @@ + #include + #include + #include ++#include + + /* + * We allocate runtime services regions bottom-up, starting from -4G, i.e. +@@ -121,22 +122,92 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) + early_code_mapping_set_exec(0); + } + ++static pgd_t *efi_pgd; ++ ++/* ++ * We need our own copy of the higher levels of the page tables ++ * because we want to avoid inserting EFI region mappings (EFI_VA_END ++ * to EFI_VA_START) into the standard kernel page tables. Everything ++ * else can be shared, see efi_sync_low_kernel_mappings(). ++ */ ++int __init efi_alloc_page_tables(void) ++{ ++ pgd_t *pgd; ++ pud_t *pud; ++ gfp_t gfp_mask; ++ ++ if (efi_enabled(EFI_OLD_MEMMAP)) ++ return 0; ++ ++ gfp_mask = GFP_KERNEL | __GFP_NOTRACK | __GFP_REPEAT | __GFP_ZERO; ++ efi_pgd = (pgd_t *)__get_free_page(gfp_mask); ++ if (!efi_pgd) ++ return -ENOMEM; ++ ++ pgd = efi_pgd + pgd_index(EFI_VA_END); ++ ++ pud = pud_alloc_one(NULL, 0); ++ if (!pud) { ++ free_page((unsigned long)efi_pgd); ++ return -ENOMEM; ++ } ++ ++ pgd_populate(NULL, pgd, pud); ++ ++ return 0; ++} ++ + /* + * Add low kernel mappings for passing arguments to EFI functions. + */ + void efi_sync_low_kernel_mappings(void) + { +- unsigned num_pgds; +- pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); ++ unsigned num_entries; ++ pgd_t *pgd_k, *pgd_efi; ++ pud_t *pud_k, *pud_efi; + + if (efi_enabled(EFI_OLD_MEMMAP)) + return; + +- num_pgds = pgd_index(MODULES_END - 1) - pgd_index(PAGE_OFFSET); ++ /* ++ * We can share all PGD entries apart from the one entry that ++ * covers the EFI runtime mapping space. ++ * ++ * Make sure the EFI runtime region mappings are guaranteed to ++ * only span a single PGD entry and that the entry also maps ++ * other important kernel regions. ++ */ ++ BUILD_BUG_ON(pgd_index(EFI_VA_END) != pgd_index(MODULES_END)); ++ BUILD_BUG_ON((EFI_VA_START & PGDIR_MASK) != ++ (EFI_VA_END & PGDIR_MASK)); ++ ++ pgd_efi = efi_pgd + pgd_index(PAGE_OFFSET); ++ pgd_k = pgd_offset_k(PAGE_OFFSET); ++ ++ num_entries = pgd_index(EFI_VA_END) - pgd_index(PAGE_OFFSET); ++ memcpy(pgd_efi, pgd_k, sizeof(pgd_t) * num_entries); + +- memcpy(pgd + pgd_index(PAGE_OFFSET), +- init_mm.pgd + pgd_index(PAGE_OFFSET), +- sizeof(pgd_t) * num_pgds); ++ /* ++ * We share all the PUD entries apart from those that map the ++ * EFI regions. Copy around them. ++ */ ++ BUILD_BUG_ON((EFI_VA_START & ~PUD_MASK) != 0); ++ BUILD_BUG_ON((EFI_VA_END & ~PUD_MASK) != 0); ++ ++ pgd_efi = efi_pgd + pgd_index(EFI_VA_END); ++ pud_efi = pud_offset(pgd_efi, 0); ++ ++ pgd_k = pgd_offset_k(EFI_VA_END); ++ pud_k = pud_offset(pgd_k, 0); ++ ++ num_entries = pud_index(EFI_VA_END); ++ memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); ++ ++ pud_efi = pud_offset(pgd_efi, EFI_VA_START); ++ pud_k = pud_offset(pgd_k, EFI_VA_START); ++ ++ num_entries = PTRS_PER_PUD - pud_index(EFI_VA_START); ++ memcpy(pud_efi, pud_k, sizeof(pud_t) * num_entries); + } + + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) +@@ -150,8 +221,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + if (efi_enabled(EFI_OLD_MEMMAP)) + return 0; + +- efi_scratch.efi_pgt = (pgd_t *)(unsigned long)real_mode_header->trampoline_pgd; +- pgd = __va(efi_scratch.efi_pgt); ++ efi_scratch.efi_pgt = (pgd_t *)__pa(efi_pgd); ++ pgd = efi_pgd; + + /* + * It can happen that the physical address of new_memmap lands in memory +@@ -216,16 +287,14 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + + void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) + { +- pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); +- +- kernel_unmap_pages_in_pgd(pgd, pa_memmap, num_pages); ++ kernel_unmap_pages_in_pgd(efi_pgd, pa_memmap, num_pages); + } + + static void __init __map_region(efi_memory_desc_t *md, u64 va) + { +- pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); + unsigned long flags = 0; + unsigned long pfn; ++ pgd_t *pgd = efi_pgd; + + if (!(md->attribute & EFI_MEMORY_WB)) + flags |= _PAGE_PCD; +@@ -334,9 +403,7 @@ void __init efi_runtime_mkexec(void) + void __init efi_dump_pagetable(void) + { + #ifdef CONFIG_EFI_PGT_DUMP +- pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); +- +- ptdump_walk_pgd_level(NULL, pgd); ++ ptdump_walk_pgd_level(NULL, efi_pgd); + #endif + } + diff --git a/debian/patches/bugfix/x86/x86-efi-hoist-page-table-switching-code-into-efi_cal.patch b/debian/patches/bugfix/x86/x86-efi-hoist-page-table-switching-code-into-efi_cal.patch new file mode 100644 index 000000000..4b3230ebc --- /dev/null +++ b/debian/patches/bugfix/x86/x86-efi-hoist-page-table-switching-code-into-efi_cal.patch @@ -0,0 +1,215 @@ +From: Matt Fleming +Date: Fri, 27 Nov 2015 21:09:33 +0000 +Subject: [4/5] x86/efi: Hoist page table switching code into efi_call_virt() +Origin: https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/commit?id=c9f2a9a65e4855b74d92cdad688f6ee4a1a323ff + +This change is a prerequisite for pending patches that switch to +a dedicated EFI page table, instead of using 'trampoline_pgd' +which shares PGD entries with 'swapper_pg_dir'. The pending +patches make it impossible to dereference the runtime service +function pointer without first switching %cr3. + +It's true that we now have duplicated switching code in +efi_call_virt() and efi_call_phys_{prolog,epilog}() but we are +sacrificing code duplication for a little more clarity and the +ease of writing the page table switching code in C instead of +asm. + +Signed-off-by: Matt Fleming +Reviewed-by: Borislav Petkov +Acked-by: Borislav Petkov +Cc: Andrew Morton +Cc: Andy Lutomirski +Cc: Andy Lutomirski +Cc: Ard Biesheuvel +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Jones +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sai Praneeth Prakhya +Cc: Stephen Smalley +Cc: Thomas Gleixner +Cc: Toshi Kani +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/1448658575-17029-5-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +--- + arch/x86/include/asm/efi.h | 25 +++++++++++++++++++++ + arch/x86/platform/efi/efi_64.c | 24 ++++++++++----------- + arch/x86/platform/efi/efi_stub_64.S | 43 ------------------------------------- + 3 files changed, 36 insertions(+), 56 deletions(-) + +diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h +index 0010c78c4998..347eeacb06a8 100644 +--- a/arch/x86/include/asm/efi.h ++++ b/arch/x86/include/asm/efi.h +@@ -3,6 +3,7 @@ + + #include + #include ++#include + + /* + * We map the EFI regions needed for runtime services non-contiguously, +@@ -64,6 +65,17 @@ extern u64 asmlinkage efi_call(void *fp, ...); + + #define efi_call_phys(f, args...) efi_call((f), args) + ++/* ++ * Scratch space used for switching the pagetable in the EFI stub ++ */ ++struct efi_scratch { ++ u64 r15; ++ u64 prev_cr3; ++ pgd_t *efi_pgt; ++ bool use_pgd; ++ u64 phys_stack; ++} __packed; ++ + #define efi_call_virt(f, ...) \ + ({ \ + efi_status_t __s; \ +@@ -71,7 +83,20 @@ extern u64 asmlinkage efi_call(void *fp, ...); + efi_sync_low_kernel_mappings(); \ + preempt_disable(); \ + __kernel_fpu_begin(); \ ++ \ ++ if (efi_scratch.use_pgd) { \ ++ efi_scratch.prev_cr3 = read_cr3(); \ ++ write_cr3((unsigned long)efi_scratch.efi_pgt); \ ++ __flush_tlb_all(); \ ++ } \ ++ \ + __s = efi_call((void *)efi.systab->runtime->f, __VA_ARGS__); \ ++ \ ++ if (efi_scratch.use_pgd) { \ ++ write_cr3(efi_scratch.prev_cr3); \ ++ __flush_tlb_all(); \ ++ } \ ++ \ + __kernel_fpu_end(); \ + preempt_enable(); \ + __s; \ +diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c +index 102976dda8c4..b19cdac959b2 100644 +--- a/arch/x86/platform/efi/efi_64.c ++++ b/arch/x86/platform/efi/efi_64.c +@@ -47,16 +47,7 @@ + */ + static u64 efi_va = EFI_VA_START; + +-/* +- * Scratch space used for switching the pagetable in the EFI stub +- */ +-struct efi_scratch { +- u64 r15; +- u64 prev_cr3; +- pgd_t *efi_pgt; +- bool use_pgd; +- u64 phys_stack; +-} __packed; ++struct efi_scratch efi_scratch; + + static void __init early_code_mapping_set_exec(int executable) + { +@@ -83,8 +74,11 @@ pgd_t * __init efi_call_phys_prolog(void) + int pgd; + int n_pgds; + +- if (!efi_enabled(EFI_OLD_MEMMAP)) +- return NULL; ++ if (!efi_enabled(EFI_OLD_MEMMAP)) { ++ save_pgd = (pgd_t *)read_cr3(); ++ write_cr3((unsigned long)efi_scratch.efi_pgt); ++ goto out; ++ } + + early_code_mapping_set_exec(1); + +@@ -96,6 +90,7 @@ pgd_t * __init efi_call_phys_prolog(void) + vaddress = (unsigned long)__va(pgd * PGDIR_SIZE); + set_pgd(pgd_offset_k(pgd * PGDIR_SIZE), *pgd_offset_k(vaddress)); + } ++out: + __flush_tlb_all(); + + return save_pgd; +@@ -109,8 +104,11 @@ void __init efi_call_phys_epilog(pgd_t *save_pgd) + int pgd_idx; + int nr_pgds; + +- if (!save_pgd) ++ if (!efi_enabled(EFI_OLD_MEMMAP)) { ++ write_cr3((unsigned long)save_pgd); ++ __flush_tlb_all(); + return; ++ } + + nr_pgds = DIV_ROUND_UP((max_pfn << PAGE_SHIFT) , PGDIR_SIZE); + +diff --git a/arch/x86/platform/efi/efi_stub_64.S b/arch/x86/platform/efi/efi_stub_64.S +index 86d0f9e08dd9..32020cb8bb08 100644 +--- a/arch/x86/platform/efi/efi_stub_64.S ++++ b/arch/x86/platform/efi/efi_stub_64.S +@@ -38,41 +38,6 @@ + mov %rsi, %cr0; \ + mov (%rsp), %rsp + +- /* stolen from gcc */ +- .macro FLUSH_TLB_ALL +- movq %r15, efi_scratch(%rip) +- movq %r14, efi_scratch+8(%rip) +- movq %cr4, %r15 +- movq %r15, %r14 +- andb $0x7f, %r14b +- movq %r14, %cr4 +- movq %r15, %cr4 +- movq efi_scratch+8(%rip), %r14 +- movq efi_scratch(%rip), %r15 +- .endm +- +- .macro SWITCH_PGT +- cmpb $0, efi_scratch+24(%rip) +- je 1f +- movq %r15, efi_scratch(%rip) # r15 +- # save previous CR3 +- movq %cr3, %r15 +- movq %r15, efi_scratch+8(%rip) # prev_cr3 +- movq efi_scratch+16(%rip), %r15 # EFI pgt +- movq %r15, %cr3 +- 1: +- .endm +- +- .macro RESTORE_PGT +- cmpb $0, efi_scratch+24(%rip) +- je 2f +- movq efi_scratch+8(%rip), %r15 +- movq %r15, %cr3 +- movq efi_scratch(%rip), %r15 +- FLUSH_TLB_ALL +- 2: +- .endm +- + ENTRY(efi_call) + SAVE_XMM + mov (%rsp), %rax +@@ -83,16 +48,8 @@ ENTRY(efi_call) + mov %r8, %r9 + mov %rcx, %r8 + mov %rsi, %rcx +- SWITCH_PGT + call *%rdi +- RESTORE_PGT + addq $48, %rsp + RESTORE_XMM + ret + ENDPROC(efi_call) +- +- .data +-ENTRY(efi_scratch) +- .fill 3,8,0 +- .byte 0 +- .quad 0 diff --git a/debian/patches/bugfix/x86/x86-efi-map-ram-into-the-identity-page-table-for-mix.patch b/debian/patches/bugfix/x86/x86-efi-map-ram-into-the-identity-page-table-for-mix.patch new file mode 100644 index 000000000..8d2bddcdc --- /dev/null +++ b/debian/patches/bugfix/x86/x86-efi-map-ram-into-the-identity-page-table-for-mix.patch @@ -0,0 +1,71 @@ +From: Matt Fleming +Date: Fri, 27 Nov 2015 21:09:32 +0000 +Subject: [3/5] x86/efi: Map RAM into the identity page table for mixed mode +Origin: https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/commit?id=b61a76f8850d2979550abc42d7e09154ebb8d785 + +We are relying on the pre-existing mappings in 'trampoline_pgd' +when accessing function arguments in the EFI mixed mode thunking +code. + +Instead let's map memory explicitly so that things will continue +to work when we move to a separate page table in the future. + +Signed-off-by: Matt Fleming +Reviewed-by: Borislav Petkov +Acked-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Ard Biesheuvel +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sai Praneeth Prakhya +Cc: Thomas Gleixner +Cc: Toshi Kani +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/1448658575-17029-4-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +--- + arch/x86/platform/efi/efi_64.c | 20 ++++++++++++++++++++ + 1 file changed, 20 insertions(+) + +diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c +index 5aa186db59e3..102976dda8c4 100644 +--- a/arch/x86/platform/efi/efi_64.c ++++ b/arch/x86/platform/efi/efi_64.c +@@ -144,6 +144,7 @@ void efi_sync_low_kernel_mappings(void) + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + { + unsigned long pfn, text; ++ efi_memory_desc_t *md; + struct page *page; + unsigned npages; + pgd_t *pgd; +@@ -177,6 +178,25 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + if (!IS_ENABLED(CONFIG_EFI_MIXED)) + return 0; + ++ /* ++ * Map all of RAM so that we can access arguments in the 1:1 ++ * mapping when making EFI runtime calls. ++ */ ++ for_each_efi_memory_desc(&memmap, md) { ++ if (md->type != EFI_CONVENTIONAL_MEMORY && ++ md->type != EFI_LOADER_DATA && ++ md->type != EFI_LOADER_CODE) ++ continue; ++ ++ pfn = md->phys_addr >> PAGE_SHIFT; ++ npages = md->num_pages; ++ ++ if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, npages, 0)) { ++ pr_err("Failed to map 1:1 memory\n"); ++ return 1; ++ } ++ } ++ + page = alloc_page(GFP_KERNEL|__GFP_DMA32); + if (!page) + panic("Unable to allocate EFI runtime stack < 4GB\n"); diff --git a/debian/patches/bugfix/x86/x86-efi-setup-separate-efi-page-tables-in-kexec-path.patch b/debian/patches/bugfix/x86/x86-efi-setup-separate-efi-page-tables-in-kexec-path.patch new file mode 100644 index 000000000..b95ee8f3a --- /dev/null +++ b/debian/patches/bugfix/x86/x86-efi-setup-separate-efi-page-tables-in-kexec-path.patch @@ -0,0 +1,83 @@ +From: Matt Fleming +Date: Thu, 21 Jan 2016 14:11:59 +0000 +Subject: x86/efi: Setup separate EFI page tables in kexec paths +Origin: https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/commit?id=753b11ef8e92a1c1bbe97f2a5ec14bdd1ef2e6fe + +The switch to using a new dedicated page table for EFI runtime +calls in commit commit 67a9108ed431 ("x86/efi: Build our own +page table structures") failed to take into account changes +required for the kexec code paths, which are unfortunately +duplicated in the EFI code. + +Call the allocation and setup functions in +kexec_enter_virtual_mode() just like we do for +__efi_enter_virtual_mode() to avoid hitting NULL-pointer +dereferences when making EFI runtime calls. + +At the very least, the call to efi_setup_page_tables() should +have existed for kexec before the following commit: + + 67a9108ed431 ("x86/efi: Build our own page table structures") + +Things just magically worked because we were actually using +the kernel's page tables that contained the required mappings. + +Reported-by: Srikar Dronamraju +Tested-by: Srikar Dronamraju +Signed-off-by: Matt Fleming +Cc: Andy Lutomirski +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Young +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Raghavendra K T +Cc: Thomas Gleixner +Link: http://lkml.kernel.org/r/1453385519-11477-1-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +--- + arch/x86/platform/efi/efi.c | 15 +++++++++++++++ + 1 file changed, 15 insertions(+) + +diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c +index 3c1f3cd7b2ba..bdd9477f937c 100644 +--- a/arch/x86/platform/efi/efi.c ++++ b/arch/x86/platform/efi/efi.c +@@ -815,6 +815,7 @@ static void __init kexec_enter_virtual_mode(void) + { + #ifdef CONFIG_KEXEC_CORE + efi_memory_desc_t *md; ++ unsigned int num_pages; + void *p; + + efi.systab = NULL; +@@ -829,6 +830,12 @@ static void __init kexec_enter_virtual_mode(void) + return; + } + ++ if (efi_alloc_page_tables()) { ++ pr_err("Failed to allocate EFI page tables\n"); ++ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); ++ return; ++ } ++ + /* + * Map efi regions which were passed via setup_data. The virt_addr is a + * fixed addr which was used in first kernel of a kexec boot. +@@ -843,6 +850,14 @@ static void __init kexec_enter_virtual_mode(void) + + BUG_ON(!efi.systab); + ++ num_pages = ALIGN(memmap.nr_map * memmap.desc_size, PAGE_SIZE); ++ num_pages >>= PAGE_SHIFT; ++ ++ if (efi_setup_page_tables(memmap.phys_map, num_pages)) { ++ clear_bit(EFI_RUNTIME_SERVICES, &efi.flags); ++ return; ++ } ++ + efi_sync_low_kernel_mappings(); + + /* diff --git a/debian/patches/bugfix/x86/x86-mm-page-align-the-_end-symbol-to-avoid-pfn-conve.patch b/debian/patches/bugfix/x86/x86-mm-page-align-the-_end-symbol-to-avoid-pfn-conve.patch new file mode 100644 index 000000000..0fb24e077 --- /dev/null +++ b/debian/patches/bugfix/x86/x86-mm-page-align-the-_end-symbol-to-avoid-pfn-conve.patch @@ -0,0 +1,53 @@ +From: Matt Fleming +Date: Fri, 27 Nov 2015 21:09:30 +0000 +Subject: [1/5] x86/mm: Page align the '_end' symbol to avoid pfn conversion + bugs +Origin: https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/commit?id=21cdb6b568435738cc0b303b2b3b82742396310c + +Ingo noted that if we can guarantee _end is aligned to PAGE_SIZE +we can automatically avoid bugs along the lines of, + + size = _end - _text >> PAGE_SHIFT + +which is missing a call to PFN_ALIGN(). The EFI mixed mode +contains this bug, for example. + +_text is already aligned to PAGE_SIZE through the use of +LOAD_PHYSICAL_ADDR, and the BSS and BRK sections are explicitly +aligned in the linker script, so it makes sense to align _end to +match. + +Reported-by: Ingo Molnar +Signed-off-by: Matt Fleming +Acked-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Ard Biesheuvel +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sai Praneeth Prakhya +Cc: Thomas Gleixner +Cc: Toshi Kani +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/1448658575-17029-2-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +--- + arch/x86/kernel/vmlinux.lds.S | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S +index 74e4bf11f562..4f1994257a18 100644 +--- a/arch/x86/kernel/vmlinux.lds.S ++++ b/arch/x86/kernel/vmlinux.lds.S +@@ -325,6 +325,7 @@ SECTIONS + __brk_limit = .; + } + ++ . = ALIGN(PAGE_SIZE); + _end = .; + + STABS_DEBUG diff --git a/debian/patches/bugfix/x86/x86-mm-pat-ensure-cpa-pfn-only-contains-page-frame-n.patch b/debian/patches/bugfix/x86/x86-mm-pat-ensure-cpa-pfn-only-contains-page-frame-n.patch new file mode 100644 index 000000000..b7db9a3be --- /dev/null +++ b/debian/patches/bugfix/x86/x86-mm-pat-ensure-cpa-pfn-only-contains-page-frame-n.patch @@ -0,0 +1,144 @@ +From: Matt Fleming +Date: Fri, 27 Nov 2015 21:09:31 +0000 +Subject: [2/5] x86/mm/pat: Ensure cpa->pfn only contains page frame numbers +Origin: https://git.kernel.org/cgit/linux/kernel/git/mfleming/efi.git/commit?id=edc3b9129cecd0f0857112136f5b8b1bc1d45918 + +The x86 pageattr code is confused about the data that is stored +in cpa->pfn, sometimes it's treated as a page frame number, +sometimes it's treated as an unshifted physical address, and in +one place it's treated as a pte. + +The result of this is that the mapping functions do not map the +intended physical address. + +This isn't a problem in practice because most of the addresses +we're mapping in the EFI code paths are already mapped in +'trampoline_pgd' and so the pageattr mapping functions don't +actually do anything in this case. But when we move to using a +separate page table for the EFI runtime this will be an issue. + +Signed-off-by: Matt Fleming +Reviewed-by: Borislav Petkov +Acked-by: Borislav Petkov +Cc: Andy Lutomirski +Cc: Ard Biesheuvel +Cc: Borislav Petkov +Cc: Brian Gerst +Cc: Dave Hansen +Cc: Denys Vlasenko +Cc: H. Peter Anvin +Cc: Linus Torvalds +Cc: Peter Zijlstra +Cc: Sai Praneeth Prakhya +Cc: Thomas Gleixner +Cc: Toshi Kani +Cc: linux-efi@vger.kernel.org +Link: http://lkml.kernel.org/r/1448658575-17029-3-git-send-email-matt@codeblueprint.co.uk +Signed-off-by: Ingo Molnar +--- + arch/x86/mm/pageattr.c | 17 ++++++----------- + arch/x86/platform/efi/efi_64.c | 16 ++++++++++------ + 2 files changed, 16 insertions(+), 17 deletions(-) + +diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c +index a3137a4feed1..c70e42014101 100644 +--- a/arch/x86/mm/pageattr.c ++++ b/arch/x86/mm/pageattr.c +@@ -905,15 +905,10 @@ static void populate_pte(struct cpa_data *cpa, + pte = pte_offset_kernel(pmd, start); + + while (num_pages-- && start < end) { +- +- /* deal with the NX bit */ +- if (!(pgprot_val(pgprot) & _PAGE_NX)) +- cpa->pfn &= ~_PAGE_NX; +- +- set_pte(pte, pfn_pte(cpa->pfn >> PAGE_SHIFT, pgprot)); ++ set_pte(pte, pfn_pte(cpa->pfn, pgprot)); + + start += PAGE_SIZE; +- cpa->pfn += PAGE_SIZE; ++ cpa->pfn++; + pte++; + } + } +@@ -969,11 +964,11 @@ static int populate_pmd(struct cpa_data *cpa, + + pmd = pmd_offset(pud, start); + +- set_pmd(pmd, __pmd(cpa->pfn | _PAGE_PSE | ++ set_pmd(pmd, __pmd(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | + massage_pgprot(pmd_pgprot))); + + start += PMD_SIZE; +- cpa->pfn += PMD_SIZE; ++ cpa->pfn += PMD_SIZE >> PAGE_SHIFT; + cur_pages += PMD_SIZE >> PAGE_SHIFT; + } + +@@ -1042,11 +1037,11 @@ static int populate_pud(struct cpa_data *cpa, unsigned long start, pgd_t *pgd, + * Map everything starting from the Gb boundary, possibly with 1G pages + */ + while (end - start >= PUD_SIZE) { +- set_pud(pud, __pud(cpa->pfn | _PAGE_PSE | ++ set_pud(pud, __pud(cpa->pfn << PAGE_SHIFT | _PAGE_PSE | + massage_pgprot(pud_pgprot))); + + start += PUD_SIZE; +- cpa->pfn += PUD_SIZE; ++ cpa->pfn += PUD_SIZE >> PAGE_SHIFT; + cur_pages += PUD_SIZE >> PAGE_SHIFT; + pud++; + } +diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c +index a0ac0f9c307f..5aa186db59e3 100644 +--- a/arch/x86/platform/efi/efi_64.c ++++ b/arch/x86/platform/efi/efi_64.c +@@ -143,7 +143,7 @@ void efi_sync_low_kernel_mappings(void) + + int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + { +- unsigned long text; ++ unsigned long pfn, text; + struct page *page; + unsigned npages; + pgd_t *pgd; +@@ -160,7 +160,8 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + * and ident-map those pages containing the map before calling + * phys_efi_set_virtual_address_map(). + */ +- if (kernel_map_pages_in_pgd(pgd, pa_memmap, pa_memmap, num_pages, _PAGE_NX)) { ++ pfn = pa_memmap >> PAGE_SHIFT; ++ if (kernel_map_pages_in_pgd(pgd, pfn, pa_memmap, num_pages, _PAGE_NX)) { + pr_err("Error ident-mapping new memmap (0x%lx)!\n", pa_memmap); + return 1; + } +@@ -185,8 +186,9 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) + + npages = (_end - _text) >> PAGE_SHIFT; + text = __pa(_text); ++ pfn = text >> PAGE_SHIFT; + +- if (kernel_map_pages_in_pgd(pgd, text >> PAGE_SHIFT, text, npages, 0)) { ++ if (kernel_map_pages_in_pgd(pgd, pfn, text, npages, 0)) { + pr_err("Failed to map kernel text 1:1\n"); + return 1; + } +@@ -204,12 +206,14 @@ void __init efi_cleanup_page_tables(unsigned long pa_memmap, unsigned num_pages) + static void __init __map_region(efi_memory_desc_t *md, u64 va) + { + pgd_t *pgd = (pgd_t *)__va(real_mode_header->trampoline_pgd); +- unsigned long pf = 0; ++ unsigned long flags = 0; ++ unsigned long pfn; + + if (!(md->attribute & EFI_MEMORY_WB)) +- pf |= _PAGE_PCD; ++ flags |= _PAGE_PCD; + +- if (kernel_map_pages_in_pgd(pgd, md->phys_addr, va, md->num_pages, pf)) ++ pfn = md->phys_addr >> PAGE_SHIFT; ++ if (kernel_map_pages_in_pgd(pgd, pfn, va, md->num_pages, flags)) + pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, va); + } diff --git a/debian/patches/series b/debian/patches/series index 136807678..eaf3ec4de 100644 --- a/debian/patches/series +++ b/debian/patches/series @@ -124,3 +124,9 @@ bugfix/all/revert-workqueue-make-sure-delayed-work-run-in-local-cpu.patch bugfix/all/af_unix-don-t-set-err-in-unix_stream_read_generic-unless-there-was-an-error.patch bugfix/all/bpf-fix-branch-offset-adjustment-on-backjumps-after-.patch bugfix/all/alsa-usb-audio-avoid-freeing-umidi-object-twice.patch +bugfix/x86/x86-mm-page-align-the-_end-symbol-to-avoid-pfn-conve.patch +bugfix/x86/x86-mm-pat-ensure-cpa-pfn-only-contains-page-frame-n.patch +bugfix/x86/x86-efi-map-ram-into-the-identity-page-table-for-mix.patch +bugfix/x86/x86-efi-hoist-page-table-switching-code-into-efi_cal.patch +bugfix/x86/x86-efi-build-our-own-page-table-structures.patch +bugfix/x86/x86-efi-setup-separate-efi-page-tables-in-kexec-path.patch