--- sys/amd64/amd64/pmap.c.orig
+++ sys/amd64/amd64/pmap.c
@@ -1805,6 +1805,51 @@
 	m->md.pat_mode = PAT_WRITE_BACK;
 }
 
+static int pmap_allow_2m_x_ept;
+SYSCTL_INT(_vm_pmap, OID_AUTO, allow_2m_x_ept, CTLFLAG_RWTUN | CTLFLAG_NOFETCH,
+    &pmap_allow_2m_x_ept, 0,
+    "Allow executable superpage mappings in EPT");
+
+void
+pmap_allow_2m_x_ept_recalculate(void)
+{
+	/*
+	 * SKL002, SKL012S.  Since the EPT format is only used by
+	 * Intel CPUs, the vendor check is merely a formality.
+	 */
+	if (!(cpu_vendor_id != CPU_VENDOR_INTEL ||
+	    (cpu_ia32_arch_caps & IA32_ARCH_CAP_IF_PSCHANGE_MC_NO) != 0 ||
+	    (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x26 ||	/* Atoms */
+	    CPUID_TO_MODEL(cpu_id) == 0x27 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x35 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x36 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x37 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x86 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5f ||
+	    CPUID_TO_MODEL(cpu_id) == 0x6e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x7a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x57 ||	/* Knights */
+	    CPUID_TO_MODEL(cpu_id) == 0x85))))
+		pmap_allow_2m_x_ept = 1;	/* no test above matched */
+	TUNABLE_INT_FETCH("hw.allow_2m_x_ept", &pmap_allow_2m_x_ept);
+}
+
+static bool
+pmap_allow_2m_x_page(pmap_t pmap, bool executable)
+{
+	/* False only for executable PT_EPT with pmap_allow_2m_x_ept set. */
+	return (pmap->pm_type != PT_EPT || !executable ||
+	    !pmap_allow_2m_x_ept);	/* NOTE(review): nonzero forbids */
+}
+
 /*
  * Initialize the pmap module.
  * Called by vm_init, to initialize any structures that the pmap
@@ -1849,6 +1894,9 @@
 		}
 	}
 
+	/* IFU: initialize the pmap_allow_2m_x_ept setting */
+	pmap_allow_2m_x_ept_recalculate();
+
 	/*
 	 * Initialize the vm page array entries for the kernel pmap's
 	 * page table pages.
@@ -5452,6 +5500,15 @@
 }
 
 #if VM_NRESERVLEVEL > 0
+static bool
+pmap_pde_ept_executable(pmap_t pmap, pd_entry_t pde)
+{
+	/* True only if pmap is PT_EPT and pde has EPT_PG_EXECUTE set. */
+	if (pmap->pm_type != PT_EPT)
+		return (false);
+	return ((pde & EPT_PG_EXECUTE) != 0);
+}
+
 /*
  * Tries to promote the 512, contiguous 4KB page mappings that are within a
  * single page table page (PTP) to a single 2MB page mapping.  For promotion
@@ -5487,7 +5544,9 @@
 	firstpte = (pt_entry_t *)PHYS_TO_DMAP(*pde & PG_FRAME);
 setpde:
 	newpde = *firstpte;
-	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V)) {
+	if ((newpde & ((PG_FRAME & PDRMASK) | PG_A | PG_V)) != (PG_A | PG_V) ||
+	    !pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+	    newpde))) {
 		atomic_add_long(&pmap_pde_p_failures, 1);
 		CTR2(KTR_PMAP, "pmap_promote_pde: failure for va %#lx"
 		    " in pmap %p", va, pmap);
@@ -5917,6 +5976,12 @@
 	PG_V = pmap_valid_bit(pmap);
 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
 
+	if (!pmap_allow_2m_x_page(pmap, pmap_pde_ept_executable(pmap,
+	    newpde))) {
+		CTR2(KTR_PMAP, "pmap_enter_pde: 2m x blocked for va %#lx"
+		    " in pmap %p", va, pmap);
+		return (KERN_FAILURE);
+	}
 	if ((pdpg = pmap_allocpde(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
 	    NULL : lockp)) == NULL) {
 		CTR2(KTR_PMAP, "pmap_enter_pde: failure for va %#lx"
@@ -6063,6 +6128,7 @@
 		va = start + ptoa(diff);
 		if ((va & PDRMASK) == 0 && va + NBPDR <= end &&
 		    m->psind == 1 && pmap_ps_enabled(pmap) &&
+		    pmap_allow_2m_x_page(pmap, (prot & VM_PROT_EXECUTE) != 0) &&
 		    pmap_enter_2mpage(pmap, va, m, prot, &lock))
 			m = &m[NBPDR / PAGE_SIZE - 1];
 		else
--- sys/amd64/include/pmap.h.orig
+++ sys/amd64/include/pmap.h
@@ -424,6 +424,7 @@
 
 void	pmap_activate_boot(pmap_t pmap);
 void	pmap_activate_sw(struct thread *);
+void	pmap_allow_2m_x_ept_recalculate(void);
 void	pmap_bootstrap(vm_paddr_t *);
 int	pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde);
 int	pmap_change_attr(vm_offset_t, vm_size_t, int);
--- sys/dev/cpuctl/cpuctl.c.orig
+++ sys/dev/cpuctl/cpuctl.c
@@ -50,6 +50,10 @@
 #include
 #include
 
+#include
+#include
+#include
+
 #include
 #include
 #include
@@ -539,6 +543,7 @@
 	hw_ssb_recalculate(true);
 #ifdef __amd64__
 	amd64_syscall_ret_flush_l1d_recalc();
+	pmap_allow_2m_x_ept_recalculate();
 #endif
 	hw_mds_recalculate();
 	printcpuinfo();
--- sys/x86/include/specialreg.h.orig
+++ sys/x86/include/specialreg.h
@@ -447,6 +447,7 @@
 #define	IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY 0x00000008
 #define	IA32_ARCH_CAP_SSB_NO	0x00000010
 #define	IA32_ARCH_CAP_MDS_NO	0x00000020
+#define	IA32_ARCH_CAP_IF_PSCHANGE_MC_NO	0x00000040
 
 /*
  * CPUID manufacturers identifiers