Index: sys/amd64/amd64/exception.S
===================================================================
--- sys/amd64/amd64/exception.S	(revision 347593)
+++ sys/amd64/amd64/exception.S	(revision 347632)
@@ -512,6 +512,7 @@
 	testl	$TDF_ASTPENDING | TDF_NEEDRESCHED,TD_FLAGS(%rax)
 	jne	3f
 	call	handle_ibrs_exit
+	callq	*mds_handler
 	/* Restore preserved registers. */
 	MEXITCOUNT
 	movq	TF_RDI(%rsp),%rdi	/* bonus; preserve arg 1 */
@@ -1157,6 +1158,7 @@
 	jz	2f			/* keep running with kernel GS.base */
 	cli
 	call	handle_ibrs_exit_rs
+	callq	*mds_handler
 	cmpq	$~0,PCPU(UCR3)
 	je	1f
 	pushq	%rdx
Index: sys/amd64/amd64/genassym.c
===================================================================
--- sys/amd64/amd64/genassym.c	(revision 347593)
+++ sys/amd64/amd64/genassym.c	(revision 347632)
@@ -233,6 +233,9 @@
 ASSYM(PC_PTI_STACK_SZ, PC_PTI_STACK_SZ);
 ASSYM(PC_PTI_RSP0, offsetof(struct pcpu, pc_pti_rsp0));
 ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set));
+ASSYM(PC_MDS_TMP, offsetof(struct pcpu, pc_mds_tmp));
+ASSYM(PC_MDS_BUF, offsetof(struct pcpu, pc_mds_buf));
+ASSYM(PC_MDS_BUF64, offsetof(struct pcpu, pc_mds_buf64));
 
 ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
 ASSYM(LA_ISR, LAPIC_ISR0 * LAPIC_MEM_MUL);
Index: sys/amd64/amd64/initcpu.c
===================================================================
--- sys/amd64/amd64/initcpu.c	(revision 347593)
+++ sys/amd64/amd64/initcpu.c	(revision 347632)
@@ -253,6 +253,7 @@
 	}
 	hw_ibrs_recalculate();
 	hw_ssb_recalculate(false);
+	hw_mds_recalculate();
 	switch (cpu_vendor_id) {
 	case CPU_VENDOR_AMD:
 		init_amd();
Index: sys/amd64/amd64/machdep.c
===================================================================
--- sys/amd64/amd64/machdep.c	(revision 347593)
+++ sys/amd64/amd64/machdep.c	(revision 347632)
@@ -1878,6 +1878,7 @@
 
 	TUNABLE_INT_FETCH("hw.ibrs_disable", &hw_ibrs_disable);
 	TUNABLE_INT_FETCH("hw.spec_store_bypass_disable", &hw_ssb_disable);
+	TUNABLE_INT_FETCH("hw.mds_disable", &hw_mds_disable);
 
 	TSEXIT();
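
The hunks above hook the mitigation into both kernel-exit paths of
exception.S (the syscall return and the doreti/PTI trampoline return)
with a single indirect call through mds_handler. A hypothetical C
rendering of the dispatch pattern, to show the intent only -- the real
handlers are assembly because, as the comment in cpu_machdep.c below
notes, they must not change any architectural state except possibly
%rflags:

	/*
	 * Sketch only; the names mirror the patch, but the C form and
	 * the wrapper function are hypothetical.
	 */
	void mds_handler_void(void);	/* a bare ret */
	void handle_ibrs_exit(void);

	void (*mds_handler)(void) = mds_handler_void;

	static void
	return_to_usermode_sketch(void)
	{
		handle_ibrs_exit();
		(*mds_handler)();  /* no-op, VERW, or a software scrub */
		/* ...restore user registers and iretq/sysretq... */
	}

With the mitigation off, the cost on the hot path is one indirect call
to a bare retq, so no runtime flag test is needed.
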
Index: sys/amd64/amd64/support.S
===================================================================
--- sys/amd64/amd64/support.S	(revision 347593)
+++ sys/amd64/amd64/support.S	(revision 347632)
@@ -1,8 +1,13 @@
 /*-
+ * Copyright (c) 2018-2019 The FreeBSD Foundation
  * Copyright (c) 2003 Peter Wemm.
  * Copyright (c) 1993 The Regents of the University of California.
  * All rights reserved.
  *
+ * Portions of this software were developed by
+ * Konstantin Belousov under sponsorship from
+ * the FreeBSD Foundation.
+ *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
  * are met:
@@ -1586,3 +1591,246 @@
 	ret
 #undef L1D_FLUSH_SIZE
 END(flush_l1d_sw)
+
+ENTRY(mds_handler_void)
+	retq
+END(mds_handler_void)
+
+ENTRY(mds_handler_verw)
+	subq	$8, %rsp
+	movw	%ds, (%rsp)
+	verw	(%rsp)
+	addq	$8, %rsp
+	retq
+END(mds_handler_verw)
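
mds_handler_verw relies on updated microcode (advertised through the
MD_CLEAR CPUID bit defined in specialreg.h below): with it, a VERW with
a memory operand overwrites the affected microarchitectural buffers as
a side effect, while still performing its legacy segment-verification
function. The handler stores %ds on the stack purely to have a harmless
selector to feed it. A hedged C equivalent using inline assembly
(sketch; not the form the kernel uses):

	#include <stdint.h>

	/* Assumes MD_CLEAR-capable microcode; without it, VERW only
	 * performs its legacy check and sets ZF. */
	static inline void
	mds_verw(void)
	{
		uint16_t sel;

		__asm__ __volatile__("movw %%ds, %0; verw %0" : "+m" (sel));
	}
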
+
+ENTRY(mds_handler_ivb)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movq	PCPU(MDS_BUF), %rdx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	lfence
+	orpd	(%rdx), %xmm0
+	orpd	(%rdx), %xmm0
+	mfence
+	movl	$40, %ecx
+	addq	$16, %rdx
+2:	movntdq	%xmm0, (%rdx)
+	addq	$16, %rdx
+	decl	%ecx
+	jnz	2b
+	mfence
+
+	movdqa	PCPU(MDS_TMP),%xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movq	%rax, %cr0
+3:	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_ivb)
+
+ENTRY(mds_handler_bdw)
+	pushq	%rax
+	pushq	%rbx
+	pushq	%rcx
+	pushq	%rdi
+	pushq	%rsi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movq	PCPU(MDS_BUF), %rbx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	movq	%rbx, %rdi
+	movq	%rbx, %rsi
+	movl	$40, %ecx
+2:	movntdq	%xmm0, (%rbx)
+	addq	$16, %rbx
+	decl	%ecx
+	jnz	2b
+	mfence
+	movl	$1536, %ecx
+	rep; movsb
+	lfence
+
+	movdqa	PCPU(MDS_TMP),%xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movq	%rax, %cr0
+3:	popq	%rsi
+	popq	%rdi
+	popq	%rcx
+	popq	%rbx
+	popq	%rax
+	retq
+END(mds_handler_bdw)
+
+ENTRY(mds_handler_skl_sse)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%rdi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movq	PCPU(MDS_BUF), %rdi
+	movq	PCPU(MDS_BUF64), %rdx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	lfence
+	orpd	(%rdx), %xmm0
+	orpd	(%rdx), %xmm0
+	xorl	%eax, %eax
+2:	clflushopt	5376(%rdi, %rax, 8)
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb
+	mfence
+
+	movdqa	PCPU(MDS_TMP), %xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movq	%rax, %cr0
+3:	popq	%rdi
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_skl_sse)
+
+ENTRY(mds_handler_skl_avx)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%rdi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movq	PCPU(MDS_BUF), %rdi
+	movq	PCPU(MDS_BUF64), %rdx
+	vmovdqa	%ymm0, PCPU(MDS_TMP)
+	vpxor	%ymm0, %ymm0, %ymm0
+
+	lfence
+	vorpd	(%rdx), %ymm0, %ymm0
+	vorpd	(%rdx), %ymm0, %ymm0
+	xorl	%eax, %eax
+2:	clflushopt	5376(%rdi, %rax, 8)
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb
+	mfence
+
+	vmovdqa	PCPU(MDS_TMP), %ymm0
+	testb	$CR0_TS, %al
+	je	3f
+	movq	%rax, %cr0
+3:	popq	%rdi
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_skl_avx)
+
+ENTRY(mds_handler_skl_avx512)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+	pushq	%rdi
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movq	PCPU(MDS_BUF), %rdi
+	movq	PCPU(MDS_BUF64), %rdx
+/* vmovdqa64	%zmm0, PCPU(MDS_TMP) */
+	.byte	0x65, 0x62, 0xf1, 0xfd, 0x48, 0x7f, 0x04, 0x25
+	.long	PC_MDS_TMP
+/* vpxor	%zmm0, %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0xef, 0xc0
+
+	lfence
+/* vorpd	(%rdx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+/* vorpd	(%rdx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+	xorl	%eax, %eax
+2:	clflushopt	5376(%rdi, %rax, 8)
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb
+	mfence
+
+/* vmovdqa64	PCPU(MDS_TMP), %zmm0 */
+	.byte	0x65, 0x62, 0xf1, 0xfd, 0x48, 0x6f, 0x04, 0x25
+	.long	PC_MDS_TMP
+	testb	$CR0_TS, %al
+	je	3f
+	movq	%rax, %cr0
+3:	popq	%rdi
+	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_skl_avx512)
+
+ENTRY(mds_handler_silvermont)
+	pushq	%rax
+	pushq	%rdx
+	pushq	%rcx
+
+	movq	%cr0, %rax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movq	PCPU(MDS_BUF), %rdx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	movl	$16, %ecx
+2:	movntdq	%xmm0, (%rdx)
+	addq	$16, %rdx
+	decl	%ecx
+	jnz	2b
+	mfence
+
+	movdqa	PCPU(MDS_TMP),%xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movq	%rax, %cr0
+3:	popq	%rcx
+	popq	%rdx
+	popq	%rax
+	retq
+END(mds_handler_silvermont)
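
The software sequences overwrite the buffers implicated in MDS (store
buffer, fill buffers, load ports) on CPUs without MD_CLEAR microcode.
Sanity arithmetic, derived purely from the code above and the buffer
sizes allocated in cpu_machdep.c below:

	ivb:	two 16-byte orpd reads at buf[0], then 40 x 16-byte
		movntdq stores starting at buf+16: 16 + 40*16 = 656
		bytes touched, within the 672-byte buffer.
	bdw:	40 x 16-byte movntdq stores (640 bytes), then a rep
		movsb copying the full 1536-byte buffer onto itself.
	skl:	clflushopt at 5376(buf, i, 8) for i = 0, 8, ..., 88
		hits 12 consecutive 64-byte lines (5376/64 = 84, so
		lines 84-95 of the 96-line, 6144-byte buffer); rep
		stosb then zeroes all 6144 bytes, and the 64-byte
		aligned MDS_BUF64 feeds the two orpd/vorpd loads.
	slm:	16 x 16-byte movntdq stores = 256 bytes, matching the
		256-byte buffer.
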
Index: sys/amd64/include/pcpu.h
===================================================================
--- sys/amd64/include/pcpu.h	(revision 347593)
+++ sys/amd64/include/pcpu.h	(revision 347632)
@@ -76,7 +76,11 @@
 	uint32_t pc_pcid_gen;						\
 	uint32_t pc_smp_tlb_done;	/* TLB op acknowledgement */	\
 	uint32_t pc_ibpb_set;						\
-	char	__pad[3288]	/* pad to UMA_PCPU_ALLOC_SIZE */
+	void	*pc_mds_buf;						\
+	void	*pc_mds_buf64;						\
+	uint32_t pc_pad[2];						\
+	uint8_t	pc_mds_tmp[64];						\
+	char	__pad[3176]	/* pad to UMA_PCPU_ALLOC_SIZE */
 
 #define	PC_DBREG_CMD_NONE	0
 #define	PC_DBREG_CMD_LOAD	1
Index: sys/dev/cpuctl/cpuctl.c
===================================================================
--- sys/dev/cpuctl/cpuctl.c	(revision 347593)
+++ sys/dev/cpuctl/cpuctl.c	(revision 347632)
@@ -521,6 +521,7 @@
 	hw_ibrs_recalculate();
 	restore_cpu(oldcpu, is_bound, td);
 	hw_ssb_recalculate(true);
+	hw_mds_recalculate();
 	printcpuinfo();
 	return (0);
 }
Index: sys/i386/i386/exception.s
===================================================================
--- sys/i386/i386/exception.s	(revision 347593)
+++ sys/i386/i386/exception.s	(revision 347632)
@@ -518,6 +518,8 @@
 2:	movl	$handle_ibrs_exit,%eax
 	pushl	%ecx			/* preserve enough call-used regs */
 	call	*%eax
+	movl	mds_handler,%eax
+	call	*%eax
 	popl	%ecx
 	movl	%esp, %esi
 	movl	PCPU(TRAMPSTK), %edx
Index: sys/i386/i386/genassym.c
===================================================================
--- sys/i386/i386/genassym.c	(revision 347593)
+++ sys/i386/i386/genassym.c	(revision 347632)
@@ -222,6 +222,9 @@
 ASSYM(PC_TRAMPSTK, offsetof(struct pcpu, pc_trampstk));
 ASSYM(PC_COPYOUT_BUF, offsetof(struct pcpu, pc_copyout_buf));
 ASSYM(PC_IBPB_SET, offsetof(struct pcpu, pc_ibpb_set));
+ASSYM(PC_MDS_TMP, offsetof(struct pcpu, pc_mds_tmp));
+ASSYM(PC_MDS_BUF, offsetof(struct pcpu, pc_mds_buf));
+ASSYM(PC_MDS_BUF64, offsetof(struct pcpu, pc_mds_buf64));
 
 #ifdef DEV_APIC
 ASSYM(LA_EOI, LAPIC_EOI * LAPIC_MEM_MUL);
Index: sys/i386/i386/initcpu.c
===================================================================
--- sys/i386/i386/initcpu.c	(revision 347593)
+++ sys/i386/i386/initcpu.c	(revision 347632)
@@ -754,6 +754,7 @@
 		elf32_nxstack = 1;
 	}
 #endif
+	hw_mds_recalculate();
 }
 
 void
Index: sys/i386/i386/support.s
===================================================================
--- sys/i386/i386/support.s	(revision 347593)
+++ sys/i386/i386/support.s	(revision 347632)
@@ -472,3 +472,194 @@
 	movb	$0,PCPU(IBPB_SET)
1:	ret
 END(handle_ibrs_exit)
+
+ENTRY(mds_handler_void)
+	ret
+END(mds_handler_void)
+
+ENTRY(mds_handler_verw)
+	subl	$4, %esp
+	movw	%ds, (%esp)
+	verw	(%esp)
+	addl	$4, %esp
+	ret
+END(mds_handler_verw)
+
+ENTRY(mds_handler_ivb)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movl	PCPU(MDS_BUF), %edx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	lfence
+	orpd	(%edx), %xmm0
+	orpd	(%edx), %xmm0
+	mfence
+	movl	$40, %ecx
+	addl	$16, %edx
+2:	movntdq	%xmm0, (%edx)
+	addl	$16, %edx
+	decl	%ecx
+	jnz	2b
+	mfence
+
+	movdqa	PCPU(MDS_TMP),%xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movl	%eax, %cr0
+3:	ret
+END(mds_handler_ivb)
+
+ENTRY(mds_handler_bdw)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movl	PCPU(MDS_BUF), %ebx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	movl	%ebx, %edi
+	movl	%ebx, %esi
+	movl	$40, %ecx
+2:	movntdq	%xmm0, (%ebx)
+	addl	$16, %ebx
+	decl	%ecx
+	jnz	2b
+	mfence
+	movl	$1536, %ecx
+	rep; movsb
+	lfence
+
+	movdqa	PCPU(MDS_TMP),%xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movl	%eax, %cr0
+3:	ret
+END(mds_handler_bdw)
+
+ENTRY(mds_handler_skl_sse)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movl	PCPU(MDS_BUF), %edi
+	movl	PCPU(MDS_BUF64), %edx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	lfence
+	orpd	(%edx), %xmm0
+	orpd	(%edx), %xmm0
+	xorl	%eax, %eax
+2:	clflushopt	5376(%edi, %eax, 8)
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb
+	mfence
+
+	movdqa	PCPU(MDS_TMP), %xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movl	%eax, %cr0
+3:	ret
+END(mds_handler_skl_sse)
+
+ENTRY(mds_handler_skl_avx)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movl	PCPU(MDS_BUF), %edi
+	movl	PCPU(MDS_BUF64), %edx
+	vmovdqa	%ymm0, PCPU(MDS_TMP)
+	vpxor	%ymm0, %ymm0, %ymm0
+
+	lfence
+	vorpd	(%edx), %ymm0, %ymm0
+	vorpd	(%edx), %ymm0, %ymm0
+	xorl	%eax, %eax
+2:	clflushopt	5376(%edi, %eax, 8)
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb
+	mfence
+
+	vmovdqa	PCPU(MDS_TMP), %ymm0
+	testb	$CR0_TS, %al
+	je	3f
+	movl	%eax, %cr0
+3:	ret
+END(mds_handler_skl_avx)
+
+ENTRY(mds_handler_skl_avx512)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movl	PCPU(MDS_BUF), %edi
+	movl	PCPU(MDS_BUF64), %edx
+/* vmovdqa64	%zmm0, PCPU(MDS_TMP) */
+	.byte	0x64, 0x62, 0xf1, 0xfd, 0x48, 0x7f, 0x05
+	.long	PC_MDS_TMP
+/* vpxor	%zmm0, %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0xef, 0xc0
+
+	lfence
+/* vorpd	(%edx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+/* vorpd	(%edx), %zmm0, %zmm0 */
+	.byte	0x62, 0xf1, 0xfd, 0x48, 0x56, 0x02
+	xorl	%eax, %eax
+2:	clflushopt	5376(%edi, %eax, 8)
+	addl	$8, %eax
+	cmpl	$8 * 12, %eax
+	jb	2b
+	sfence
+	movl	$6144, %ecx
+	xorl	%eax, %eax
+	rep; stosb
+	mfence
+
+/* vmovdqa64	PCPU(MDS_TMP), %zmm0 */
+	.byte	0x64, 0x62, 0xf1, 0xfd, 0x48, 0x6f, 0x05
+	.long	PC_MDS_TMP
+	testb	$CR0_TS, %al
+	je	3f
+	movl	%eax, %cr0
+3:	ret
+END(mds_handler_skl_avx512)
+
+ENTRY(mds_handler_silvermont)
+	movl	%cr0, %eax
+	testb	$CR0_TS, %al
+	je	1f
+	clts
+1:	movl	PCPU(MDS_BUF), %edx
+	movdqa	%xmm0, PCPU(MDS_TMP)
+	pxor	%xmm0, %xmm0
+
+	movl	$16, %ecx
+2:	movntdq	%xmm0, (%edx)
+	addl	$16, %edx
+	decl	%ecx
+	jnz	2b
+	mfence
+
+	movdqa	PCPU(MDS_TMP),%xmm0
+	testb	$CR0_TS, %al
+	je	3f
+	movl	%eax, %cr0
+3:	ret
+END(mds_handler_silvermont)
Index: sys/i386/include/pcpu.h
===================================================================
--- sys/i386/include/pcpu.h	(revision 347593)
+++ sys/i386/include/pcpu.h	(revision 347632)
@@ -77,10 +77,14 @@
 	struct sx pc_copyout_slock;					\
 	char	*pc_copyout_buf;					\
 	vm_offset_t pc_pmap_eh_va;					\
-	caddr_t pc_pmap_eh_ptep;					\
+	caddr_t	pc_pmap_eh_ptep;					\
 	uint32_t pc_smp_tlb_done;	/* TLB op acknowledgement */	\
 	uint32_t pc_ibpb_set;						\
-	char	__pad[3610]
+	void	*pc_mds_buf;						\
+	void	*pc_mds_buf64;						\
+	uint32_t pc_pad[4];						\
+	uint8_t	pc_mds_tmp[64];						\
+	char	__pad[3522]
 
 #ifdef _KERNEL
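
The pcpu layout changes are a trade against the tail padding, so the
per-CPU area size stays fixed. The i386 numbers work out exactly:

	/* i386 additions (sizes from the hunk above):      */
	void	*pc_mds_buf;	/*  4 bytes                 */
	void	*pc_mds_buf64;	/*  4 bytes                 */
	uint32_t pc_pad[4];	/* 16 bytes, alignment fill  */
	uint8_t	pc_mds_tmp[64];	/* 64 bytes                  */
				/* total: 88 == 3610 - 3522  */

pc_pad[] places pc_mds_tmp on a 64-byte boundary, which the aligned
vmovdqa64 save/restore in the AVX512 handlers requires; the
_Static_assert in cpu_machdep.c below enforces this.
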
Index: sys/x86/include/specialreg.h
===================================================================
--- sys/x86/include/specialreg.h	(revision 347593)
+++ sys/x86/include/specialreg.h	(revision 347632)
@@ -425,6 +425,7 @@
 /*
  * CPUID instruction 7 Structured Extended Features, leaf 0 edx info
  */
+#define	CPUID_STDEXT3_MD_CLEAR	0x00000400
 #define	CPUID_STDEXT3_IBPB	0x04000000
 #define	CPUID_STDEXT3_STIBP	0x08000000
 #define	CPUID_STDEXT3_L1D_FLUSH	0x10000000
@@ -437,6 +438,7 @@
 #define	IA32_ARCH_CAP_RSBA	0x00000004
 #define	IA32_ARCH_CAP_SKIP_L1DFL_VMENTRY	0x00000008
 #define	IA32_ARCH_CAP_SSB_NO	0x00000010
+#define	IA32_ARCH_CAP_MDS_NO	0x00000020
 
 /*
  * CPUID manufacturers identifiers
Index: sys/x86/include/x86_var.h
===================================================================
--- sys/x86/include/x86_var.h	(revision 347593)
+++ sys/x86/include/x86_var.h	(revision 347632)
@@ -85,6 +85,7 @@
 extern	u_int	max_apic_id;
 extern	int	pti;
 extern	int	hw_ibrs_active;
+extern	int	hw_mds_disable;
 extern	int	hw_ssb_active;
 
 struct	pcb;
@@ -139,6 +140,7 @@
 void	handle_ibrs_entry(void);
 void	handle_ibrs_exit(void);
 void	hw_ibrs_recalculate(void);
+void	hw_mds_recalculate(void);
 void	hw_ssb_recalculate(bool all_cpus);
 void	nmi_call_kdb(u_int cpu, u_int type, struct trapframe *frame);
 void	nmi_call_kdb_smp(u_int type, struct trapframe *frame);
Index: sys/x86/x86/cpu_machdep.c
===================================================================
--- sys/x86/x86/cpu_machdep.c	(revision 347593)
+++ sys/x86/x86/cpu_machdep.c	(revision 347632)
@@ -61,6 +61,7 @@
 #include
 #include
 #include
+#include <sys/domainset.h>
 #include
 #include
 #include
@@ -915,7 +916,204 @@
     hw_ssb_disable_handler, "I",
     "Speculative Store Bypass Disable (0 - off, 1 - on, 2 - auto");
 
+int hw_mds_disable;
+
 /*
+ * Handler for Microarchitectural Data Sampling issues.  Really not a
+ * pointer to a C function: on amd64 the code must not change any CPU
+ * architectural state except possibly %rflags.  Also, it is always
+ * called with interrupts disabled.
+ */
+void (*mds_handler)(void);
+void mds_handler_void(void);
+void mds_handler_verw(void);
+void mds_handler_ivb(void);
+void mds_handler_bdw(void);
+void mds_handler_skl_sse(void);
+void mds_handler_skl_avx(void);
+void mds_handler_skl_avx512(void);
+void mds_handler_silvermont(void);
+
+static int
+sysctl_hw_mds_disable_state_handler(SYSCTL_HANDLER_ARGS)
+{
+	const char *state;
+
+	if (mds_handler == mds_handler_void)
+		state = "inactive";
+	else if (mds_handler == mds_handler_verw)
+		state = "VERW";
+	else if (mds_handler == mds_handler_ivb)
+		state = "software IvyBridge";
+	else if (mds_handler == mds_handler_bdw)
+		state = "software Broadwell";
+	else if (mds_handler == mds_handler_skl_sse)
+		state = "software Skylake SSE";
+	else if (mds_handler == mds_handler_skl_avx)
+		state = "software Skylake AVX";
+	else if (mds_handler == mds_handler_skl_avx512)
+		state = "software Skylake AVX512";
+	else if (mds_handler == mds_handler_silvermont)
+		state = "software Silvermont";
+	else
+		state = "unknown";
+	return (SYSCTL_OUT(req, state, strlen(state)));
+}
+
+SYSCTL_PROC(_hw, OID_AUTO, mds_disable_state,
+    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
+    sysctl_hw_mds_disable_state_handler, "A",
+    "Microarchitectural Data Sampling Mitigation state");
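
The state sysctl gives a read-only view of which handler was actually
selected. A hypothetical session (the string depends on the CPU,
microcode, and the hw.mds_disable setting):

	# sysctl hw.mds_disable_state
	hw.mds_disable_state: software Skylake AVX
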
+
+_Static_assert(__offsetof(struct pcpu, pc_mds_tmp) % 64 == 0, "MDS AVX512");
+
+void
+hw_mds_recalculate(void)
+{
+	struct pcpu *pc;
+	vm_offset_t b64;
+	u_long xcr0;
+	int i;
+
+	/*
+	 * Allow the user to force the VERW variant even if MD_CLEAR is
+	 * not reported.  For instance, a hypervisor might unknowingly
+	 * filter the cap out.
+	 * For similar reasons, and for testing, allow enabling the
+	 * mitigation even when the RDCL_NO or MDS_NO caps are reported.
+	 */
+	if (cpu_vendor_id != CPU_VENDOR_INTEL || hw_mds_disable == 0 ||
+	    ((cpu_ia32_arch_caps & (IA32_ARCH_CAP_RDCL_NO |
+	    IA32_ARCH_CAP_MDS_NO)) != 0 && hw_mds_disable == 3)) {
+		mds_handler = mds_handler_void;
+	} else if (((cpu_stdext_feature3 & CPUID_STDEXT3_MD_CLEAR) != 0 &&
+	    hw_mds_disable == 3) || hw_mds_disable == 1) {
+		mds_handler = mds_handler_verw;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x2e || CPUID_TO_MODEL(cpu_id) == 0x1e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1f || CPUID_TO_MODEL(cpu_id) == 0x1a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x2f || CPUID_TO_MODEL(cpu_id) == 0x25 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x2c || CPUID_TO_MODEL(cpu_id) == 0x2d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x2a || CPUID_TO_MODEL(cpu_id) == 0x3e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x3a) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/*
+		 * Nehalem, SandyBridge, IvyBridge
+		 */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL) {
+				pc->pc_mds_buf = malloc_domainset(672, M_TEMP,
+				    DOMAINSET_PREF(pc->pc_domain), M_WAITOK);
+				bzero(pc->pc_mds_buf, 16);
+			}
+		}
+		mds_handler = mds_handler_ivb;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    (CPUID_TO_MODEL(cpu_id) == 0x3f || CPUID_TO_MODEL(cpu_id) == 0x3c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x45 || CPUID_TO_MODEL(cpu_id) == 0x46 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x56 || CPUID_TO_MODEL(cpu_id) == 0x4f ||
+	    CPUID_TO_MODEL(cpu_id) == 0x47 || CPUID_TO_MODEL(cpu_id) == 0x3d) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/*
+		 * Haswell, Broadwell
+		 */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL) {
+				pc->pc_mds_buf = malloc_domainset(1536, M_TEMP,
+				    DOMAINSET_PREF(pc->pc_domain), M_WAITOK);
+				bzero(pc->pc_mds_buf, 16);
+			}
+		}
+		mds_handler = mds_handler_bdw;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    ((CPUID_TO_MODEL(cpu_id) == 0x55 && (cpu_id &
+	    CPUID_STEPPING) <= 5) ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4e || CPUID_TO_MODEL(cpu_id) == 0x5e ||
+	    (CPUID_TO_MODEL(cpu_id) == 0x8e && (cpu_id &
+	    CPUID_STEPPING) <= 0xb) ||
+	    (CPUID_TO_MODEL(cpu_id) == 0x9e && (cpu_id &
+	    CPUID_STEPPING) <= 0xc)) &&
+	    (hw_mds_disable == 2 || hw_mds_disable == 3)) {
+		/*
+		 * Skylake, KabyLake, CoffeeLake, WhiskeyLake,
+		 * CascadeLake
+		 */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL) {
+				pc->pc_mds_buf = malloc_domainset(6 * 1024,
+				    M_TEMP, DOMAINSET_PREF(pc->pc_domain),
+				    M_WAITOK);
+				b64 = (vm_offset_t)malloc_domainset(64 + 63,
+				    M_TEMP, DOMAINSET_PREF(pc->pc_domain),
+				    M_WAITOK);
+				pc->pc_mds_buf64 = (void *)roundup2(b64, 64);
+				bzero(pc->pc_mds_buf64, 64);
+			}
+		}
+		xcr0 = rxcr(0);
+		if ((xcr0 & XFEATURE_ENABLED_ZMM_HI256) != 0 &&
+		    (cpu_stdext_feature2 & CPUID_STDEXT_AVX512DQ) != 0)
+			mds_handler = mds_handler_skl_avx512;
+		else if ((xcr0 & XFEATURE_ENABLED_AVX) != 0 &&
+		    (cpu_feature2 & CPUID2_AVX) != 0)
+			mds_handler = mds_handler_skl_avx;
+		else
+			mds_handler = mds_handler_skl_sse;
+	} else if (CPUID_TO_FAMILY(cpu_id) == 0x6 &&
+	    ((CPUID_TO_MODEL(cpu_id) == 0x37 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x4d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5a ||
+	    CPUID_TO_MODEL(cpu_id) == 0x5d ||
+	    CPUID_TO_MODEL(cpu_id) == 0x6e ||
+	    CPUID_TO_MODEL(cpu_id) == 0x65 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x75 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x1c ||
+	    CPUID_TO_MODEL(cpu_id) == 0x26 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x27 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x35 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x36 ||
+	    CPUID_TO_MODEL(cpu_id) == 0x7a))) {
+		/* Silvermont, Airmont */
+		CPU_FOREACH(i) {
+			pc = pcpu_find(i);
+			if (pc->pc_mds_buf == NULL)
+				pc->pc_mds_buf = malloc(256, M_TEMP, M_WAITOK);
+		}
+		mds_handler = mds_handler_silvermont;
+	} else {
+		hw_mds_disable = 0;
+		mds_handler = mds_handler_void;
+	}
+}
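
Because the knob is a CTLFLAG_RWTUN sysctl (and amd64 additionally
fetches it explicitly in machdep.c above), the policy can be set from
loader.conf before the boot-time hw_mds_recalculate() call. For
example:

	hw.mds_disable=3	# AUTO: void if MDS_NO/RDCL_NO, VERW if
				# MD_CLEAR, else the SW sequence for
				# recognized models
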
+
+static int
+sysctl_mds_disable_handler(SYSCTL_HANDLER_ARGS)
+{
+	int error, val;
+
+	val = hw_mds_disable;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (val < 0 || val > 3)
+		return (EINVAL);
+	hw_mds_disable = val;
+	hw_mds_recalculate();
+	return (0);
+}
+
+SYSCTL_PROC(_hw, OID_AUTO, mds_disable, CTLTYPE_INT |
+    CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE, NULL, 0,
+    sysctl_mds_disable_handler, "I",
+    "Microarchitectural Data Sampling Mitigation "
+    "(0 - off, 1 - on VERW, 2 - on SW, 3 - on AUTO)");
+
+/*
  * Enable and restore kernel text write permissions.
  * Callers must ensure that disable_wp()/restore_wp() are executed
  * without rescheduling on the same core.
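
The writable sysctl re-runs the recalculation immediately, so the
mitigation can be toggled at runtime. A hypothetical session on a
machine whose microcode advertises MD_CLEAR:

	# sysctl hw.mds_disable=1
	hw.mds_disable: 0 -> 1
	# sysctl hw.mds_disable_state
	hw.mds_disable_state: VERW
	# sysctl hw.mds_disable=0
	hw.mds_disable: 1 -> 0
	# sysctl hw.mds_disable_state
	hw.mds_disable_state: inactive
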