Skip to content

Commit

Permalink
Merge tag 'x86-urgent-2020-05-10' of git://git.kernel.org/pub/scm/lin…
Browse files Browse the repository at this point in the history
…ux/kernel/git/tip/tip

Pull x86 fixes from Thomas Gleixner:
 "A set of fixes for x86:

   - Ensure that direct mapping alias is always flushed when changing
     page attributes. The optimization for small ranges failed to do so
     when the virtual address was in the vmalloc or module space.

   - Unbreak the trace event registration for syscalls without arguments
     caused by the refactoring of the SYSCALL_DEFINE0() macro.

   - Move the printk in the TSC deadline timer code to a place where it
     is guaranteed to only be called once during boot and cannot be
     rearmed by clearing warn_once after boot. If it's invoked post boot
     then lockdep rightfully complains about a potential deadlock as the
     calling context is different.

   - A series of fixes for objtool and the ORC unwinder addressing a
     variety of small issues:

       - Stack offset tracking for indirect CFAs in objtool ignored
         subsequent pushes and pops

       - Repair the unwind hints in the register clearing entry ASM code

       - Make the unwinding in the low level exit to usermode code stop
         after switching to the trampoline stack. The unwind hint is no
         longer valid and the ORC unwinder emits a warning as it can't
         find the registers anymore.

       - Fix unwind hints in switch_to_asm() and rewind_stack_do_exit()
         which caused objtool to generate bogus ORC data.

       - Prevent unwinder warnings when dumping the stack of a
         non-current task as there is no way to be sure about the
         validity because the dumped stack can be a moving target.

       - Make the ORC unwinder behave the same way as the frame pointer
         unwinder when dumping an inactive task's stack and do not skip
         the first frame.

       - Prevent ORC unwinding before ORC data has been initialized

       - Immediately terminate unwinding when an unknown ORC entry type
         is found.

       - Prevent premature stop of the unwinder caused by IRET frames.

       - Fix another infinite loop in objtool caused by a negative
         offset which was not caught.

       - Address a few build warnings in the ORC unwinder and add
         missing static/ro_after_init annotations"

* tag 'x86-urgent-2020-05-10' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/unwind/orc: Move ORC sorting variables under !CONFIG_MODULES
  x86/apic: Move TSC deadline timer debug printk
  ftrace/x86: Fix trace event registration for syscalls without arguments
  x86/mm/cpa: Flush direct map alias during cpa
  objtool: Fix infinite loop in for_offset_range()
  x86/unwind/orc: Fix premature unwind stoppage due to IRET frames
  x86/unwind/orc: Fix error path for bad ORC entry type
  x86/unwind/orc: Prevent unwinding before ORC initialization
  x86/unwind/orc: Don't skip the first frame for inactive tasks
  x86/unwind: Prevent false warnings for non-current tasks
  x86/unwind/orc: Convert global variables to static
  x86/entry/64: Fix unwind hints in rewind_stack_do_exit()
  x86/entry/64: Fix unwind hints in __switch_to_asm()
  x86/entry/64: Fix unwind hints in kernel exit path
  x86/entry/64: Fix unwind hints in register clearing code
  objtool: Fix stack offset tracking for indirect CFAs
  • Loading branch information
torvalds committed May 10, 2020
2 parents 8b00083 + fb9cbbc commit c14cab2
Show file tree
Hide file tree
Showing 11 changed files with 138 additions and 90 deletions.
40 changes: 21 additions & 19 deletions arch/x86/entry/calling.h
Original file line number Diff line number Diff line change
Expand Up @@ -98,13 +98,6 @@ For 32-bit we have the following conventions - kernel is built with
#define SIZEOF_PTREGS 21*8

.macro PUSH_AND_CLEAR_REGS rdx=%rdx rax=%rax save_ret=0
/*
* Push registers and sanitize registers of values that a
* speculation attack might otherwise want to exploit. The
* lower registers are likely clobbered well before they
* could be put to use in a speculative execution gadget.
* Interleave XOR with PUSH for better uop scheduling:
*/
.if \save_ret
pushq %rsi /* pt_regs->si */
movq 8(%rsp), %rsi /* temporarily store the return address in %rsi */
Expand All @@ -114,34 +107,43 @@ For 32-bit we have the following conventions - kernel is built with
pushq %rsi /* pt_regs->si */
.endif
pushq \rdx /* pt_regs->dx */
xorl %edx, %edx /* nospec dx */
pushq %rcx /* pt_regs->cx */
xorl %ecx, %ecx /* nospec cx */
pushq \rax /* pt_regs->ax */
pushq %r8 /* pt_regs->r8 */
xorl %r8d, %r8d /* nospec r8 */
pushq %r9 /* pt_regs->r9 */
xorl %r9d, %r9d /* nospec r9 */
pushq %r10 /* pt_regs->r10 */
xorl %r10d, %r10d /* nospec r10 */
pushq %r11 /* pt_regs->r11 */
xorl %r11d, %r11d /* nospec r11*/
pushq %rbx /* pt_regs->rbx */
xorl %ebx, %ebx /* nospec rbx*/
pushq %rbp /* pt_regs->rbp */
xorl %ebp, %ebp /* nospec rbp*/
pushq %r12 /* pt_regs->r12 */
xorl %r12d, %r12d /* nospec r12*/
pushq %r13 /* pt_regs->r13 */
xorl %r13d, %r13d /* nospec r13*/
pushq %r14 /* pt_regs->r14 */
xorl %r14d, %r14d /* nospec r14*/
pushq %r15 /* pt_regs->r15 */
xorl %r15d, %r15d /* nospec r15*/
UNWIND_HINT_REGS

.if \save_ret
pushq %rsi /* return address on top of stack */
.endif

/*
* Sanitize registers of values that a speculation attack might
* otherwise want to exploit. The lower registers are likely clobbered
* well before they could be put to use in a speculative execution
* gadget.
*/
xorl %edx, %edx /* nospec dx */
xorl %ecx, %ecx /* nospec cx */
xorl %r8d, %r8d /* nospec r8 */
xorl %r9d, %r9d /* nospec r9 */
xorl %r10d, %r10d /* nospec r10 */
xorl %r11d, %r11d /* nospec r11 */
xorl %ebx, %ebx /* nospec rbx */
xorl %ebp, %ebp /* nospec rbp */
xorl %r12d, %r12d /* nospec r12 */
xorl %r13d, %r13d /* nospec r13 */
xorl %r14d, %r14d /* nospec r14 */
xorl %r15d, %r15d /* nospec r15 */

.endm

.macro POP_REGS pop_rdi=1 skip_r11rcx=0
Expand Down
14 changes: 7 additions & 7 deletions arch/x86/entry/entry_64.S
Original file line number Diff line number Diff line change
Expand Up @@ -249,7 +249,6 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
*/
syscall_return_via_sysret:
/* rcx and r11 are already restored (see code above) */
UNWIND_HINT_EMPTY
POP_REGS pop_rdi=0 skip_r11rcx=1

/*
Expand All @@ -258,6 +257,7 @@ syscall_return_via_sysret:
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
UNWIND_HINT_EMPTY

pushq RSP-RDI(%rdi) /* RSP */
pushq (%rdi) /* RDI */
Expand All @@ -279,8 +279,7 @@ SYM_CODE_END(entry_SYSCALL_64)
* %rdi: prev task
* %rsi: next task
*/
SYM_CODE_START(__switch_to_asm)
UNWIND_HINT_FUNC
SYM_FUNC_START(__switch_to_asm)
/*
* Save callee-saved registers
* This must match the order in inactive_task_frame
Expand Down Expand Up @@ -321,7 +320,7 @@ SYM_CODE_START(__switch_to_asm)
popq %rbp

jmp __switch_to
SYM_CODE_END(__switch_to_asm)
SYM_FUNC_END(__switch_to_asm)

/*
* A newly forked process directly context switches into this address.
Expand Down Expand Up @@ -512,7 +511,7 @@ SYM_CODE_END(spurious_entries_start)
* +----------------------------------------------------+
*/
SYM_CODE_START(interrupt_entry)
UNWIND_HINT_FUNC
UNWIND_HINT_IRET_REGS offset=16
ASM_CLAC
cld

Expand Down Expand Up @@ -544,9 +543,9 @@ SYM_CODE_START(interrupt_entry)
pushq 5*8(%rdi) /* regs->eflags */
pushq 4*8(%rdi) /* regs->cs */
pushq 3*8(%rdi) /* regs->ip */
UNWIND_HINT_IRET_REGS
pushq 2*8(%rdi) /* regs->orig_ax */
pushq 8(%rdi) /* return address */
UNWIND_HINT_FUNC

movq (%rdi), %rdi
jmp 2f
Expand Down Expand Up @@ -637,6 +636,7 @@ SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
*/
movq %rsp, %rdi
movq PER_CPU_VAR(cpu_tss_rw + TSS_sp0), %rsp
UNWIND_HINT_EMPTY

/* Copy the IRET frame to the trampoline stack. */
pushq 6*8(%rdi) /* SS */
Expand Down Expand Up @@ -1739,7 +1739,7 @@ SYM_CODE_START(rewind_stack_do_exit)

movq PER_CPU_VAR(cpu_current_top_of_stack), %rax
leaq -PTREGS_SIZE(%rax), %rsp
UNWIND_HINT_FUNC sp_offset=PTREGS_SIZE
UNWIND_HINT_REGS

call do_exit
SYM_CODE_END(rewind_stack_do_exit)
5 changes: 3 additions & 2 deletions arch/x86/include/asm/ftrace.h
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,12 @@ static inline bool arch_syscall_match_sym_name(const char *sym, const char *name
{
/*
* Compare the symbol name with the system call name. Skip the
* "__x64_sys", "__ia32_sys" or simple "sys" prefix.
* "__x64_sys", "__ia32_sys", "__do_sys" or simple "sys" prefix.
*/
return !strcmp(sym + 3, name + 3) ||
(!strncmp(sym, "__x64_", 6) && !strcmp(sym + 9, name + 3)) ||
(!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3));
(!strncmp(sym, "__ia32_", 7) && !strcmp(sym + 10, name + 3)) ||
(!strncmp(sym, "__do_sys", 8) && !strcmp(sym + 8, name + 3));
}

#ifndef COMPILE_OFFSETS
Expand Down
2 changes: 1 addition & 1 deletion arch/x86/include/asm/unwind.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct unwind_state {
#if defined(CONFIG_UNWINDER_ORC)
bool signal, full_regs;
unsigned long sp, bp, ip;
struct pt_regs *regs;
struct pt_regs *regs, *prev_regs;
#elif defined(CONFIG_UNWINDER_FRAME_POINTER)
bool got_irq;
unsigned long *bp, *orig_sp, ip;
Expand Down
27 changes: 14 additions & 13 deletions arch/x86/kernel/apic/apic.c
Original file line number Diff line number Diff line change
Expand Up @@ -352,8 +352,6 @@ static void __setup_APIC_LVTT(unsigned int clocks, int oneshot, int irqen)
* According to Intel, MFENCE can do the serialization here.
*/
asm volatile("mfence" : : : "memory");

printk_once(KERN_DEBUG "TSC deadline timer enabled\n");
return;
}

Expand Down Expand Up @@ -546,7 +544,7 @@ static struct clock_event_device lapic_clockevent = {
};
static DEFINE_PER_CPU(struct clock_event_device, lapic_events);

static u32 hsx_deadline_rev(void)
static __init u32 hsx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x3a; /* EP */
Expand All @@ -556,7 +554,7 @@ static u32 hsx_deadline_rev(void)
return ~0U;
}

static u32 bdx_deadline_rev(void)
static __init u32 bdx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x02: return 0x00000011;
Expand All @@ -568,7 +566,7 @@ static u32 bdx_deadline_rev(void)
return ~0U;
}

static u32 skx_deadline_rev(void)
static __init u32 skx_deadline_rev(void)
{
switch (boot_cpu_data.x86_stepping) {
case 0x03: return 0x01000136;
Expand All @@ -581,7 +579,7 @@ static u32 skx_deadline_rev(void)
return ~0U;
}

static const struct x86_cpu_id deadline_match[] = {
static const struct x86_cpu_id deadline_match[] __initconst = {
X86_MATCH_INTEL_FAM6_MODEL( HASWELL_X, &hsx_deadline_rev),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_X, 0x0b000020),
X86_MATCH_INTEL_FAM6_MODEL( BROADWELL_D, &bdx_deadline_rev),
Expand All @@ -603,18 +601,19 @@ static const struct x86_cpu_id deadline_match[] = {
{},
};

static void apic_check_deadline_errata(void)
static __init bool apic_validate_deadline_timer(void)
{
const struct x86_cpu_id *m;
u32 rev;

if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER) ||
boot_cpu_has(X86_FEATURE_HYPERVISOR))
return;
if (!boot_cpu_has(X86_FEATURE_TSC_DEADLINE_TIMER))
return false;
if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
return true;

m = x86_match_cpu(deadline_match);
if (!m)
return;
return true;

/*
* Function pointers will have the MSB set due to address layout,
Expand All @@ -626,11 +625,12 @@ static void apic_check_deadline_errata(void)
rev = (u32)m->driver_data;

if (boot_cpu_data.microcode >= rev)
return;
return true;

setup_clear_cpu_cap(X86_FEATURE_TSC_DEADLINE_TIMER);
pr_err(FW_BUG "TSC_DEADLINE disabled due to Errata; "
"please update microcode to version: 0x%x (or later)\n", rev);
return false;
}

/*
Expand Down Expand Up @@ -2092,7 +2092,8 @@ void __init init_apic_mappings(void)
{
unsigned int new_apicid;

apic_check_deadline_errata();
if (apic_validate_deadline_timer())
pr_debug("TSC deadline timer available\n");

if (x2apic_mode) {
boot_cpu_physical_apicid = read_apic_id();
Expand Down
3 changes: 2 additions & 1 deletion arch/x86/kernel/dumpstack_64.c
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,8 @@ int get_stack_info(unsigned long *stack, struct task_struct *task,
*/
if (visit_mask) {
if (*visit_mask & (1UL << info->type)) {
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
if (task == current)
printk_deferred_once(KERN_WARNING "WARNING: stack recursion on stack type %d\n", info->type);
goto unknown;
}
*visit_mask |= 1UL << info->type;
Expand Down
3 changes: 3 additions & 0 deletions arch/x86/kernel/unwind_frame.c
Original file line number Diff line number Diff line change
Expand Up @@ -344,6 +344,9 @@ bool unwind_next_frame(struct unwind_state *state)
if (IS_ENABLED(CONFIG_X86_32))
goto the_end;

if (state->task != current)
goto the_end;

if (state->regs) {
printk_deferred_once(KERN_WARNING
"WARNING: kernel stack regs at %p in %s:%d has bad 'bp' value %p\n",
Expand Down
Loading

0 comments on commit c14cab2

Please sign in to comment.