From b1378a561fd16afdd96ef0bc912b1bcd2b85a68e Mon Sep 17 00:00:00 2001
From: "Chang S. Bae" <chang.seok.bae@intel.com>
Date: Tue, 18 Sep 2018 16:08:53 -0700
Subject: [PATCH] x86/fsgsbase/64: Introduce FS/GS base helper functions

Introduce FS/GS base access functionality via <asm/fsgsbase.h>,
not yet used by anything directly.

Factor out task_seg_base() from x86/ptrace.c and rename it to
x86_fsgsbase_read_task() to make it part of the new helpers.

This will allow us to enhance FSGSBASE support and eventually enable
the FSBASE/GSBASE instructions.

An "inactive" GS base refers to a base saved at kernel entry
and being part of an inactive, non-running/stopped user-task.
(The typical ptrace model.)

Here are the new functions:

        x86_fsbase_read_task()
        x86_gsbase_read_task()
        x86_fsbase_write_task()
        x86_gsbase_write_task()
        x86_fsbase_read_cpu()
        x86_fsbase_write_cpu()
        x86_gsbase_read_cpu_inactive()
        x86_gsbase_write_cpu_inactive()

As an advantage of the unified namespace we can now see all FS/GSBASE
API use in the kernel via the following 'git grep' pattern:

        $ git grep x86_.*sbase

[ mingo: Wrote new changelog. ]

Based-on-code-from: Andy Lutomirski <luto@kernel.org>
Suggested-by: Ingo Molnar <mingo@kernel.org>
Signed-off-by: Chang S. Bae <chang.seok.bae@intel.com>
Cc: Andy Lutomirski <luto@amacapital.net>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brian Gerst <brgerst@gmail.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: Denys Vlasenko <dvlasenk@redhat.com>
Cc: H. Peter Anvin <hpa@zytor.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Markus T Metzger <markus.t.metzger@intel.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ravi Shankar <ravi.v.shankar@intel.com>
Cc: Rik van Riel <riel@surriel.com>
Cc: Thomas Gleixner <tglx@linutronix.de>
Link: http://lkml.kernel.org/r/1537312139-5580-3-git-send-email-chang.seok.bae@intel.com
Signed-off-by: Ingo Molnar <mingo@kernel.org>
---
 arch/x86/include/asm/fsgsbase.h |  50 ++++++++++++++++
 arch/x86/kernel/process_64.c    | 124 ++++++++++++++++++++++++++++++++++++++++
 arch/x86/kernel/ptrace.c        |  51 ++---------------
 3 files changed, 179 insertions(+), 46 deletions(-)
 create mode 100644 arch/x86/include/asm/fsgsbase.h

diff --git a/arch/x86/include/asm/fsgsbase.h b/arch/x86/include/asm/fsgsbase.h
new file mode 100644
index 000000000000..1ab465ee23fe
--- /dev/null
+++ b/arch/x86/include/asm/fsgsbase.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_FSGSBASE_H
+#define _ASM_FSGSBASE_H 1
+
+#ifndef __ASSEMBLY__
+
+#ifdef CONFIG_X86_64
+
+#include <asm/msr-index.h>
+
+unsigned long x86_fsgsbase_read_task(struct task_struct *task,
+                                     unsigned short selector);
+
+/*
+ * Read/write a task's fsbase or gsbase. This returns the value that
+ * the FS/GS base would have (if the task were to be resumed). These
+ * work on current or on a different non-running task.
+ */
+unsigned long x86_fsbase_read_task(struct task_struct *task);
+unsigned long x86_gsbase_read_task(struct task_struct *task);
+int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase);
+int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase);
+
+/* Helper functions for reading/writing FS/GS base */
+
+static inline unsigned long x86_fsbase_read_cpu(void)
+{
+        unsigned long fsbase;
+
+        rdmsrl(MSR_FS_BASE, fsbase);
+        return fsbase;
+}
+
+void x86_fsbase_write_cpu(unsigned long fsbase);
+
+static inline unsigned long x86_gsbase_read_cpu_inactive(void)
+{
+        unsigned long gsbase;
+
+        rdmsrl(MSR_KERNEL_GS_BASE, gsbase);
+        return gsbase;
+}
+
+void x86_gsbase_write_cpu_inactive(unsigned long gsbase);
+
+#endif /* CONFIG_X86_64 */
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* _ASM_FSGSBASE_H */
diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c
index ea5ea850348d..2a53ff8d1baf 100644
--- a/arch/x86/kernel/process_64.c
+++ b/arch/x86/kernel/process_64.c
@@ -54,6 +54,7 @@
 #include <asm/vdso.h>
 #include <asm/intel_rdt_sched.h>
 #include <asm/unistd.h>
+#include <asm/fsgsbase.h>
 #ifdef CONFIG_IA32_EMULATION
 /* Not included via unistd.h */
 #include <asm/unistd_32_ia32.h>
@@ -286,6 +287,129 @@ static __always_inline void load_seg_legacy(unsigned short prev_index,
         }
 }
 
+unsigned long x86_fsgsbase_read_task(struct task_struct *task,
+                                     unsigned short selector)
+{
+        unsigned short idx = selector >> 3;
+        unsigned long base;
+
+        if (likely((selector & SEGMENT_TI_MASK) == 0)) {
+                if (unlikely(idx >= GDT_ENTRIES))
+                        return 0;
+
+                /*
+                 * There are no user segments in the GDT with nonzero bases
+                 * other than the TLS segments.
+                 */
+                if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
+                        return 0;
+
+                idx -= GDT_ENTRY_TLS_MIN;
+                base = get_desc_base(&task->thread.tls_array[idx]);
+        } else {
+#ifdef CONFIG_MODIFY_LDT_SYSCALL
+                struct ldt_struct *ldt;
+
+                /*
+                 * If performance here mattered, we could protect the LDT
+                 * with RCU. This is a slow path, though, so we can just
+                 * take the mutex.
+                 */
+                mutex_lock(&task->mm->context.lock);
+                ldt = task->mm->context.ldt;
+                if (unlikely(idx >= ldt->nr_entries))
+                        base = 0;
+                else
+                        base = get_desc_base(ldt->entries + idx);
+                mutex_unlock(&task->mm->context.lock);
+#else
+                base = 0;
+#endif
+        }
+
+        return base;
+}
+
+void x86_fsbase_write_cpu(unsigned long fsbase)
+{
+        /*
+         * Set the selector to 0 as a notion, that the segment base is
+         * overwritten, which will be checked for skipping the segment load
+         * during context switch.
+         */
+        loadseg(FS, 0);
+        wrmsrl(MSR_FS_BASE, fsbase);
+}
+
+void x86_gsbase_write_cpu_inactive(unsigned long gsbase)
+{
+        /* Set the selector to 0 for the same reason as %fs above. */
+        loadseg(GS, 0);
+        wrmsrl(MSR_KERNEL_GS_BASE, gsbase);
+}
+
+unsigned long x86_fsbase_read_task(struct task_struct *task)
+{
+        unsigned long fsbase;
+
+        if (task == current)
+                fsbase = x86_fsbase_read_cpu();
+        else if (task->thread.fsindex == 0)
+                fsbase = task->thread.fsbase;
+        else
+                fsbase = x86_fsgsbase_read_task(task, task->thread.fsindex);
+
+        return fsbase;
+}
+
+unsigned long x86_gsbase_read_task(struct task_struct *task)
+{
+        unsigned long gsbase;
+
+        if (task == current)
+                gsbase = x86_gsbase_read_cpu_inactive();
+        else if (task->thread.gsindex == 0)
+                gsbase = task->thread.gsbase;
+        else
+                gsbase = x86_fsgsbase_read_task(task, task->thread.gsindex);
+
+        return gsbase;
+}
+
+int x86_fsbase_write_task(struct task_struct *task, unsigned long fsbase)
+{
+        /*
+         * Not strictly needed for %fs, but do it for symmetry
+         * with %gs
+         */
+        if (unlikely(fsbase >= TASK_SIZE_MAX))
+                return -EPERM;
+
+        preempt_disable();
+        task->thread.fsbase = fsbase;
+        if (task == current)
+                x86_fsbase_write_cpu(fsbase);
+        task->thread.fsindex = 0;
+        preempt_enable();
+
+        return 0;
+}
+
+int x86_gsbase_write_task(struct task_struct *task, unsigned long gsbase)
+{
+        if (unlikely(gsbase >= TASK_SIZE_MAX))
+                return -EPERM;
+
+        preempt_disable();
+        task->thread.gsbase = gsbase;
+        if (task == current)
+                x86_gsbase_write_cpu_inactive(gsbase);
+        task->thread.gsindex = 0;
+        preempt_enable();
+
+        return 0;
+}
+
 int copy_thread_tls(unsigned long clone_flags, unsigned long sp,
                 unsigned long arg, struct task_struct *p, unsigned long tls)
 {
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 3acbf45cb7fb..fbde2a7ce377 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -39,7 +39,7 @@
 #include <asm/hw_breakpoint.h>
 #include <asm/traps.h>
 #include <asm/syscall.h>
-#include <asm/mmu_context.h>
+#include <asm/fsgsbase.h>
 
 #include "tls.h"
 
@@ -343,49 +343,6 @@ static int set_segment_reg(struct task_struct *task,
         return 0;
 }
 
-static unsigned long task_seg_base(struct task_struct *task,
-                                   unsigned short selector)
-{
-        unsigned short idx = selector >> 3;
-        unsigned long base;
-
-        if (likely((selector & SEGMENT_TI_MASK) == 0)) {
-                if (unlikely(idx >= GDT_ENTRIES))
-                        return 0;
-
-                /*
-                 * There are no user segments in the GDT with nonzero bases
-                 * other than the TLS segments.
-                 */
-                if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)
-                        return 0;
-
-                idx -= GDT_ENTRY_TLS_MIN;
-                base = get_desc_base(&task->thread.tls_array[idx]);
-        } else {
-#ifdef CONFIG_MODIFY_LDT_SYSCALL
-                struct ldt_struct *ldt;
-
-                /*
-                 * If performance here mattered, we could protect the LDT
-                 * with RCU. This is a slow path, though, so we can just
-                 * take the mutex.
-                 */
-                mutex_lock(&task->mm->context.lock);
-                ldt = task->mm->context.ldt;
-                if (unlikely(idx >= ldt->nr_entries))
-                        base = 0;
-                else
-                        base = get_desc_base(ldt->entries + idx);
-                mutex_unlock(&task->mm->context.lock);
-#else
-                base = 0;
-#endif
-        }
-
-        return base;
-}
-
 #endif /* CONFIG_X86_32 */
 
 static unsigned long get_flags(struct task_struct *task)
@@ -482,13 +439,15 @@ static unsigned long getreg(struct task_struct *task, unsigned long offset)
                 if (task->thread.fsindex == 0)
                         return task->thread.fsbase;
                 else
-                        return task_seg_base(task, task->thread.fsindex);
+                        return x86_fsgsbase_read_task(task,
+                                                      task->thread.fsindex);
         }
         case offsetof(struct user_regs_struct, gs_base): {
                 if (task->thread.gsindex == 0)
                         return task->thread.gsbase;
                 else
-                        return task_seg_base(task, task->thread.gsindex);
+                        return x86_fsgsbase_read_task(task,
+                                                      task->thread.gsindex);
         }
 #endif
         }
-- 
2.11.0
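
For illustration only (not part of the patch): the x86_*_task() helpers
are kernel-internal, but the per-task FS/GS base state they manage is
the same state userspace reads through the arch_prctl() system call,
which later patches in this series convert to these helpers. A minimal
sketch of reading the current task's bases, assuming an x86-64 Linux
host:

        #include <asm/prctl.h>          /* ARCH_GET_FS, ARCH_GET_GS */
        #include <stdio.h>
        #include <sys/syscall.h>
        #include <unistd.h>

        int main(void)
        {
                unsigned long fsbase = 0, gsbase = 0;

                /* Ask the kernel for this task's FS/GS base values. */
                syscall(SYS_arch_prctl, ARCH_GET_FS, &fsbase);
                syscall(SYS_arch_prctl, ARCH_GET_GS, &gsbase);

                /* FS base points at the TLS block; GS base is normally 0. */
                printf("FS base: %#lx\nGS base: %#lx\n", fsbase, gsbase);
                return 0;
        }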
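
The "inactive, non-running/stopped user-task" case from the changelog
is the typical ptrace model: a tracer reading fs_base from a stopped
tracee lands in getreg() above, which returns the saved fsbase or, when
a nonzero selector is in use, the descriptor-derived base via
x86_fsgsbase_read_task(). A rough tracer sketch, again illustrative
only:

        #include <errno.h>
        #include <signal.h>
        #include <stddef.h>
        #include <stdio.h>
        #include <sys/ptrace.h>
        #include <sys/types.h>
        #include <sys/user.h>
        #include <sys/wait.h>
        #include <unistd.h>

        int main(void)
        {
                pid_t child = fork();

                if (child == 0) {
                        ptrace(PTRACE_TRACEME, 0, NULL, NULL);
                        raise(SIGSTOP);         /* stop so the parent can look */
                        _exit(0);
                }

                waitpid(child, NULL, 0);        /* child is now stopped/inactive */

                errno = 0;
                long fsbase = ptrace(PTRACE_PEEKUSER, child,
                                     offsetof(struct user_regs_struct, fs_base),
                                     NULL);
                if (errno == 0)
                        printf("tracee FS base: %#lx\n", (unsigned long)fsbase);

                kill(child, SIGKILL);
                waitpid(child, NULL, 0);
                return 0;
        }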