From: Linus Torvalds Date: Thu, 3 Sep 2015 22:46:07 +0000 (-0700) Subject: Merge branch 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel... X-Git-Tag: v4.3-rc1~90 X-Git-Url: http://git.osdn.net/view?a=commitdiff_plain;h=ca520cab25e0e8da717c596ccaa2c2b3650cfa09;p=uclinux-h8%2Flinux.git Merge branch 'locking-core-for-linus' of git://git./linux/kernel/git/tip/tip Pull locking and atomic updates from Ingo Molnar: "Main changes in this cycle are: - Extend atomic primitives with coherent logic op primitives (atomic_{or,and,xor}()) and deprecate the old partial APIs (atomic_{set,clear}_mask()) The old ops were incoherent with incompatible signatures across architectures and with incomplete support. Now every architecture supports the primitives consistently (by Peter Zijlstra) - Generic support for 'relaxed atomics': - _acquire/release/relaxed() flavours of xchg(), cmpxchg() and {add,sub}_return() - atomic_read_acquire() - atomic_set_release() This came out of porting qwrlock code to arm64 (by Will Deacon) - Clean up the fragile static_key APIs that were causing repeat bugs, by introducing a new one: DEFINE_STATIC_KEY_TRUE(name); DEFINE_STATIC_KEY_FALSE(name); which define a key of different types with an initial true/false value. Then allow: static_branch_likely() static_branch_unlikely() to take a key of either type and emit the right instruction for the case. To be able to know the 'type' of the static key we encode it in the jump entry (by Peter Zijlstra) - Static key self-tests (by Jason Baron) - qrwlock optimizations (by Waiman Long) - small futex enhancements (by Davidlohr Bueso) - ... and misc other changes" * 'locking-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (63 commits) jump_label/x86: Work around asm build bug on older/backported GCCs locking, ARM, atomics: Define our SMP atomics in terms of _relaxed() operations locking, include/llist: Use linux/atomic.h instead of asm/cmpxchg.h locking/qrwlock: Make use of _{acquire|release|relaxed}() atomics locking/qrwlock: Implement queue_write_unlock() using smp_store_release() locking/lockref: Remove homebrew cmpxchg64_relaxed() macro definition locking, asm-generic: Add _{relaxed|acquire|release}() variants for 'atomic_long_t' locking, asm-generic: Rework atomic-long.h to avoid bulk code duplication locking/atomics: Add _{acquire|release|relaxed}() variants of some atomic operations locking, compiler.h: Cast away attributes in the WRITE_ONCE() magic locking/static_keys: Make verify_keys() static jump label, locking/static_keys: Update docs locking/static_keys: Provide a selftest jump_label: Provide a self-test s390/uaccess, locking/static_keys: employ static_branch_likely() x86, tsc, locking/static_keys: Employ static_branch_likely() locking/static_keys: Add selftest locking/static_keys: Add a new static_key interface locking/static_keys: Rework update logic locking/static_keys: Add static_key_{en,dis}able() helpers ... --- ca520cab25e0e8da717c596ccaa2c2b3650cfa09 diff --cc arch/s390/kvm/interrupt.c index b277d50dcf76,57309e9cdd80..5c2c169395c3 --- a/arch/s390/kvm/interrupt.c +++ b/arch/s390/kvm/interrupt.c @@@ -1178,12 -1172,12 +1178,12 @@@ static int __inject_ckc(struct kvm_vcp { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CLOCK_COMP); + VCPU_EVENT(vcpu, 3, "%s", "inject: clock comparator external"); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CLOCK_COMP, - 0, 0, 2); + 0, 0); set_bit(IRQ_PEND_EXT_CLOCK_COMP, &li->pending_irqs); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + atomic_or(CPUSTAT_EXT_INT, li->cpuflags); return 0; } @@@ -1191,12 -1185,12 +1191,12 @@@ static int __inject_cpu_timer(struct kv { struct kvm_s390_local_interrupt *li = &vcpu->arch.local_int; - VCPU_EVENT(vcpu, 3, "inject: type %x", KVM_S390_INT_CPU_TIMER); + VCPU_EVENT(vcpu, 3, "%s", "inject: cpu timer external"); trace_kvm_s390_inject_vcpu(vcpu->vcpu_id, KVM_S390_INT_CPU_TIMER, - 0, 0, 2); + 0, 0); set_bit(IRQ_PEND_EXT_CPU_TIMER, &li->pending_irqs); - atomic_set_mask(CPUSTAT_EXT_INT, li->cpuflags); + atomic_or(CPUSTAT_EXT_INT, li->cpuflags); return 0; } diff --cc arch/s390/kvm/kvm-s390.c index 98df53c01343,b73302fb0507..c91eb941b444 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@@ -1283,79 -1198,43 +1283,79 @@@ int kvm_arch_vcpu_init(struct kvm_vcpu return 0; } +/* + * Backs up the current FP/VX register save area on a particular + * destination. Used to switch between different register save + * areas. + */ +static inline void save_fpu_to(struct fpu *dst) +{ + dst->fpc = current->thread.fpu.fpc; + dst->flags = current->thread.fpu.flags; + dst->regs = current->thread.fpu.regs; +} + +/* + * Switches the FP/VX register save area from which to lazy + * restore register contents. + */ +static inline void load_fpu_from(struct fpu *from) +{ + current->thread.fpu.fpc = from->fpc; + current->thread.fpu.flags = from->flags; + current->thread.fpu.regs = from->regs; +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { - save_fp_ctl(&vcpu->arch.host_fpregs.fpc); - if (test_kvm_facility(vcpu->kvm, 129)) - save_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); - else - save_fp_regs(vcpu->arch.host_fpregs.fprs); - save_access_regs(vcpu->arch.host_acrs); + /* Save host register state */ + save_fpu_regs(); + save_fpu_to(&vcpu->arch.host_fpregs); + if (test_kvm_facility(vcpu->kvm, 129)) { - restore_fp_ctl(&vcpu->run->s.regs.fpc); - restore_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); - } else { - restore_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - restore_fp_regs(vcpu->arch.guest_fpregs.fprs); - } + current->thread.fpu.fpc = vcpu->run->s.regs.fpc; + current->thread.fpu.flags = FPU_USE_VX; + /* + * Use the register save area in the SIE-control block + * for register restore and save in kvm_arch_vcpu_put() + */ + current->thread.fpu.vxrs = + (__vector128 *)&vcpu->run->s.regs.vrs; + /* Always enable the vector extension for KVM */ + __ctl_set_vx(); + } else + load_fpu_from(&vcpu->arch.guest_fpregs); + + if (test_fp_ctl(current->thread.fpu.fpc)) + /* User space provided an invalid FPC, let's clear it */ + current->thread.fpu.fpc = 0; + + save_access_regs(vcpu->arch.host_acrs); restore_access_regs(vcpu->run->s.regs.acrs); gmap_enable(vcpu->arch.gmap); - atomic_set_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_or(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); } void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) { - atomic_clear_mask(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); + atomic_andnot(CPUSTAT_RUNNING, &vcpu->arch.sie_block->cpuflags); gmap_disable(vcpu->arch.gmap); - if (test_kvm_facility(vcpu->kvm, 129)) { - save_fp_ctl(&vcpu->run->s.regs.fpc); - save_vx_regs((__vector128 *)&vcpu->run->s.regs.vrs); - } else { - save_fp_ctl(&vcpu->arch.guest_fpregs.fpc); - save_fp_regs(vcpu->arch.guest_fpregs.fprs); - } - save_access_regs(vcpu->run->s.regs.acrs); - restore_fp_ctl(&vcpu->arch.host_fpregs.fpc); + + save_fpu_regs(); + if (test_kvm_facility(vcpu->kvm, 129)) - restore_vx_regs((__vector128 *)&vcpu->arch.host_vregs->vrs); + /* + * kvm_arch_vcpu_load() set up the register save area to + * the &vcpu->run->s.regs.vrs and, thus, the vector registers + * are already saved. Only the floating-point control must be + * copied. + */ + vcpu->run->s.regs.fpc = current->thread.fpu.fpc; else - restore_fp_regs(vcpu->arch.host_fpregs.fprs); + save_fpu_to(&vcpu->arch.guest_fpregs); + load_fpu_from(&vcpu->arch.host_fpregs); + + save_access_regs(vcpu->run->s.regs.acrs); restore_access_regs(vcpu->arch.host_acrs); } diff --cc arch/s390/lib/uaccess.c index 0d002a746bec,93cb1d09493d..ae4de559e3a0 --- a/arch/s390/lib/uaccess.c +++ b/arch/s390/lib/uaccess.c @@@ -370,10 -370,23 +370,10 @@@ long __strncpy_from_user(char *dst, con } EXPORT_SYMBOL(__strncpy_from_user); -/* - * The "old" uaccess variant without mvcos can be enforced with the - * uaccess_primary kernel parameter. This is mainly for debugging purposes. - */ -static int uaccess_primary __initdata; - -static int __init parse_uaccess_pt(char *__unused) -{ - uaccess_primary = 1; - return 0; -} -early_param("uaccess_primary", parse_uaccess_pt); - static int __init uaccess_init(void) { - if (!uaccess_primary && test_facility(27)) + if (test_facility(27)) - static_key_slow_inc(&have_mvcos); + static_branch_enable(&have_mvcos); return 0; } early_initcall(uaccess_init); diff --cc arch/x86/kernel/tsc.c index 79055cf2c497,b9cfd462f7e7..c8d52cb4cb6e --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@@ -284,26 -289,11 +289,19 @@@ u64 native_sched_clock(void * very important for it to be as fast as the platform * can achieve it. ) */ - if (!static_key_false(&__use_tsc)) { - /* No locking but a rare wrong value is not a big deal: */ - return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); - } - - /* read the Time Stamp Counter: */ - tsc_now = rdtsc(); - /* return the value in ns */ - return cycles_2_ns(tsc_now); + /* No locking but a rare wrong value is not a big deal: */ + return (jiffies_64 - INITIAL_JIFFIES) * (1000000000 / HZ); } +/* + * Generate a sched_clock if you already have a TSC value. + */ +u64 native_sched_clock_from_tsc(u64 tsc) +{ + return cycles_2_ns(tsc); +} + /* We need to define a real function for sched_clock, to override the weak default version */ #ifdef CONFIG_PARAVIRT