OSDN Git Service

KVM: PPC: Book3S HV: Change dec_expires to be relative to guest timebase
author Nicholas Piggin <npiggin@gmail.com>
Tue, 23 Nov 2021 09:52:00 +0000 (19:52 +1000)
committer Michael Ellerman <mpe@ellerman.id.au>
Wed, 24 Nov 2021 10:08:59 +0000 (21:08 +1100)
Change dec_expires to be relative to the guest timebase. This allows
the DEC save/restore to be moved into the low level P9 guest entry
functions, to improve SPR access scheduling.

Signed-off-by: Nicholas Piggin <npiggin@gmail.com>
Signed-off-by: Michael Ellerman <mpe@ellerman.id.au>
Link: https://lore.kernel.org/r/20211123095231.1036501-23-npiggin@gmail.com
arch/powerpc/include/asm/kvm_book3s.h
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kvm/book3s_hv.c
arch/powerpc/kvm/book3s_hv_nested.c
arch/powerpc/kvm/book3s_hv_p9_entry.c
arch/powerpc/kvm/book3s_hv_rmhandlers.S

index 3d31f2c..91c9f93 100644 (file)
@@ -406,6 +406,12 @@ static inline ulong kvmppc_get_fault_dar(struct kvm_vcpu *vcpu)
        return vcpu->arch.fault_dar;
 }
 
+/* Expiry time of vcpu DEC relative to host TB */
+static inline u64 kvmppc_dec_expires_host_tb(struct kvm_vcpu *vcpu)
+{
+       return vcpu->arch.dec_expires - vcpu->arch.vcore->tb_offset;
+}
+
 static inline bool is_kvmppc_resume_guest(int r)
 {
        return (r == RESUME_GUEST || r == RESUME_GUEST_NV);
index e4d2319..21ca15c 100644 (file)
@@ -741,7 +741,7 @@ struct kvm_vcpu_arch {
 
        struct hrtimer dec_timer;
        u64 dec_jiffies;
-       u64 dec_expires;
+       u64 dec_expires;        /* Relative to guest timebase. */
        unsigned long pending_exceptions;
        u8 ceded;
        u8 prodded;
index d326e6a..bc4afec 100644 (file)
@@ -2261,8 +2261,7 @@ static int kvmppc_get_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                *val = get_reg_val(id, vcpu->arch.vcore->arch_compat);
                break;
        case KVM_REG_PPC_DEC_EXPIRY:
-               *val = get_reg_val(id, vcpu->arch.dec_expires +
-                                  vcpu->arch.vcore->tb_offset);
+               *val = get_reg_val(id, vcpu->arch.dec_expires);
                break;
        case KVM_REG_PPC_ONLINE:
                *val = get_reg_val(id, vcpu->arch.online);
@@ -2514,8 +2513,7 @@ static int kvmppc_set_one_reg_hv(struct kvm_vcpu *vcpu, u64 id,
                r = kvmppc_set_arch_compat(vcpu, set_reg_val(id, *val));
                break;
        case KVM_REG_PPC_DEC_EXPIRY:
-               vcpu->arch.dec_expires = set_reg_val(id, *val) -
-                       vcpu->arch.vcore->tb_offset;
+               vcpu->arch.dec_expires = set_reg_val(id, *val);
                break;
        case KVM_REG_PPC_ONLINE:
                i = set_reg_val(id, *val);
@@ -2902,13 +2900,13 @@ static void kvmppc_set_timer(struct kvm_vcpu *vcpu)
        unsigned long dec_nsec, now;
 
        now = get_tb();
-       if (now > vcpu->arch.dec_expires) {
+       if (now > kvmppc_dec_expires_host_tb(vcpu)) {
                /* decrementer has already gone negative */
                kvmppc_core_queue_dec(vcpu);
                kvmppc_core_prepare_to_enter(vcpu);
                return;
        }
-       dec_nsec = tb_to_ns(vcpu->arch.dec_expires - now);
+       dec_nsec = tb_to_ns(kvmppc_dec_expires_host_tb(vcpu) - now);
        hrtimer_start(&vcpu->arch.dec_timer, dec_nsec, HRTIMER_MODE_REL);
        vcpu->arch.timer_running = 1;
 }
@@ -3380,7 +3378,7 @@ static void post_guest_process(struct kvmppc_vcore *vc, bool is_master)
                 */
                spin_unlock(&vc->lock);
                /* cancel pending dec exception if dec is positive */
-               if (now < vcpu->arch.dec_expires &&
+               if (now < kvmppc_dec_expires_host_tb(vcpu) &&
                    kvmppc_core_pending_dec(vcpu))
                        kvmppc_core_dequeue_dec(vcpu);
 
@@ -4224,20 +4222,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
 
        load_spr_state(vcpu);
 
-       /*
-        * When setting DEC, we must always deal with irq_work_raise via NMI vs
-        * setting DEC. The problem occurs right as we switch into guest mode
-        * if a NMI hits and sets pending work and sets DEC, then that will
-        * apply to the guest and not bring us back to the host.
-        *
-        * irq_work_raise could check a flag (or possibly LPCR[HDICE] for
-        * example) and set HDEC to 1? That wouldn't solve the nested hv
-        * case which needs to abort the hcall or zero the time limit.
-        *
-        * XXX: Another day's problem.
-        */
-       mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
-
        if (kvmhv_on_pseries()) {
                /*
                 * We need to save and restore the guest visible part of the
@@ -4263,6 +4247,23 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                        hvregs.vcpu_token = vcpu->vcpu_id;
                }
                hvregs.hdec_expiry = time_limit;
+
+               /*
+                * When setting DEC, we must always deal with irq_work_raise
+                * via NMI vs setting DEC. The problem occurs right as we
+                * switch into guest mode if a NMI hits and sets pending work
+                * and sets DEC, then that will apply to the guest and not
+                * bring us back to the host.
+                *
+                * irq_work_raise could check a flag (or possibly LPCR[HDICE]
+                * for example) and set HDEC to 1? That wouldn't solve the
+                * nested hv case which needs to abort the hcall or zero the
+                * time limit.
+                *
+                * XXX: Another day's problem.
+                */
+               mtspr(SPRN_DEC, kvmppc_dec_expires_host_tb(vcpu) - tb);
+
                mtspr(SPRN_DAR, vcpu->arch.shregs.dar);
                mtspr(SPRN_DSISR, vcpu->arch.shregs.dsisr);
                trap = plpar_hcall_norets(H_ENTER_NESTED, __pa(&hvregs),
@@ -4274,6 +4275,12 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                vcpu->arch.psscr = mfspr(SPRN_PSSCR_PR);
                mtspr(SPRN_PSSCR_PR, host_psscr);
 
+               dec = mfspr(SPRN_DEC);
+               if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+                       dec = (s32) dec;
+               tb = mftb();
+               vcpu->arch.dec_expires = dec + (tb + vc->tb_offset);
+
                /* H_CEDE has to be handled now, not later */
                if (trap == BOOK3S_INTERRUPT_SYSCALL && !vcpu->arch.nested &&
                    kvmppc_get_gpr(vcpu, 3) == H_CEDE) {
@@ -4281,6 +4288,7 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                        kvmppc_set_gpr(vcpu, 3, 0);
                        trap = 0;
                }
+
        } else {
                kvmppc_xive_push_vcpu(vcpu);
                trap = kvmhv_vcpu_entry_p9(vcpu, time_limit, lpcr);
@@ -4312,12 +4320,6 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit,
                        vcpu->arch.slb_max = 0;
        }
 
-       dec = mfspr(SPRN_DEC);
-       if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
-               dec = (s32) dec;
-       tb = mftb();
-       vcpu->arch.dec_expires = dec + tb;
-
        store_spr_state(vcpu);
 
        restore_p9_host_os_sprs(vcpu, &host_os_sprs);
@@ -4827,7 +4829,7 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit,
         * by L2 and the L1 decrementer is provided in hdec_expires
         */
        if (kvmppc_core_pending_dec(vcpu) &&
-                       ((get_tb() < vcpu->arch.dec_expires) ||
+                       ((get_tb() < kvmppc_dec_expires_host_tb(vcpu)) ||
                         (trap == BOOK3S_INTERRUPT_SYSCALL &&
                          kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
                kvmppc_core_dequeue_dec(vcpu);
index ed8a2c9..7bed0b9 100644 (file)
@@ -358,6 +358,7 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        /* convert TB values/offsets to host (L0) values */
        hdec_exp = l2_hv.hdec_expiry - vc->tb_offset;
        vc->tb_offset += l2_hv.tb_offset;
+       vcpu->arch.dec_expires += l2_hv.tb_offset;
 
        /* set L1 state to L2 state */
        vcpu->arch.nested = l2;
@@ -399,6 +400,8 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu)
        if (l2_regs.msr & MSR_TS_MASK)
                vcpu->arch.shregs.msr |= MSR_TS_S;
        vc->tb_offset = saved_l1_hv.tb_offset;
+       /* XXX: is this always the same delta as saved_l1_hv.tb_offset? */
+       vcpu->arch.dec_expires -= l2_hv.tb_offset;
        restore_hv_regs(vcpu, &saved_l1_hv);
        vcpu->arch.purr += delta_purr;
        vcpu->arch.spurr += delta_spurr;
index fb9cb34..814b0df 100644 (file)
@@ -188,7 +188,7 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
        struct kvm *kvm = vcpu->kvm;
        struct kvm_nested_guest *nested = vcpu->arch.nested;
        struct kvmppc_vcore *vc = vcpu->arch.vcore;
-       s64 hdec;
+       s64 hdec, dec;
        u64 tb, purr, spurr;
        u64 *exsave;
        bool ri_set;
@@ -317,6 +317,8 @@ int kvmhv_vcpu_entry_p9(struct kvm_vcpu *vcpu, u64 time_limit, unsigned long lpc
         */
        mtspr(SPRN_HDEC, hdec);
 
+       mtspr(SPRN_DEC, vcpu->arch.dec_expires - tb);
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 tm_return_to_guest:
 #endif
@@ -461,6 +463,12 @@ tm_return_to_guest:
        vcpu->arch.shregs.sprg2 = mfspr(SPRN_SPRG2);
        vcpu->arch.shregs.sprg3 = mfspr(SPRN_SPRG3);
 
+       dec = mfspr(SPRN_DEC);
+       if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+               dec = (s32) dec;
+       tb = mftb();
+       vcpu->arch.dec_expires = dec + tb;
+
        /* Preserve PSSCR[FAKE_SUSPEND] until we've called kvmppc_save_tm_hv */
        mtspr(SPRN_PSSCR, host_psscr |
              (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG));
index be79ae7..3f1aeff 100644 (file)
@@ -814,10 +814,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
         * Set the decrementer to the guest decrementer.
         */
        ld      r8,VCPU_DEC_EXPIRES(r4)
-       /* r8 is a host timebase value here, convert to guest TB */
-       ld      r5,HSTATE_KVM_VCORE(r13)
-       ld      r6,VCORE_TB_OFFSET_APPL(r5)
-       add     r8,r8,r6
        mftb    r7
        subf    r3,r7,r8
        mtspr   SPRN_DEC,r3
@@ -1192,9 +1188,6 @@ guest_bypass:
        mftb    r6
        extsw   r5,r5
 16:    add     r5,r5,r6
-       /* r5 is a guest timebase value here, convert to host TB */
-       ld      r4,VCORE_TB_OFFSET_APPL(r3)
-       subf    r5,r4,r5
        std     r5,VCPU_DEC_EXPIRES(r9)
 
        /* Increment exit count, poke other threads to exit */
@@ -2160,9 +2153,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
        /* save expiry time of guest decrementer */
        add     r3, r3, r5
        ld      r4, HSTATE_KVM_VCPU(r13)
-       ld      r5, HSTATE_KVM_VCORE(r13)
-       ld      r6, VCORE_TB_OFFSET_APPL(r5)
-       subf    r3, r6, r3      /* convert to host TB value */
        std     r3, VCPU_DEC_EXPIRES(r4)
 
 #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING
@@ -2259,9 +2249,6 @@ END_FTR_SECTION_IFCLR(CPU_FTR_TM)
 
        /* Restore guest decrementer */
        ld      r3, VCPU_DEC_EXPIRES(r4)
-       ld      r5, HSTATE_KVM_VCORE(r13)
-       ld      r6, VCORE_TB_OFFSET_APPL(r5)
-       add     r3, r3, r6      /* convert host TB to guest TB value */
        mftb    r7
        subf    r3, r7, r3
        mtspr   SPRN_DEC, r3